From d76d01120ed9caffc7b53fce7cf679fe3ade8c0e Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 23 Feb 2022 16:19:22 +0000 Subject: [PATCH 001/168] Try switching to actions. --- .github/workflows/R-CMD-check.yaml | 40 ++++++++++++++++++++ .travis.yml | 59 ------------------------------ 2 files changed, 40 insertions(+), 59 deletions(-) create mode 100644 .github/workflows/R-CMD-check.yaml delete mode 100644 .travis.yml diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..18163e4 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,40 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v1 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v1 + with: + extra-packages: rcmdcheck + + - uses: r-lib/actions/check-r-package@v1 + + - name: Show testthat output + if: always() + run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@main + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 35ed9ee..0000000 --- a/.travis.yml +++ /dev/null @@ -1,59 +0,0 @@ - -# Continuous integration with travis -language: r -sudo: required -warnings_are_errors: false - -# cache packages to speed up builds -cache: packages - -before_install: - - if [[ "$OSTYPE" != "linux-gnu" ]]; - then sudo tlmgr install index; - else tlmgr install index; - fi - -addons: - apt: - packages: - - libgdal-dev - - libproj-dev - - libudunits2-dev - -r_packages: - - covr - - testthat - - raster - - sp - - devtools - - rgeos - - splancs - - Matrix - - stats - - TMB - - RcppEigen - -coverage: - status: - project: - default: - threshold: 5% - target: 80% - -after_success: - - Rscript -e 'library(covr);codecov()' - -matrix: - include: - - r: release - install: R -e 'install.packages("devtools", dep = TRUE);devtools::install_deps(dep = c("Depends", "Imports"));install.packages("INLA",repos=c(getOption("rep> -' - os: linux - - r: devel - install: R -e 'install.packages("devtools", dep = TRUE);devtools::install_deps(dep = c("Depends", "Imports"));install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE) -' - os: linux - - r: devel - env: _R_CHECK_FORCE_SUGGESTS_=false - install: R -e 'install.packages("devtools", dep = TRUE);devtools::install_deps(dep = c("Depends", "Imports"))' - From 0683f490d379cfc8f768f05a94b706bc1f856182 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 23 Feb 2022 16:21:11 +0000 Subject: [PATCH 002/168] Try to force to trigger on this branch. --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 18163e4..71b7ba1 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,7 +2,7 @@ # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, master, actions] pull_request: branches: [main, master] From ae32f131acc58757d3b5f99f070a6840ddf02b27 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 23 Feb 2022 16:21:36 +0000 Subject: [PATCH 003/168] empty to trigger. From 7edee4534dff5541f1fb642ede34a616abb400e3 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 23 Feb 2022 16:28:18 +0000 Subject: [PATCH 004/168] Copy workflow from inlabru --- .github/workflows/R-CMD-check.yaml | 76 +++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 23 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 71b7ba1..cd419ad 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,40 +1,70 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions on: push: - branches: [main, master, actions] + branches: + - devel + - stable + - release/* + - feature/* pull_request: - branches: [main, master] + branches: + - devel -name: R-CMD-check +name: R-CMD-check-no-suggests jobs: R-CMD-check: - runs-on: ubuntu-latest + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: - use-public-rspm: true + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/testing" - - uses: r-lib/actions/setup-r-dependencies@v1 - with: - extra-packages: rcmdcheck + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@8 + brew install gdal - - uses: r-lib/actions/check-r-package@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + extra-packages: | + rcmdcheck - - name: Show testthat output - if: always() - run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true - shell: bash + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + build_args: 'c("--no-manual", "--no-build-vignettes")' + args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' From 783749b0ad967c64925276b1966edeb36400e9c0 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 24 Feb 2022 08:52:33 
+0000
Subject: [PATCH 005/168] Try another workflow from inlabru

---
 .github/workflows/R-CMD-check.yaml | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index cd419ad..2609e33 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -11,7 +11,7 @@ on:
     branches:
       - devel

-name: R-CMD-check-no-suggests
+name: R-CMD-check

 jobs:
   R-CMD-check:
@@ -23,7 +23,10 @@ jobs:
       fail-fast: false
       matrix:
         config:
+          - {os: windows-latest, r: 'release'}
+          - {os: macOS-latest, r: 'release'}
           - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+#          - {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
           - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}

     env:
@@ -36,7 +39,7 @@ jobs:
     - uses: r-lib/actions/setup-r@v2
       with:
         r-version: ${{ matrix.config.r }}
-        extra-repositories: "https://inla.r-inla-download.org/R/testing"
+        extra-repositories: "https://inla.r-inla-download.org/R/stable"

     - uses: r-lib/actions/setup-pandoc@v2

@@ -50,7 +53,7 @@
     - uses: r-lib/actions/setup-r-dependencies@v2
       with:
-        dependencies: '"hard"'
+        dependencies: '"all"'
         extra-packages: |
           rcmdcheck

@@ -64,7 +67,5 @@
     - uses: r-lib/actions/check-r-package@v2
       env:
         _R_CHECK_CRAN_INCOMING_REMOTE_: false
-        _R_CHECK_FORCE_SUGGESTS_: false
       with:
-        build_args: 'c("--no-manual", "--no-build-vignettes")'
-        args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")'
+        args: 'c("--no-manual", "--as-cran")'
\ No newline at end of file

From 3d937579d31e165f410d5a1271fd7c5bd5d902b2 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 24 Feb 2022 08:54:38 +0000
Subject: [PATCH 006/168] Trigger

From 4a29decd1a149e926eeb951f0081075b3d04f479 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 24 Feb 2022 08:56:24 +0000
Subject: [PATCH 007/168] Readd actions branch as trigger.

---
 .github/workflows/R-CMD-check.yaml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 2609e33..8c68c0f 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -4,9 +4,8 @@ on:
   push:
     branches:
       - devel
-      - stable
-      - release/*
-      - feature/*
+      - master
+      - actions
   pull_request:
     branches:
       - devel
@@ -68,4 +67,4 @@
     env:
       _R_CHECK_CRAN_INCOMING_REMOTE_: false
     with:
-      args: 'c("--no-manual", "--as-cran")'
\ No newline at end of file
+      args: 'c("--no-manual", "--as-cran")'

From 4ac6f204838902e15e965fc44d3a7808818f8b94 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Wed, 22 Jun 2022 09:52:19 +0100
Subject: [PATCH 008/168] Fix Jacobian correction for iid effect.

We were accidentally correcting by log sd rather than log tau. Think
this will be minor, but still important to correct. Brought to our
attention by Bowen He, Vanderbilt.
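In more detail (a sketch of the derivation, using the variable names
from src/disaggregation.cpp): the PC prior is an exponential density
on the iid standard deviation, $\pi(\sigma) = \lambda e^{-\lambda \sigma}$,
but the model parameterises the effect by $\log\tau$ with
$\tau = \sigma^{-2}$. Changing variables to $\tau$ gives

$\log \pi(\tau) = \log(\lambda/2) - (3/2)\log\tau - \lambda \tau^{-1/2}$

which is what log_pcdensity_iid computes, and changing variables again
to $\log\tau$ adds $\log|d\tau/d\log\tau| = \log\tau$. So the Jacobian
term added to the log density should be iideffect_log_tau; the old
term, $\log\sigma = -(1/2)\log\tau$, was wrong.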
---
 src/disaggregation.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/disaggregation.cpp b/src/disaggregation.cpp
index 78ca3aa..210e595 100644
--- a/src/disaggregation.cpp
+++ b/src/disaggregation.cpp
@@ -127,7 +127,7 @@ Type objective_function::operator()()
   Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max;
   Type log_pcdensity_iid = log(lambda / 2) - (3/2)*iideffect_log_tau - lambda * pow(iideffect_tau, -1/2);
-  // log(iideffect_sd) from the Jacobian
-  nll -= log_pcdensity_iid + log(iideffect_sd);
+  // log(iideffect_tau) from the Jacobian of the log tau parameterisation
+  nll -= log_pcdensity_iid + iideffect_log_tau;

   // Likelihood of random effect for polygons
   for(int p = 0; p < iideffect.size(); p++) {

From 1987b09725846ca292dca8d88fbefec322a4a29b Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Wed, 22 Jun 2022 09:54:08 +0100
Subject: [PATCH 009/168] Change maintainer to Tim Lucas.

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 49ed5cb..e3f3b93 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -3,8 +3,8 @@ Type: Package
 Title: Disaggregation Modelling
 Version: 0.1.3
 Authors@R: c(
-    person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-5087-2494")),
-    person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-4694-8107")),
+    person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5087-2494")),
+    person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4694-8107")),
     person("Rohan", "Arambepola", email = "rarambepola@gmail.com", role = "aut"),
     person("Andre", "Python", email = "python.andre@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-8094-7226"))
 )

From 96a214ca3d378edf064d5a4455159eac44041a69 Mon Sep 17 00:00:00 2001
From: Lucas
Date: Fri, 24 Jun 2022 18:20:39 +0100
Subject: [PATCH 010/168] Fix bad default intercept prior. Closes #61

---
 R/fit_model.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/fit_model.R b/R/fit_model.R
index cf58815..42456f4 100644
--- a/R/fit_model.R
+++ b/R/fit_model.R
@@ -346,8 +346,8 @@ make_model_object <- function(data,
   prior_sigma <- sd(data$polygon_data$response/mean(data$polygon_data$response))

   # Default priors if they are not specified
-  default_priors <- list(priormean_intercept = -4.0,
-                         priorsd_intercept = 2.0,
+  default_priors <- list(priormean_intercept = 0,
+                         priorsd_intercept = 10.0,
                          priormean_slope = 0.0,
                          priorsd_slope = 0.5,
                          prior_rho_min = prior_rho,

From 6042b1c2188a48f664736e3d70b40e22aeaf828e Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Fri, 24 Jun 2022 18:27:44 +0100
Subject: [PATCH 011/168] Document priors properly. Closes #59

---
 R/fit_model.R | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/R/fit_model.R b/R/fit_model.R
index 42456f4..1bb993d 100644
--- a/R/fit_model.R
+++ b/R/fit_model.R
@@ -214,10 +214,24 @@ disag_model <- function(data,
 #'   \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
 #' }
 #'
-#' Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field
+#' Specify priors for the regression parameters, field and iid effect as a single named list. 
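+#' For example (an illustrative sketch only, not from the package
+#' documentation; the names are those listed below, and any prior not
+#' supplied falls back to its default):
+#' \preformatted{
+#' priors = list(priormean_intercept = 0,
+#'               priorsd_intercept = 10,
+#'               priormean_slope = 0,
+#'               priorsd_slope = 0.5)
+#' }
+#'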
Hyperpriors for the field
 #' are given as penalised complexity priors: you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
 #' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
-#' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect
+#' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.
+#'
+#' The precise names and default values for these priors are:
+#' \itemize{
+#' \item priormean_intercept: 0
+#' \item priorsd_intercept: 10.0
+#' \item priormean_slope: 0.0
+#' \item priorsd_slope: 0.5
+#' \item prior_rho_min: A third of the length of the diagonal of the bounding box.
+#' \item prior_rho_prob: 0.1
+#' \item prior_sigma_max: sd(response/mean(response))
+#' \item prior_sigma_prob: 0.1
+#' \item prior_iideffect_sd_max: 0.1
+#' \item prior_iideffect_sd_prob: 0.01
+#' }
 #'
 #' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
 #' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.

From 7ada2e9912e5016862258f8cb5a4d4f8caca0096 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 09:37:40 +0100
Subject: [PATCH 012/168] Trigger actions

From d914c152db3d633059461d05835e440e10bb099d Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 09:40:40 +0100
Subject: [PATCH 013/168] Actions on all branches.

---
 .github/workflows/R-CMD-check.yaml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 8c68c0f..4532483 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -3,12 +3,11 @@ on:
   push:
     branches:
-      - devel
-      - master
-      - actions
+      '**'
   pull_request:
     branches:
       - devel
+      - master

 name: R-CMD-check

From 04583ded665bf5d5bb33c22105d93cf37508c708 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 09:48:27 +0100
Subject: [PATCH 014/168] Run isinla block in vignette

---
 vignettes/disaggregation.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index 2436e18..f43b48f 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -19,7 +19,7 @@ knitr::opts_chunk$set(
 )
 ```

-```{r, echo=FALSE}
+```{r, echo=FALSE, eval = TRUE}
 isINLA <- requireNamespace('INLA', quietly = TRUE)
 ```

From 32b9ff876d7342ff863a66c73d35fcfe1d1828d5 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 09:49:17 +0100
Subject: [PATCH 015/168] Only run ci on one system for now.
---
 .github/workflows/R-CMD-check.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 4532483..5892147 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -22,10 +22,10 @@ jobs:
       matrix:
         config:
           - {os: windows-latest, r: 'release'}
-          - {os: macOS-latest, r: 'release'}
-          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+          # - {os: macOS-latest, r: 'release'}
+#          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
 #          - {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
-          - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+#          - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}

     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true

From eab23d4a8708586401f9e7252a4718717667a70b Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 10:00:38 +0100
Subject: [PATCH 016/168] Move vignette to .txt to prevent it being built for now.

---
 vignettes/{disaggregation.Rmd => disaggregation.txt} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename vignettes/{disaggregation.Rmd => disaggregation.txt} (100%)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.txt
similarity index 100%
rename from vignettes/disaggregation.Rmd
rename to vignettes/disaggregation.txt

From 57381ee0d419e5cd375d055bf21d1346a2650379 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 10:01:34 +0100
Subject: [PATCH 017/168] Delete unneeded extra .gitignore.

---
 vignettes/.gitignore | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100644 vignettes/.gitignore

diff --git a/vignettes/.gitignore b/vignettes/.gitignore
deleted file mode 100644
index 097b241..0000000
--- a/vignettes/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.html
-*.R

From f07553663ae7dc2d26b6f28e29b41e50714ff661 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 10:10:30 +0100
Subject: [PATCH 018/168] Use actions badges.

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a6f914e..48e258d 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,9 @@
 Disaggregation
 ==============
-[![Build Status](https://travis-ci.org/aknandi/disaggregation.svg?branch=master)](https://travis-ci.org/aknandi/disaggregation?branch=master)
+
+[![CRANstatus](https://www.r-pkg.org/badges/version/disaggregation)](https://cran.r-project.org/package=disaggregation)
+[![R-CMD-check](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml)
 [![codecov.io](https://codecov.io/github/aknandi/disaggregation/coverage.svg?branch=master)](https://codecov.io/github/aknandi/disaggregation?branch=master)

 A package containing useful functions for disaggregation modelling

From a69e20c08eb028c4bc4d5d8ae0e17fa35c729129 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 10:20:26 +0100
Subject: [PATCH 019/168] Don't fit models with gaussian error and iid in tests, to avoid a warning.
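A minimal sketch of the pattern the tests now use (illustrative only;
test_data is the fixture built at the top of
tests/testthat/test-fit-model.R):

    # The gaussian likelihood plus the polygon iid effect triggers a
    # warning, so the quick smoke tests turn the iid effect off.
    result <- disag_model(test_data, iterations = 2, iid = FALSE)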
--- tests/testthat/test-fit-model.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 3a52252..608dacf 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -39,7 +39,7 @@ if(identical(Sys.getenv("NOT_CRAN"), "true")) { makeMesh = FALSE) } -test_that("disag_model produces errors whe expected", { +test_that("disag_model produces errors when expected", { skip_if_not_installed('INLA') skip_on_cran() @@ -58,7 +58,7 @@ test_that("disag_model behaves as expected", { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iterations = 2) + result <- disag_model(test_data, iterations = 2, iid = FALSE) expect_is(result, 'disag_model') expect_equal(length(result), 5) @@ -81,7 +81,7 @@ test_that("disag_model with 1 covariate behaves as expected", { test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] test_data2$covariate_data <- test_data2$covariate_data[, 1:3] - result <- disag_model(test_data2, iterations = 2) + result <- disag_model(test_data2, iterations = 2, iid = FALSE) expect_is(result, 'disag_model') expect_equal(length(result), 5) From 728d1bca58aaa63030286bc09a591779a43f040d Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 30 Jun 2022 10:24:51 +0100 Subject: [PATCH 020/168] Use inherits instead of if(class(object) == something) --- R/extract.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/extract.R b/R/extract.R index 7a1d7d6..5a166f3 100644 --- a/R/extract.R +++ b/R/extract.R @@ -67,7 +67,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ if(!is.null(fun)){ # If a summary function was given, just bind everything together and add ID column df <- data.frame(do.call(rbind, values)) - if(class(shape) == 'SpatialPolygonsDataFrame'){ + if(inherits(shape, 'SpatialPolygonsDataFrame')){ df <- cbind(ID = as.data.frame(shape)[, id], df) } else{ df <- cbind(ID = names(shape), df) From d30371c4ea500b4b3f361baef53b77f9fe90f7f4 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 30 Jun 2022 16:24:16 +0100 Subject: [PATCH 021/168] Fix single covariate test. --- tests/testthat/test-fit-model.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 608dacf..078a2b0 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -86,7 +86,8 @@ test_that("disag_model with 1 covariate behaves as expected", { expect_is(result, 'disag_model') expect_equal(length(result), 5) - expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 5) + # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. + expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) expect_equal(unique(names(result$sd_out$par.random)), c("iideffect", "nodemean")) # Confirm only two covariates were fitted. From f9efd1c732ef6ad75a5f16f3ebe4e7ea349ae0e3 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 30 Jun 2022 16:29:54 +0100 Subject: [PATCH 022/168] Version bump. 
---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e3f3b93..1bcaa96 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: disaggregation
 Type: Package
 Title: Disaggregation Modelling
-Version: 0.1.3
+Version: 0.1.4
 Authors@R: c(
     person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5087-2494")),
     person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4694-8107")),
@@ -16,7 +16,7 @@ Description: Fits disaggregation regression models using 'TMB' ('Template Model
 License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.0.2
+RoxygenNote: 7.1.2
 Imports:
     maptools,
     raster,

From 0f2f7442d5adf55ed77c25ec7bb2039c8267735b Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 16:31:44 +0100
Subject: [PATCH 023/168] Import some packages into the namespace to avoid being told off?

---
 R/prepare_data.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 4482048..2443e9c 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -49,7 +49,10 @@
 #'  \item{coordsForPrediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. Used to make predictions.}
 #'  \item{startendindex }{A matrix with two columns containing the start and end index of the pixels within each polygon.}
 #'  \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.}
-#'
+#' @import rgdal
+#' @import splancs
+#' @import rgeos
+#' @import utils
 #' @name prepare_data
 #'
 #' @examples
@@ -170,7 +173,7 @@ prepare_data <- function(polygon_shapefile,
     }
   } else {
     mesh <- NULL
-    message("A mesh is not being built. You will not be able to run a model without a mesh.")
+    message("A mesh is not being built. You will not be able to run a spatial model without a mesh.")
   }

   disag_data <- list(polygon_shapefile = polygon_shapefile,

From a109df9f2399b3c1a1849acb64f431a16c212818 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Thu, 30 Jun 2022 16:41:57 +0100
Subject: [PATCH 024/168] Add imports.

---
 NAMESPACE                |  4 ++++
 man/make_model_object.Rd | 18 ++++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index d643336..7fc5bfc 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -23,6 +23,10 @@ export(predict_model)
 export(predict_uncertainty)
 export(prepare_data)
 import(ggplot2)
+import(rgdal)
+import(rgeos)
+import(splancs)
+import(utils)
 importFrom(doParallel,registerDoParallel)
 importFrom(foreach,"%dopar%")
 importFrom(parallel,makeCluster)
diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd
index bebabe0..be87363 100644
--- a/man/make_model_object.Rd
+++ b/man/make_model_object.Rd
@@ -59,10 +59,24 @@ The different likelihood correspond to slightly different models (\eqn{y_j}{yi}
   \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
 }

-Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field
+Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field
 are given as penalised complexity priors: you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
 where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
-where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. 
Also, specify pc priors for the iid effect
+where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.
+
+The precise names and default values for these priors are:
+\itemize{
+\item priormean_intercept: 0
+\item priorsd_intercept: 10.0
+\item priormean_slope: 0.0
+\item priorsd_slope: 0.5
+\item prior_rho_min: A third of the length of the diagonal of the bounding box.
+\item prior_rho_prob: 0.1
+\item prior_sigma_max: sd(response/mean(response))
+\item prior_sigma_prob: 0.1
+\item prior_iideffect_sd_max: 0.1
+\item prior_iideffect_sd_prob: 0.01
+}

 The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
 The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.

From f4fba4b015e00ddac5f80dae12726b64acddc672 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Fri, 1 Jul 2022 10:01:51 +0100
Subject: [PATCH 025/168] Fix some tests with iid removed.

---
 tests/testthat/test-fit-model.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R
index 078a2b0..5cefeb8 100644
--- a/tests/testthat/test-fit-model.R
+++ b/tests/testthat/test-fit-model.R
@@ -62,8 +62,8 @@ test_that("disag_model behaves as expected", {

   expect_is(result, 'disag_model')
   expect_equal(length(result), 5)
-  expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 5)
-  expect_equal(unique(names(result$sd_out$par.random)), c("iideffect", "nodemean"))
+  expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4)
+  expect_equal(unique(names(result$sd_out$par.random)), c("nodemean"))

@@ -88,7 +88,7 @@
   # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore.
   expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3)
-  expect_equal(unique(names(result$sd_out$par.random)), c("iideffect", "nodemean"))
+  expect_equal(unique(names(result$sd_out$par.random)), c("nodemean"))

   # Confirm only two covariates were fitted.
   expect_equal(sum(names(result$opt$par) == 'slope'), 1)

From 3f55f4954dada9c6ac3d0aa3ee0c260688efac7e Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Fri, 1 Jul 2022 10:24:12 +0100
Subject: [PATCH 026/168] Remove vignette for now.

---
 vignettes/disaggregation.txt | 172 -----------------------------------
 1 file changed, 172 deletions(-)
 delete mode 100644 vignettes/disaggregation.txt

diff --git a/vignettes/disaggregation.txt b/vignettes/disaggregation.txt
deleted file mode 100644
index f43b48f..0000000
--- a/vignettes/disaggregation.txt
+++ /dev/null
@@ -1,172 +0,0 @@
----
-title: "A short introduction to the disaggregation package"
-author: "Anita Nandi"
-date: "`r Sys.Date()`"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{A short introduction to the disaggregation package}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
----
-
-```{r setup, include = FALSE}
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>",
-  cache = TRUE,
-  fig.width = 7,
-  eval = FALSE
-)
-```
-
-```{r, echo=FALSE, eval = TRUE}
-isINLA <- requireNamespace('INLA', quietly = TRUE)
-```
-
-The **disaggregation** package contains functions to run Bayesian disaggregation models. 
Aggregated response data over large heterongenous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation). - -Install **disggregation** using: - -```r -devtools::install_github("aknandi/disaggregation") -``` - -The key functions are `prepare_data`, `fit_model` and `predict`. The `prepare_data` function takes the aggregated data and covariate data to be used in the model and produces an object to be use by `fit_model`. This functions runs the disaggregation model and the out can be passed to `predict` to produce fine-scale predicted maps of the response variable. - -To use the disaggregation `prepare_data` fuction, you must have the aggregated data as a `SpatialPolygonDataFrame` object and a `RasterStack` of the covariate data to be used in the model. - -## Example - -We will demonstrate an example of the **disaggregation** package using areal data of leukemia incidence in New York, using data from the package `SpatialEpi`. - -```{r} -library(SpatialEpi, quietly = TRUE) -library(dplyr, quietly = TRUE) -library(sp, quietly = TRUE) -library(raster, quietly = TRUE) -library(disaggregation, quietly = TRUE) - -map <- NYleukemia$spatial.polygon -df <- NYleukemia$data - -polygon_data <- SpatialPolygonsDataFrame(map, df) -polygon_data -``` - -Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at the resolution of approximately 1km2. - -```{r, fig.show='hold'} -extent_in_km <- 111*(polygon_data@bbox[, 2] - polygon_data@bbox[, 1]) -n_pixels_x <- floor(extent_in_km[[1]]) -n_pixels_y <- floor(extent_in_km[[2]]) -r <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y) -r <- raster::setExtent(r, raster::extent(polygon_data)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3)) -r2 <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y) -r2 <- raster::setExtent(r2, raster::extent(polygon_data)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_y), 3)) -cov_stack <- raster::stack(r, r2) -cov_stack <- raster::scale(cov_stack) -``` - -We also create a population raster. This is to allow the model to correctly aggregated the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed. - -```{r, fig.show='hold'} -extracted <- raster::extract(r, polygon_data) -n_cells <- sapply(extracted, length) -polygon_data@data$pop_per_cell <- polygon_data@data$population/n_cells -pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell') - -``` - -To correct small inconsistencies in the polygon geometry, we run the line below - -```{r, fig.show='hold'} -polygon_data <- rgeos::gBuffer(polygon_data, byid = TRUE, width = 0) -``` - -Now we have setup the data we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified. - -The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. 
This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.noncovex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html). - -```{r, fig.show='hold'} -data_for_model <- prepare_data(polygon_data, - cov_stack, - pop_raster, - response_var = 'cases', - id_var = 'censustract.FIPS', - mesh.args = list(cut = 0.01, - offset = c(0.1, 0.5), - max.edge = c(0.1, 0.2), - resolution = 250), - na.action = TRUE, - ncores = 1) -``` - -```{r, fig.show='hold'} -plot(data_for_model) -``` - -Now have our data object we are ready to run the model. Here we can specify the likelihood function as gaussian, binomial or poisson, and we can specify the link function as logit, log or identity. The disaggregation model makes predictions at the pixel level: - -$link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$ - -where $X$ are the covariates, $GP$ is the gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$): - -$cases_j = \sum_{i \epsilon j} pred_i \times agg_i$ - -$rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}$ - -The different likelihood correspond to slightly different models ($y_j$ is the repsonse count data): - -**Gaussian** ($\sigma_j$ is the dispersion of the polygon data), - -$dnorm(y_j/\sum agg_i, rate_j, \sigma_j)$ - -Here $\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i$, where $\sigma$ is the dispersion of the pixel data, a parameter learnt by the model. - - -**Binomial** (For a survey in polygon j, $y_j$ is the number positive and $N_j$ is the number tested) - -$dbinom(y_j, N_j, rate_j)$ - -**Poisson** (predicts incidence count) - -$dpois(y_j, cases_j)$ - -The user can also specify the priors for the regression parameters. For the field, the user specifies the pc priors for the range, $\rho_{min}$ and $\rho_{prob}$, where $P(\rho < \rho_{min}) = \rho_{prob}$, and the variation, $\sigma_{min}$ and $\sigma_{prob}$, where $P(\sigma > \sigma_{min}) = \sigma_{prob}$, in the field. For the iid effect, the user also specifies pc priors. - -By default the model contains a spatial field and a polygon iid effect. These can be turned off in the `fit_model` function, using `field = FALSE` or `iid = FALSE`. - - -```{r, fig.show='hold', eval=isINLA} -model_result <- disag_model(data_for_model, - iterations = 1000, - family = 'poisson', - link = 'log', - priors = list(priormean_intercept = 0, - priorsd_intercept = 2, - priormean_slope = 0.0, - priorsd_slope = 0.4, - prior_rho_min = 3, - prior_rho_prob = 0.01, - prior_sigma_max = 1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.05, - prior_iideffect_sd_prob = 0.01)) -``` - -```{r, fig.show='hold', eval=isINLA} -plot(model_result) -``` - -Now we have the results from the model of the fitted parameters, we can predict Leukemia incidence rate at fine-scale (the scale of the covariate data) across New York. 
The `predict` function takes the model result and predicts both the mean raster surface and predicts and summarises `N` parameter draws, where `N` is set by the user (default 100). The uncertainty is summarirised via the confidence interval set by the user (default 95% CI).
-
-
-```{r, fig.show='hold', eval=isINLA}
-preds <- predict(model_result,
-                 N = 100,
-                 CI = 0.95)
-
-plot(preds)
-```

From 4e43033c685082f0fa7bdbe5e51c054b02d8613b Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Fri, 1 Jul 2022 11:14:35 +0100
Subject: [PATCH 027/168] Readd vignette

---
 vignettes/disaggregation.Rmd | 172 +++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 vignettes/disaggregation.Rmd

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
new file mode 100644
index 0000000..7ba592f
--- /dev/null
+++ b/vignettes/disaggregation.Rmd
@@ -0,0 +1,172 @@
+---
+title: "A short introduction to the disaggregation package"
+author: "Anita Nandi"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{A short introduction to the disaggregation package}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  cache = TRUE,
+  fig.width = 7,
+  eval = FALSE
+)
+```
+
+```{r, echo=FALSE, eval = TRUE}
+isINLA <- requireNamespace('INLA', quietly = TRUE)
+```
+
+The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterogeneous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The GitHub page for this package can be found [here](https://github.com/aknandi/disaggregation).
+
+Install **disaggregation** using:
+
+```r
+devtools::install_github("aknandi/disaggregation")
+```
+
+The key functions are `prepare_data`, `fit_model` and `predict`. The `prepare_data` function takes the aggregated data and covariate data to be used in the model and produces an object to be used by `fit_model`. This function runs the disaggregation model, and the output can be passed to `predict` to produce fine-scale predicted maps of the response variable.
+
+To use the disaggregation `prepare_data` function, you must have the aggregated data as a `SpatialPolygonDataFrame` object and a `RasterStack` of the covariate data to be used in the model.
+
+## Example
+
+We will demonstrate an example of the **disaggregation** package using areal data of leukemia incidence in New York, using data from the package `SpatialEpi`.
+
+```{r}
+library(SpatialEpi, quietly = TRUE)
+library(dplyr, quietly = TRUE)
+library(sp, quietly = TRUE)
+library(raster, quietly = TRUE)
+library(disaggregation, quietly = TRUE)
+
+map <- NYleukemia$spatial.polygon
+df <- NYleukemia$data
+
+polygon_data <- SpatialPolygonsDataFrame(map, df)
+polygon_data
+```
+
+Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at a resolution of approximately 1 km². 
+
+```{r, fig.show='hold'}
+extent_in_km <- 111*(polygon_data@bbox[, 2] - polygon_data@bbox[, 1])
+n_pixels_x <- floor(extent_in_km[[1]])
+n_pixels_y <- floor(extent_in_km[[2]])
+r <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y)
+r <- raster::setExtent(r, raster::extent(polygon_data))
+r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3))
+r2 <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y)
+r2 <- raster::setExtent(r2, raster::extent(polygon_data))
+r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_y), 3))
+cov_stack <- raster::stack(r, r2)
+cov_stack <- raster::scale(cov_stack)
+```
+
+We also create a population raster. This is to allow the model to correctly aggregate the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed.
+
+```{r, fig.show='hold'}
+extracted <- raster::extract(r, polygon_data)
+n_cells <- sapply(extracted, length)
+polygon_data@data$pop_per_cell <- polygon_data@data$population/n_cells
+pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell')
+
+```
+
+To correct small inconsistencies in the polygon geometry, we run the line below
+
+```{r, fig.show='hold'}
+polygon_data <- rgeos::gBuffer(polygon_data, byid = TRUE, width = 0)
+```
+
+Now we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified.
+
+The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.nonconvex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html).
+
+```{r, fig.show='hold'}
+data_for_model <- prepare_data(polygon_data,
+                               cov_stack,
+                               pop_raster,
+                               response_var = 'cases',
+                               id_var = 'censustract.FIPS',
+                               mesh.args = list(cut = 0.01,
+                                                offset = c(0.1, 0.5),
+                                                max.edge = c(0.1, 0.2),
+                                                resolution = 250),
+                               na.action = TRUE,
+                               ncores = 1)
+```
+
+```{r, fig.show='hold'}
+plot(data_for_model)
+```
+
+Now we have our data object, we are ready to run the model. Here we can specify the likelihood function as gaussian, binomial or poisson, and we can specify the link function as logit, log or identity. The disaggregation model makes predictions at the pixel level:
+
+$link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$
+
+where $X$ are the covariates, $GP$ is the gaussian random field and $u_i$ is the iid random effect. 
The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$):
+
+$cases_j = \sum_{i \epsilon j} pred_i \times agg_i$
+
+$rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}$
+
+The different likelihoods correspond to slightly different models ($y_j$ is the response count data):
+
+**Gaussian** ($\sigma_j$ is the dispersion of the polygon data),
+
+$dnorm(y_j/\sum agg_i, rate_j, \sigma_j)$
+
+Here $\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i$, where $\sigma$ is the dispersion of the pixel data, a parameter learnt by the model.
+
+
+**Binomial** (For a survey in polygon j, $y_j$ is the number positive and $N_j$ is the number tested)
+
+$dbinom(y_j, N_j, rate_j)$
+
+**Poisson** (predicts incidence count)
+
+$dpois(y_j, cases_j)$
+
+The user can also specify the priors for the regression parameters. For the field, the user specifies the pc priors for the range, $\rho_{min}$ and $\rho_{prob}$, where $P(\rho < \rho_{min}) = \rho_{prob}$, and the variation, $\sigma_{min}$ and $\sigma_{prob}$, where $P(\sigma > \sigma_{min}) = \sigma_{prob}$, in the field. For the iid effect, the user also specifies pc priors.
+
+By default the model contains a spatial field and a polygon iid effect. These can be turned off in the `disag_model` function, using `field = FALSE` or `iid = FALSE`.
+
+
+```{r, fig.show='hold', eval=isINLA}
+model_result <- disag_model(data_for_model,
+                            iterations = 1000,
+                            family = 'poisson',
+                            link = 'log',
+                            priors = list(priormean_intercept = 0,
+                                          priorsd_intercept = 2,
+                                          priormean_slope = 0.0,
+                                          priorsd_slope = 0.4,
+                                          prior_rho_min = 3,
+                                          prior_rho_prob = 0.01,
+                                          prior_sigma_max = 1,
+                                          prior_sigma_prob = 0.01,
+                                          prior_iideffect_sd_max = 0.05,
+                                          prior_iideffect_sd_prob = 0.01))
+```
+
+```{r, fig.show='hold', eval=isINLA}
+plot(model_result)
+```
+
+Now we have the fitted parameters from the model, we can predict Leukemia incidence rate at fine-scale (the scale of the covariate data) across New York. The `predict` function takes the model result and predicts the mean raster surface, and also predicts and summarises `N` parameter draws, where `N` is set by the user (default 100). The uncertainty is summarised via the confidence interval set by the user (default 95% CI).
+
+
+```{r, fig.show='hold', eval=isINLA}
+preds <- predict(model_result,
+                 N = 100,
+                 CI = 0.95)
+
+plot(preds)
+```

From f59a4ceebdad31373dc2c91e4ffd8fcd74c039f2 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Fri, 1 Jul 2022 11:38:18 +0100
Subject: [PATCH 028/168] cut out most of vignette to work out how to get it to work.

---
 vignettes/disaggregation.Rmd | 131 -----------------------------------
 1 file changed, 131 deletions(-)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index 7ba592f..0c66a36 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -39,134 +39,3 @@
 To use the disaggregation `prepare_data` fuction, you must have the aggregated data as a `SpatialPolygonDataFrame` object and a `RasterStack` of the covariate data to be used in the model.

 ## Example

 We will demonstrate an example of the **disaggregation** package using areal data of leukemia incidence in New York, using data from the package `SpatialEpi`. 
-```{r} -library(SpatialEpi, quietly = TRUE) -library(dplyr, quietly = TRUE) -library(sp, quietly = TRUE) -library(raster, quietly = TRUE) -library(disaggregation, quietly = TRUE) - -map <- NYleukemia$spatial.polygon -df <- NYleukemia$data - -polygon_data <- SpatialPolygonsDataFrame(map, df) -polygon_data -``` - -Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at the resolution of approximately 1km2. - -```{r, fig.show='hold'} -extent_in_km <- 111*(polygon_data@bbox[, 2] - polygon_data@bbox[, 1]) -n_pixels_x <- floor(extent_in_km[[1]]) -n_pixels_y <- floor(extent_in_km[[2]]) -r <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y) -r <- raster::setExtent(r, raster::extent(polygon_data)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3)) -r2 <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y) -r2 <- raster::setExtent(r2, raster::extent(polygon_data)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_y), 3)) -cov_stack <- raster::stack(r, r2) -cov_stack <- raster::scale(cov_stack) -``` - -We also create a population raster. This is to allow the model to correctly aggregated the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed. - -```{r, fig.show='hold'} -extracted <- raster::extract(r, polygon_data) -n_cells <- sapply(extracted, length) -polygon_data@data$pop_per_cell <- polygon_data@data$population/n_cells -pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell') - -``` - -To correct small inconsistencies in the polygon geometry, we run the line below - -```{r, fig.show='hold'} -polygon_data <- rgeos::gBuffer(polygon_data, byid = TRUE, width = 0) -``` - -Now we have setup the data we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified. - -The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.noncovex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html). - -```{r, fig.show='hold'} -data_for_model <- prepare_data(polygon_data, - cov_stack, - pop_raster, - response_var = 'cases', - id_var = 'censustract.FIPS', - mesh.args = list(cut = 0.01, - offset = c(0.1, 0.5), - max.edge = c(0.1, 0.2), - resolution = 250), - na.action = TRUE, - ncores = 1) -``` - -```{r, fig.show='hold'} -plot(data_for_model) -``` - -Now have our data object we are ready to run the model. Here we can specify the likelihood function as gaussian, binomial or poisson, and we can specify the link function as logit, log or identity. 
The disaggregation model makes predictions at the pixel level: - -$link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$ - -where $X$ are the covariates, $GP$ is the gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$): - -$cases_j = \sum_{i \epsilon j} pred_i \times agg_i$ - -$rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}$ - -The different likelihood correspond to slightly different models ($y_j$ is the repsonse count data): - -**Gaussian** ($\sigma_j$ is the dispersion of the polygon data), - -$dnorm(y_j/\sum agg_i, rate_j, \sigma_j)$ - -Here $\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i$, where $\sigma$ is the dispersion of the pixel data, a parameter learnt by the model. - - -**Binomial** (For a survey in polygon j, $y_j$ is the number positive and $N_j$ is the number tested) - -$dbinom(y_j, N_j, rate_j)$ - -**Poisson** (predicts incidence count) - -$dpois(y_j, cases_j)$ - -The user can also specify the priors for the regression parameters. For the field, the user specifies the pc priors for the range, $\rho_{min}$ and $\rho_{prob}$, where $P(\rho < \rho_{min}) = \rho_{prob}$, and the variation, $\sigma_{min}$ and $\sigma_{prob}$, where $P(\sigma > \sigma_{min}) = \sigma_{prob}$, in the field. For the iid effect, the user also specifies pc priors. - -By default the model contains a spatial field and a polygon iid effect. These can be turned off in the `disag_model` function, using `field = FALSE` or `iid = FALSE`. - - -```{r, fig.show='hold', eval=isINLA} -model_result <- disag_model(data_for_model, - iterations = 1000, - family = 'poisson', - link = 'log', - priors = list(priormean_intercept = 0, - priorsd_intercept = 2, - priormean_slope = 0.0, - priorsd_slope = 0.4, - prior_rho_min = 3, - prior_rho_prob = 0.01, - prior_sigma_max = 1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.05, - prior_iideffect_sd_prob = 0.01)) -``` - -```{r, fig.show='hold', eval=isINLA} -plot(model_result) -``` - -Now we have the results from the model of the fitted parameters, we can predict Leukemia incidence rate at fine-scale (the scale of the covariate data) across New York. The `predict` function takes the model result and predicts both the mean raster surface and predicts and summarises `N` parameter draws, where `N` is set by the user (default 100). The uncertainty is summarirised via the confidence interval set by the user (default 95% CI). - - -```{r, fig.show='hold', eval=isINLA} -preds <- predict(model_result, - N = 100, - CI = 0.95) - -plot(preds) -``` From 9a530d68f3b6e7c4fa18ad9379f547400e5f3317 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 1 Jul 2022 13:16:21 +0100 Subject: [PATCH 029/168] Readd libs to vignette. Probably the problem. --- vignettes/disaggregation.Rmd | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 0c66a36..fef2e52 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -39,3 +39,16 @@ To use the disaggregation `prepare_data` fuction, you must have the aggregated d We will demonstrate an example of the **disaggregation** package using areal data of leukemia incidence in New York, using data from the package `SpatialEpi`. 
+```{r}
+library(SpatialEpi, quietly = TRUE)
+library(dplyr, quietly = TRUE)
+library(sp, quietly = TRUE)
+library(raster, quietly = TRUE)
+library(disaggregation, quietly = TRUE)
+
+map <- NYleukemia$spatial.polygon
+df <- NYleukemia$data
+
+polygon_data <- SpatialPolygonsDataFrame(map, df)
+polygon_data
+```

From 0c5b8b747ab7f206d31fbecf48ef1e6a2a0369df Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Fri, 1 Jul 2022 13:50:12 +0100
Subject: [PATCH 030/168] more vignette.

---
 vignettes/disaggregation.Rmd | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index fef2e52..0c0cf2f 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -52,3 +52,38 @@
 polygon_data <- SpatialPolygonsDataFrame(map, df)
 polygon_data
 ```
+
+Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at a resolution of approximately 1 km².
+
+```{r, fig.show='hold'}
+extent_in_km <- 111*(polygon_data@bbox[, 2] - polygon_data@bbox[, 1])
+n_pixels_x <- floor(extent_in_km[[1]])
+n_pixels_y <- floor(extent_in_km[[2]])
+r <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y)
+r <- raster::setExtent(r, raster::extent(polygon_data))
+r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3))
+r2 <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y)
+r2 <- raster::setExtent(r2, raster::extent(polygon_data))
+r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_y), 3))
+cov_stack <- raster::stack(r, r2)
+cov_stack <- raster::scale(cov_stack)
+```
+
+We also create a population raster. This is to allow the model to correctly aggregate the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed.
+
+```{r, fig.show='hold'}
+extracted <- raster::extract(r, polygon_data)
+n_cells <- sapply(extracted, length)
+polygon_data@data$pop_per_cell <- polygon_data@data$population/n_cells
+pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell')
+
+```
+
+To correct small inconsistencies in the polygon geometry, we run the line below
+
+```{r, fig.show='hold'}
+polygon_data <- rgeos::gBuffer(polygon_data, byid = TRUE, width = 0)
+```
+
+Now we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified.
+

From ea15b2ef737fec5f760f9298c9bb6fb89d48b921 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Fri, 1 Jul 2022 14:10:21 +0100
Subject: [PATCH 031/168] more vignette.

---
 vignettes/disaggregation.Rmd | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index 0c0cf2f..d448a3c 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -87,3 +87,23 @@
 polygon_data <- rgeos::gBuffer(polygon_data, byid = TRUE, width = 0)
 ```

 Now we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified.
+
The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.noncovex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html). + +```{r, fig.show='hold'} +data_for_model <- prepare_data(polygon_data, + cov_stack, + pop_raster, + response_var = 'cases', + id_var = 'censustract.FIPS', + mesh.args = list(cut = 0.01, + offset = c(0.1, 0.5), + max.edge = c(0.1, 0.2), + resolution = 250), + na.action = TRUE, + ncores = 1) +``` + +```{r, fig.show='hold'} +plot(data_for_model) +``` + From c4109d11188a6869766b4591a6bdeb1790d8492b Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 1 Jul 2022 14:50:44 +0100 Subject: [PATCH 032/168] Full vignette again. --- vignettes/disaggregation.Rmd | 77 ++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 7 deletions(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index d448a3c..8141360 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -1,15 +1,15 @@ --- -title: "A short introduction to the disaggregation package" + title: "A short introduction to the disaggregation package" author: "Anita Nandi" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{A short introduction to the disaggregation package} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} +%\VignetteEngine{knitr::rmarkdown} +%\VignetteEncoding{UTF-8} --- - -```{r setup, include = FALSE} + + ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", @@ -26,8 +26,8 @@ isINLA <- requireNamespace('INLA', quietly = TRUE) The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterongenous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation). Install **disggregation** using: - -```r + + ```r devtools::install_github("aknandi/disaggregation") ``` @@ -107,3 +107,66 @@ data_for_model <- prepare_data(polygon_data, plot(data_for_model) ``` +Now have our data object we are ready to run the model. Here we can specify the likelihood function as gaussian, binomial or poisson, and we can specify the link function as logit, log or identity. The disaggregation model makes predictions at the pixel level: + + $link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$ + + where $X$ are the covariates, $GP$ is the gaussian random field and $u_i$ is the iid random effect. 
The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$): + + $cases_j = \sum_{i \epsilon j} pred_i \times agg_i$ + + $rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}$ + + The different likelihood correspond to slightly different models ($y_j$ is the repsonse count data): + + **Gaussian** ($\sigma_j$ is the dispersion of the polygon data), + +$dnorm(y_j/\sum agg_i, rate_j, \sigma_j)$ + + Here $\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i$, where $\sigma$ is the dispersion of the pixel data, a parameter learnt by the model. + + +**Binomial** (For a survey in polygon j, $y_j$ is the number positive and $N_j$ is the number tested) + +$dbinom(y_j, N_j, rate_j)$ + + **Poisson** (predicts incidence count) + +$dpois(y_j, cases_j)$ + + The user can also specify the priors for the regression parameters. For the field, the user specifies the pc priors for the range, $\rho_{min}$ and $\rho_{prob}$, where $P(\rho < \rho_{min}) = \rho_{prob}$, and the variation, $\sigma_{min}$ and $\sigma_{prob}$, where $P(\sigma > \sigma_{min}) = \sigma_{prob}$, in the field. For the iid effect, the user also specifies pc priors. + +By default the model contains a spatial field and a polygon iid effect. These can be turned off in the `disag_model` function, using `field = FALSE` or `iid = FALSE`. + + +```{r, fig.show='hold', eval=isINLA} +model_result <- disag_model(data_for_model, + iterations = 1000, + family = 'poisson', + link = 'log', + priors = list(priormean_intercept = 0, + priorsd_intercept = 2, + priormean_slope = 0.0, + priorsd_slope = 0.4, + prior_rho_min = 3, + prior_rho_prob = 0.01, + prior_sigma_max = 1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.05, + prior_iideffect_sd_prob = 0.01)) +``` + +```{r, fig.show='hold', eval=isINLA} +plot(model_result) +``` + +Now we have the results from the model of the fitted parameters, we can predict Leukemia incidence rate at fine-scale (the scale of the covariate data) across New York. The `predict` function takes the model result and predicts both the mean raster surface and predicts and summarises `N` parameter draws, where `N` is set by the user (default 100). The uncertainty is summarirised via the confidence interval set by the user (default 95% CI). 
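As a rough illustration of what that summary step involves (this is not the package's internal code), the confidence interval amounts to taking pixel-wise quantiles across the `N` draws. A minimal sketch, assuming `draw_stack` is a hypothetical `RasterStack` with one predicted surface per parameter draw:

```r
library(raster)

# Sketch only: collapse a stack of prediction draws into a mean surface
# and lower/upper CI surfaces. `draw_stack` is a hypothetical RasterStack
# with one layer per draw; disaggregation's predict() does this for you.
summarise_draws <- function(draw_stack, CI = 0.95) {
  probs <- c((1 - CI) / 2, 1 - (1 - CI) / 2)
  mean_surface <- raster::calc(draw_stack, fun = mean)
  ci_surfaces <- raster::calc(draw_stack,
                              fun = function(x) quantile(x, probs = probs, na.rm = TRUE))
  list(mean = mean_surface,
       lower = ci_surfaces[[1]],
       upper = ci_surfaces[[2]])
}
```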
+ + +```{r, fig.show='hold', eval=isINLA} +preds <- predict(model_result, + N = 100, + CI = 0.95) + +plot(preds) +``` From cb8763d11175af3b7ac2a5e996544ec9c44ecf68 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 1 Jul 2022 15:21:07 +0100 Subject: [PATCH 033/168] Vignette fixs --- vignettes/disaggregation.Rmd | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 8141360..b63d560 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -1,25 +1,25 @@ --- - title: "A short introduction to the disaggregation package" +title: "A short introduction to the disaggregation package" author: "Anita Nandi" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{A short introduction to the disaggregation package} -%\VignetteEngine{knitr::rmarkdown} -%\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} --- - ```{r setup, include = FALSE} +```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", cache = TRUE, fig.width = 7, - eval = FALSE + eval = TRUE ) ``` -```{r, echo=FALSE, eval = TRUE} +```{r, echo = FALSE, eval = TRUE} isINLA <- requireNamespace('INLA', quietly = TRUE) ``` @@ -27,7 +27,7 @@ The **disaggregation** package contains functions to run Bayesian disaggregation Install **disggregation** using: - ```r +```r devtools::install_github("aknandi/disaggregation") ``` @@ -89,7 +89,7 @@ Now we have setup the data we can use the `prepare_data` function to create the The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.noncovex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html). -```{r, fig.show='hold'} +```{r, fig.show='hold', eval= isINLA} data_for_model <- prepare_data(polygon_data, cov_stack, pop_raster, @@ -103,7 +103,7 @@ data_for_model <- prepare_data(polygon_data, ncores = 1) ``` -```{r, fig.show='hold'} +```{r, fig.show='hold', eval= isINLA} plot(data_for_model) ``` From 531f4ddd5d4e009c38424fd2887267e8292f68da Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 4 Jul 2022 10:03:30 +0100 Subject: [PATCH 034/168] Add some info about mesh defaults. Closes #63. --- R/prepare_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index 2443e9c..698f34c 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -32,7 +32,7 @@ #' @param id_var Name of column in SpatialPolygonDataFrame object with the polygon id. #' @param response_var Name of column in SpatialPolygonDataFrame object with the response data. #' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. 
-#' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA. +#' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA. Defaults assume degrees so may be very bad defaults in other units. #' @param na.action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors). #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE. #' @param ncores Number of cores used to perform covariate extraction. From 0f5fb98c7adbdb04b2f20b7620c18fa30ba656b7 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 4 Jul 2022 16:17:57 +0100 Subject: [PATCH 035/168] Make mesh creation dynamic. Closes #65 and hopefully makes #64 redundant. --- R/build_mesh.R | 14 ++++++++++---- R/prepare_data.R | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 6e7f48e..671b39e 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -46,12 +46,18 @@ build_mesh <- function(shapes, mesh.args = NULL) { stopifnot(inherits(shapes, 'SpatialPolygons')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) + limits <- sp::bbox(shapes) + hypotenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) + maxedge <- hypotenuse/10 + + pars <- list(convex = -0.01, concave = -0.5, resolution = 300, - max.edge = c(3.0, 8), - cut = 0.4, - offset = c(1, 15)) + max.edge = c(maxedge, maxedge * 2), + cut = 0.1, + offset = c(hypotenuse / 10, hypotenuse / 10)) + pars[names(mesh.args)] <- mesh.args @@ -74,6 +80,6 @@ build_mesh <- function(shapes, mesh.args = NULL) { cut = pars$cut, offset = pars$offset) - + return(mesh) } diff --git a/R/prepare_data.R b/R/prepare_data.R index 698f34c..2443e9c 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -32,7 +32,7 @@ #' @param id_var Name of column in SpatialPolygonDataFrame object with the polygon id. #' @param response_var Name of column in SpatialPolygonDataFrame object with the response data. #' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. -#' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA. Defaults assume degrees so may be very bad defaults in other units. +#' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA. #' @param na.action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors). #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE. #' @param ncores Number of cores used to perform covariate extraction. From 6e70b457c2982de45f99d81ee80420984aefc578 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 5 Jul 2022 21:22:33 +0100 Subject: [PATCH 036/168] trigger actions. From e3059f35a24ee176eeafe6adced87e9fcfe36a92 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 08:04:45 +0100 Subject: [PATCH 037/168] SpatialEpi back in to suggests. 
--- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1bcaa96..2df5355 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,7 +40,8 @@ Suggests: testthat, INLA, knitr, - rmarkdown + rmarkdown, + SpatialEpi LinkingTo: TMB, RcppEigen From a4630adb9790be2011918b25c6aaff37eda5b4d6 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 08:24:06 +0100 Subject: [PATCH 038/168] Readd other OSs to actions. --- .github/workflows/R-CMD-check.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 5892147..7c60bf7 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -22,10 +22,10 @@ jobs: matrix: config: - {os: windows-latest, r: 'release'} - # - {os: macOS-latest, r: 'release'} -# - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} -# - {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} -# - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true From 2a411f83eb29cb954cf5c9dcf31c633724442b40 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 08:34:19 +0100 Subject: [PATCH 039/168] Add nosuggests tests to check package test doesnt fail without inla. --- .github/workflows/R-CMD-check-nosuggests.yaml | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 .github/workflows/R-CMD-check-nosuggests.yaml diff --git a/.github/workflows/R-CMD-check-nosuggests.yaml b/.github/workflows/R-CMD-check-nosuggests.yaml new file mode 100644 index 0000000..5a266fe --- /dev/null +++ b/.github/workflows/R-CMD-check-nosuggests.yaml @@ -0,0 +1,96 @@ +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# +# Largely copied from: https://github.com/inlabru-org/inlabru/blob/devel/.github/workflows/R-CMD-check-no-suggests.yaml +# Want to test without suggests to ensure things don't fail on cran when INLA isn't there. 
+ +on: + push: + branches: + '**' + pull_request: + branches: + - devel + - master + +name: R-CMD-check-no-suggests + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/testing" + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@9 + brew install gdal + + - name: Has inla? Check. + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + shell: Rscript {0} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + extra-packages: | + rcmdcheck + testthat + + - name: Has inla? Check, and remove. + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + if ("INLA" %in% pkgs) { + remove.packages("INLA") + } + shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + with: + build_args: 'c("--no-manual", "--no-build-vignettes")' + args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' + + + + \ No newline at end of file From 974f8bf0ba960bcccc601e1a10e46fe3b837d21d Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 08:34:55 +0100 Subject: [PATCH 040/168] Change name of nosuggets. --- .../{R-CMD-check-nosuggests.yaml => R-CMD-check-no-suggests.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{R-CMD-check-nosuggests.yaml => R-CMD-check-no-suggests.yaml} (100%) diff --git a/.github/workflows/R-CMD-check-nosuggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml similarity index 100% rename from .github/workflows/R-CMD-check-nosuggests.yaml rename to .github/workflows/R-CMD-check-no-suggests.yaml From cc275b60e7aae3d1da0d4142f653728ad1e0ad71 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 08:36:48 +0100 Subject: [PATCH 041/168] Remove macosx for now. 
--- .github/workflows/R-CMD-check-no-suggests.yaml | 2 +- .github/workflows/R-CMD-check.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml index 5a266fe..5b8be7c 100644 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -26,7 +26,7 @@ jobs: matrix: config: - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} + # - {os: macOS-latest, r: 'release'} - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 7c60bf7..9deddea 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -22,7 +22,7 @@ jobs: matrix: config: - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} + # - {os: macOS-latest, r: 'release'} - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} From 4b927bbf104b6ccdb5415e21fbd052ea9800da04 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 09:41:43 +0100 Subject: [PATCH 042/168] Trigger actions. From d9d5df64e25e56537c4602d57746fbeb0058967d Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 10:28:08 +0100 Subject: [PATCH 043/168] Formatting problems in no suggests. --- .../workflows/R-CMD-check-no-suggests.yaml | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml index 5b8be7c..0b0bd92 100644 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -48,15 +48,15 @@ jobs: if: runner.os == 'macOS' run: | brew install --cask xquartz - brew install pkg-config - brew install proj@9 - brew install gdal + brew install pkg-config + brew install proj@9 + brew install gdal - name: Has inla? Check. run: | options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs shell: Rscript {0} - uses: r-lib/actions/setup-r-dependencies@v2 @@ -64,32 +64,32 @@ jobs: dependencies: '"hard"' extra-packages: | rcmdcheck - testthat + testthat - name: Has inla? Check, and remove. 
run: | options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - if ("INLA" %in% pkgs) { - remove.packages("INLA") - } + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + if ("INLA" %in% pkgs) { + remove.packages("INLA") + } shell: Rscript {0} - name: Session info run: | options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) shell: Rscript {0} - uses: r-lib/actions/check-r-package@v2 env: _R_CHECK_CRAN_INCOMING_REMOTE_: false - _R_CHECK_FORCE_SUGGESTS_: false + _R_CHECK_FORCE_SUGGESTS_: false with: build_args: 'c("--no-manual", "--no-build-vignettes")' - args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' + args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' From c2460ff71e9606d5fd333a14f2113c00ec394e6f Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 10:30:35 +0100 Subject: [PATCH 044/168] More formatting. --- .github/workflows/R-CMD-check-no-suggests.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml index 0b0bd92..2ba3b1c 100644 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -32,7 +32,7 @@ jobs: env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} + RSPM: ${{ matrix.config.rspm }} steps: - uses: actions/checkout@v2 @@ -62,9 +62,9 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: dependencies: '"hard"' - extra-packages: | - rcmdcheck - testthat + extra-packages: | + rcmdcheck + testthat - name: Has inla? Check, and remove. run: | From ee43a8b9f6515520a4b1c580263389865e3cde55 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 10:32:04 +0100 Subject: [PATCH 045/168] Even more formatting. --- .github/workflows/R-CMD-check-no-suggests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml index 2ba3b1c..07b2b77 100644 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -6,8 +6,8 @@ on: push: - branches: - '**' + branches: + '**' pull_request: branches: - devel From f0b560509ba88c509351c05eb178e979f5da426c Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 10:35:03 +0100 Subject: [PATCH 046/168] Even more formatting 2. 
--- .../workflows/R-CMD-check-no-suggests.yaml | 138 +++++++++--------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml index 07b2b77..8e8f258 100644 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -17,80 +17,80 @@ name: R-CMD-check-no-suggests jobs: R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - # - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@v2 + runs-on: ${{ matrix.config.os }} - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/testing" + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - uses: r-lib/actions/setup-pandoc@v2 + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + # - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - name: Install system dependencies on MacOS (X11, gdal) - if: runner.os == 'macOS' - run: | - brew install --cask xquartz - brew install pkg-config - brew install proj@9 - brew install gdal + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} - - name: Has inla? Check. - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - shell: Rscript {0} - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"hard"' - extra-packages: | - rcmdcheck - testthat - - - name: Has inla? Check, and remove. - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - if ("INLA" %in% pkgs) { - remove.packages("INLA") - } - shell: Rscript {0} - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - uses: r-lib/actions/check-r-package@v2 - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - _R_CHECK_FORCE_SUGGESTS_: false - with: - build_args: 'c("--no-manual", "--no-build-vignettes")' - args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/testing" + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@9 + brew install gdal + + - name: Has inla? Check. + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + shell: Rscript {0} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + extra-packages: | + rcmdcheck + testthat + + - name: Has inla? Check, and remove. 
+ run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + if ("INLA" %in% pkgs) { + remove.packages("INLA") + } + shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + with: + build_args: 'c("--no-manual", "--no-build-vignettes")' + args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' + \ No newline at end of file From cf6cfb17d00b071cf50c2d70f0f631aba47f4d95 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 11:27:27 +0100 Subject: [PATCH 047/168] Even more formatting 3. --- .github/workflows/R-CMD-check-no-suggests.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml index 8e8f258..e4db198 100644 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -23,12 +23,12 @@ jobs: strategy: fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - # - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + matrix: + config: + - {os: windows-latest, r: 'release'} + # - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true From 4db5851d5b5d4a37d5af2b2523fc675b9c31e0a9 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 11:54:01 +0100 Subject: [PATCH 048/168] Fix byte error thing. From cfe316979acf78aa6d0cf4c559375c97536d681c Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 14:07:25 +0100 Subject: [PATCH 049/168] Fix badges --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 48e258d..7b7d14d 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,10 @@ Disaggregation ============== -[![CRANstatus](https://www.r-pkg.org/badges/version/dplyr)](https://cran.r-project.org/package=dplyr) -[![R-CMD-check](https://github.com/tidyverse/dplyr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/dplyr/actions/workflows/R-CMD-check.yaml) +[![CRANstatus](https://www.r-pkg.org/badges/version/disaggregation)](https://cran.r-project.org/package=disaggregation) +[![R-CMD-check](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml) + + [![codecov.io](https://codecov.io/github/aknandi/disaggregation/coverage.svg?branch=master)](https://codecov.io/github/aknandi/disaggregation?branch=master) A package containing useful functions for disaggregation modelling From 9bfab2c53ce1a762f4c217082dc87093e8b254eb Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 14:09:32 +0100 Subject: [PATCH 050/168] Remove codecov because Im not sure were using it. 
--- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 7b7d14d..96c535d 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ Disaggregation [![R-CMD-check](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml) -[![codecov.io](https://codecov.io/github/aknandi/disaggregation/coverage.svg?branch=master)](https://codecov.io/github/aknandi/disaggregation?branch=master) - A package containing useful functions for disaggregation modelling Installation From 7ad3566bc121d015981368422f7c21fa44521cda Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 15 Jul 2022 14:34:30 +0100 Subject: [PATCH 051/168] Fix byte error thing again. From 2b3ed961465130b8ca75e3c7ad5285e3d0e3b457 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 16 Aug 2022 21:04:19 +0100 Subject: [PATCH 052/168] Doc with new roxygen. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2df5355..cb7f293 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Description: Fits disaggregation regression models using 'TMB' ('Template Model License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.1 Imports: maptools, raster, From 681f9dd4692a7b992c0582a4eada705adaa5045f Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 16 Aug 2022 21:04:39 +0100 Subject: [PATCH 053/168] Update cran comments. --- cran-comments.md | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/cran-comments.md b/cran-comments.md index 2a601ad..f9e4260 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,29 +1,23 @@ ## Update -This is a package update (version 0.1.3). The changes in this version are: +This is a package update (version 0.1.4). The changes in this version are: -* Renamed fit_model function to disag_model. Deprecated fit_model, will be removed in the next version +* Change maintainer. Anita Nandi has emailed to confirm. Anita has moved industry and no longer has time to maintain this package. +* Fixed mistake in model definition. We were adjusting the jacobian for a change of variables incorrectly. -* Renamed classes disag.data and fit.result to disag_data and disag_model +* Fixed predictions in models with no field -* Created a disag_predictions class which is returned by the predict function and contains the - mean and uncertainty predictions. This has replaced the predictions and uncertainty classes. - Plot, summary and print methods have been implemented for the disag_predictions class +* Better documentation for priors. -* Extracted the function make_model_object to allow the user to make a TMB model object on its own, - so it can be used in different optimiser or a for MCMC - -* Neatened up plot.disag_data function to produce 3 plots on the same canvas, with an optional which - argument for the user to choose which plots to display -* Made the summary and print function return different outputs. Print functions show minmial output, - summary function are more deatiled ## Test environments * local Windows 10, R 3.6.1 * Ubuntu 16.04.6 LTS (on travis-ci, devel and release) * win-builder (devel and release) + + ## R CMD check results There were no ERRORs or WARNINGs. 
@@ -52,5 +46,7 @@ There were 3 NOTEs: To compile large C++ source files on Windows a compilation flag is needed + + ## Downstream dependencies There are currently no downstream dependencies for this package From bff1f065ea2acdc2b25c16c5f965049c77fd3d64 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 16 Aug 2022 21:09:05 +0100 Subject: [PATCH 054/168] Add actions for html5 update. --- .github/workflows/R-CMD-check-HTML5.yaml | 42 ++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/R-CMD-check-HTML5.yaml diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml new file mode 100644 index 0000000..f526f6f --- /dev/null +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -0,0 +1,42 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [html5] + pull_request: + branches: [html5] + +name: R-CMD-check-HTML5 + +jobs: + R-CMD-check: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + _R_CHECK_RD_VALIDATE_RD2HTML_: TRUE + steps: + - uses: actions/checkout@v2 + + - name: Install pdflatex + run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra + + - name: Install tidy + run: sudo apt install tidy + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: 'devel' + http-user-agent: 'release' + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + args: '"--as-cran"' + build_args: 'character()' + error-on: '"note"' From d40578d160f9b64c47f23f87ce241c3c28fbdb05 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 16 Aug 2022 21:15:32 +0100 Subject: [PATCH 055/168] Install INLA on html5 actions. --- .github/workflows/R-CMD-check-HTML5.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index f526f6f..1e71e50 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -18,6 +18,11 @@ jobs: steps: - uses: actions/checkout@v2 + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/stable" + - name: Install pdflatex run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra From a6d0c6c4dd963ffd7925f3d80e1dc6944b62fbbc Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Sat, 20 Aug 2022 20:22:07 +0100 Subject: [PATCH 056/168] R build ignore github action files. --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index af84591..d96ffe0 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,4 @@ README.md .travis.yml vignettes/disaggregation_cache/* cran-comments.md +^\.github$ From b52db1ba296d381f2dd974b848999adb29a3a076 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Sat, 20 Aug 2022 20:22:41 +0100 Subject: [PATCH 057/168] Add inla install to html5 actions. 
--- .github/workflows/R-CMD-check-HTML5.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index 1e71e50..847511a 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -15,6 +15,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes _R_CHECK_RD_VALIDATE_RD2HTML_: TRUE + steps: - uses: actions/checkout@v2 From a52765775d28b8229262ee331a4b10892c71f857 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Sat, 20 Aug 2022 20:35:40 +0100 Subject: [PATCH 058/168] Try to fix inla install in html5 actions. --- .github/workflows/R-CMD-check-HTML5.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index 847511a..a1ad9e4 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -38,8 +38,9 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::rcmdcheck - needs: check + dependencies: '"all"' + extra-packages: | + rcmdcheck - uses: r-lib/actions/check-r-package@v2 with: From 9b8352bc7fd3c997ac903ff1e74f7f74fcf7e600 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Sat, 20 Aug 2022 21:30:34 +0100 Subject: [PATCH 059/168] Try to fix inla install in html5 actions 2. --- .github/workflows/R-CMD-check-HTML5.yaml | 25 +++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index a1ad9e4..57a2b54 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -1,5 +1,5 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions on: push: branches: [html5] @@ -30,11 +30,15 @@ jobs: - name: Install tidy run: sudo apt install tidy - - uses: r-lib/actions/setup-r@v2 - with: - r-version: 'devel' - http-user-agent: 'release' - use-public-rspm: true + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@8 + brew install gdal - uses: r-lib/actions/setup-r-dependencies@v2 with: @@ -42,6 +46,13 @@ jobs: extra-packages: | rcmdcheck + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + - uses: r-lib/actions/check-r-package@v2 with: args: '"--as-cran"' From 177eab6173ade52a6949014055f64a959eedb311 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 24 Aug 2022 15:28:00 +0100 Subject: [PATCH 060/168] Run check html action as exactly the same as normal before readding the bit that maybe runs the actual check... 
--- .github/workflows/R-CMD-check-HTML5.yaml | 32 +++++++++++++----------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index 57a2b54..7136ba5 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -6,16 +6,24 @@ on: pull_request: branches: [html5] -name: R-CMD-check-HTML5 +name: R-CMD-check jobs: R-CMD-check: - runs-on: ubuntu-latest + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes - _R_CHECK_RD_VALIDATE_RD2HTML_: TRUE - + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + steps: - uses: actions/checkout@v2 @@ -24,12 +32,6 @@ jobs: r-version: ${{ matrix.config.r }} extra-repositories: "https://inla.r-inla-download.org/R/stable" - - name: Install pdflatex - run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra - - - name: Install tidy - run: sudo apt install tidy - - uses: r-lib/actions/setup-pandoc@v2 - name: Install system dependencies on MacOS (X11, gdal) @@ -54,7 +56,7 @@ jobs: shell: Rscript {0} - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false with: - args: '"--as-cran"' - build_args: 'character()' - error-on: '"note"' + args: 'c("--no-manual", "--as-cran")' From a5824abff64c62dcba14371b28f5b22bd1066dc7 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 24 Aug 2022 20:08:05 +0100 Subject: [PATCH 061/168] Now add in final line of html5 checks. --- .github/workflows/R-CMD-check-HTML5.yaml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index 7136ba5..333bfc6 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -32,6 +32,10 @@ jobs: r-version: ${{ matrix.config.r }} extra-repositories: "https://inla.r-inla-download.org/R/stable" + + - name: Install pdflatex + run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra + - uses: r-lib/actions/setup-pandoc@v2 - name: Install system dependencies on MacOS (X11, gdal) @@ -56,7 +60,7 @@ jobs: shell: Rscript {0} - uses: r-lib/actions/check-r-package@v2 - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false with: - args: 'c("--no-manual", "--as-cran")' + args: '"--as-cran"' + build_args: 'character()' + error-on: '"note"' From 04915f9d89f9a7b985bd917c43f5e7bd76677a09 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 24 Aug 2022 20:19:07 +0100 Subject: [PATCH 062/168] Try again Now add in final line of html5 checks. 
--- .github/workflows/R-CMD-check-HTML5.yaml | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index 333bfc6..9080726 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -6,20 +6,12 @@ on: pull_request: branches: [html5] -name: R-CMD-check +name: R-CMD-check-html5 -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} +jobs: + HTML5-check: + runs-on: ubuntu-latest env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} From e7c41c894d63a310f64e8e975c1fffd48c342464 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 24 Aug 2022 21:13:48 +0100 Subject: [PATCH 063/168] Dont error on note for html5. Probably not good long term. --- .github/workflows/R-CMD-check-HTML5.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index 9080726..e651e70 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -55,4 +55,4 @@ jobs: with: args: '"--as-cran"' build_args: 'character()' - error-on: '"note"' + #error-on: '"note"' From 3d3be6b90714a98a0f330e0b08742ecb1d47b936 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 24 Aug 2022 21:56:27 +0100 Subject: [PATCH 064/168] Edit cran comments. --- cran-comments.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cran-comments.md b/cran-comments.md index f9e4260..1892954 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -9,13 +9,15 @@ This is a package update (version 0.1.4). The changes in this version are: * Better documentation for priors. +* redocument to fix html5 issues. -## Test environments -* local Windows 10, R 3.6.1 -* Ubuntu 16.04.6 LTS (on travis-ci, devel and release) -* win-builder (devel and release) +## Test environments +Windows, R release +Ubuntu 20, R release +Ubuntu 20, r Oldrel +Ubuntu 20, R devel ## R CMD check results @@ -23,8 +25,6 @@ There were no ERRORs or WARNINGs. There were 3 NOTEs: -* checking CRAN incoming feasibility ... NOTE - Maintainer: 'Anita Nandi ' Suggests or Enhances not in mainstream repositories: INLA From 8825a3033c43aa3dbfecdc9a275a98bdceda3532 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 24 Aug 2022 22:54:31 +0100 Subject: [PATCH 065/168] Try to build and upload package . --- .github/workflows/R-CMD-build.yaml | 68 ++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 .github/workflows/R-CMD-build.yaml diff --git a/.github/workflows/R-CMD-build.yaml b/.github/workflows/R-CMD-build.yaml new file mode 100644 index 0000000..4895de4 --- /dev/null +++ b/.github/workflows/R-CMD-build.yaml @@ -0,0 +1,68 @@ +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
+# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: + 'build' + +name: R-CMD-build + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/stable" + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@8 + brew install gdal + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"all"' + extra-packages: | + rcmdcheck + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + with: + args: 'c("--no-manual", "--as-cran")' + + - name: Upload built package + uses: actions/upload-artifact@v2 + with: + name: pkg + path: /home/runner/work/ + From 640320461cd50200a679e1f84b7686430e99e7ca Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 29 Aug 2022 16:30:24 +0100 Subject: [PATCH 066/168] Redoc after latex change. --- R/fit_model.R | 4 ++-- man/fit_model.Rd | 2 +- man/make_model_object.Rd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index 1bb993d..8861791 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -26,7 +26,7 @@ #' #' Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field #' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}$ and $\sigma_{prob}} for the variation of the field +#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field #' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect #' #' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. @@ -216,7 +216,7 @@ disag_model <- function(data, #' #' Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field #' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}$ and $\sigma_{prob}} for the variation of the field +#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field #' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. 
#' #' The precise names and default values for these priors are: diff --git a/man/fit_model.Rd b/man/fit_model.Rd index a762d3e..2cfcc0f 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -94,7 +94,7 @@ The different likelihood correspond to slightly different models (\eqn{y_j}{yi} Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}$ and $\sigma_{prob}} for the variation of the field +where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd index be87363..f9d98f9 100644 --- a/man/make_model_object.Rd +++ b/man/make_model_object.Rd @@ -61,7 +61,7 @@ The different likelihood correspond to slightly different models (\eqn{y_j}{yi} Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}$ and $\sigma_{prob}} for the variation of the field +where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. The precise names and default values for these priors are: From de824268f37f700f2c483e683805131fc18a6f48 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 29 Aug 2022 16:37:43 +0100 Subject: [PATCH 067/168] Spell check. --- R/build_mesh.R | 2 +- R/fit_model.R | 6 +++--- R/matching.R | 2 +- R/plotting.R | 8 ++++---- man/build_mesh.Rd | 2 +- man/fit_model.Rd | 4 ++-- man/getStartendindex.Rd | 2 +- man/make_model_object.Rd | 2 +- man/plot.disag_data.Rd | 4 ++-- man/plot.disag_model.Rd | 2 +- man/plot.disag_prediction.Rd | 2 +- vignettes/disaggregation.Rmd | 8 ++++---- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 671b39e..6857417 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -8,7 +8,7 @@ #' #' Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution}, #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, -#' with the names meaing the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. +#' with the names meaning the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. #' #' Defaults are: #' pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)). 
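For reference, the "Defaults are" text in the hunk above still quotes the static values that [PATCH 035/168] replaced: since that change, `build_mesh` scales its defaults to the extent of the data rather than assuming units of degrees. A sketch of that computation, taken from the earlier patch, assuming `shapes` is the `SpatialPolygons` input:

```r
# Dynamic mesh defaults (per PATCH 035): derive edge lengths and offsets
# from the diagonal of the data's bounding box.
limits <- sp::bbox(shapes)
hypotenuse <- sqrt((limits[1, 2] - limits[1, 1])^2 + (limits[2, 2] - limits[2, 1])^2)
maxedge <- hypotenuse / 10

pars <- list(convex = -0.01,
             concave = -0.5,
             resolution = 300,
             max.edge = c(maxedge, maxedge * 2),
             cut = 0.1,
             offset = c(hypotenuse / 10, hypotenuse / 10))
```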
diff --git a/R/fit_model.R b/R/fit_model.R index 8861791..0f7159d 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -11,7 +11,7 @@ #' \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} #' \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} #' -#' The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the repsonse count data): +#' The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): #' \itemize{ #' \item Gaussian: #' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where @@ -38,7 +38,7 @@ #' #' The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. #' -#' The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE. +#' The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. #' #' #' @param data disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting @@ -201,7 +201,7 @@ disag_model <- function(data, #' \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} #' \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} #' -#' The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the repsonse count data): +#' The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): #' \itemize{ #' \item Gaussian: #' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where diff --git a/R/matching.R b/R/matching.R index f98c967..f2b7825 100644 --- a/R/matching.R +++ b/R/matching.R @@ -1,6 +1,6 @@ #' Function to match pixels to their corresponding polygon #' -#' From the covaraite data and polygon data, the function matches the polygon id between the two to find +#' From the covariate data and polygon data, the function matches the polygon id between the two to find #' which pixels from the covariate data are contained in each of the polygons. #' #' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, diff --git a/R/plotting.R b/R/plotting.R index d2f3c40..de26ae7 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,11 +1,11 @@ #' Plot input data for disaggregation #' -#' Plotting function for class \emph{disag_data} (the input data for disaggragation). +#' Plotting function for class \emph{disag_data} (the input data for disaggregation). #' #' Produces three plots: polygon response data, covariate rasters and INLA mesh. #' #' @param x Object of class \emph{disag_data} to be plotted. -#' @param which If a subset of plots is requied, specify a subset of the numbers 1:3 +#' @param which If a subset of plots is required, specify a subset of the numbers 1:3 #' @param ... Further arguments to \emph{plot} function. #' #' @return A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) @@ -44,7 +44,7 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) { #' Plot results of fitted model #' -#' Plotting function for class \emph{disag_model} (the result of the disaggragation fitting). 
+#' Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). #' #' Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. #' @@ -103,7 +103,7 @@ plot.disag_model <- function(x, ...){ #' Plot mean and uncertainty predictions from the disaggregation model results #' -#' Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggragation fitting). +#' Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). #' #' Produces raster plots of the mean prediction, and the lower and upper confidence intervals. #' diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index ef6f2b7..ea92181 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -26,7 +26,7 @@ and having a small region outside with a coarser mesh to avoid edge effects. Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution}, to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, -with the names meaing the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. +with the names meaning the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. Defaults are: pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)). diff --git a/man/fit_model.Rd b/man/fit_model.Rd index 2cfcc0f..0efffc2 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -79,7 +79,7 @@ And then aggregates these predictions to the polygon level using the weighted su \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the repsonse count data): +The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): \itemize{ \item Gaussian: If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where @@ -106,7 +106,7 @@ The field and iid effect can be turned on or off via the \emph{field} and \emph{ The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. -The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE. +The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. } \examples{ \dontrun{ diff --git a/man/getStartendindex.Rd b/man/getStartendindex.Rd index c5a3464..4d35f90 100644 --- a/man/getStartendindex.Rd +++ b/man/getStartendindex.Rd @@ -19,7 +19,7 @@ covariate data that corresponds to that polygon, the second column is the index covariate data that corresponds to that polygon. } \description{ -From the covaraite data and polygon data, the function matches the polygon id between the two to find +From the covariate data and polygon data, the function matches the polygon id between the two to find which pixels from the covariate data are contained in each of the polygons. 
} \details{ diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd index f9d98f9..8348828 100644 --- a/man/make_model_object.Rd +++ b/man/make_model_object.Rd @@ -46,7 +46,7 @@ And then aggregates these predictions to the polygon level using the weighted su \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the repsonse count data): +The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): \itemize{ \item Gaussian: If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where diff --git a/man/plot.disag_data.Rd b/man/plot.disag_data.Rd index 998a900..bc4f774 100644 --- a/man/plot.disag_data.Rd +++ b/man/plot.disag_data.Rd @@ -9,7 +9,7 @@ \arguments{ \item{x}{Object of class \emph{disag_data} to be plotted.} -\item{which}{If a subset of plots is requied, specify a subset of the numbers 1:3} +\item{which}{If a subset of plots is required, specify a subset of the numbers 1:3} \item{...}{Further arguments to \emph{plot} function.} } @@ -17,7 +17,7 @@ A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) } \description{ -Plotting function for class \emph{disag_data} (the input data for disaggragation). +Plotting function for class \emph{disag_data} (the input data for disaggregation). } \details{ Produces three plots: polygon response data, covariate rasters and INLA mesh. diff --git a/man/plot.disag_model.Rd b/man/plot.disag_model.Rd index 35a19b3..6fbcab3 100644 --- a/man/plot.disag_model.Rd +++ b/man/plot.disag_model.Rd @@ -15,7 +15,7 @@ A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot } \description{ -Plotting function for class \emph{disag_model} (the result of the disaggragation fitting). +Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). } \details{ Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. diff --git a/man/plot.disag_prediction.Rd b/man/plot.disag_prediction.Rd index 9990eb9..5942bf3 100644 --- a/man/plot.disag_prediction.Rd +++ b/man/plot.disag_prediction.Rd @@ -15,7 +15,7 @@ A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. } \description{ -Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggragation fitting). +Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). } \details{ Produces raster plots of the mean prediction, and the lower and upper confidence intervals. diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index b63d560..1f9c09f 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( isINLA <- requireNamespace('INLA', quietly = TRUE) ``` -The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterongenous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation). 
+The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterogenous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation). Install **disggregation** using: @@ -33,7 +33,7 @@ devtools::install_github("aknandi/disaggregation") ``` The key functions are `prepare_data`, `fit_model` and `predict`. The `prepare_data` function takes the aggregated data and covariate data to be used in the model and produces an object to be used by `fit_model`. This function runs the disaggregation model and the output can be passed to `predict` to produce fine-scale predicted maps of the response variable. -To use the disaggregation `prepare_data` fuction, you must have the aggregated data as a `SpatialPolygonDataFrame` object and a `RasterStack` of the covariate data to be used in the model. +To use the disaggregation `prepare_data` function, you must have the aggregated data as a `SpatialPolygonDataFrame` object and a `RasterStack` of the covariate data to be used in the model. ## Example @@ -117,7 +117,7 @@ Now have our data object we are ready to run the model. Here we can specify the $rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}$ - The different likelihood correspond to slightly different models ($y_j$ is the repsonse count data): + The different likelihoods correspond to slightly different models ($y_j$ is the response count data): **Gaussian** ($\sigma_j$ is the dispersion of the polygon data), @@ -160,7 +160,7 @@ model_result <- disag_model(data_for_model, plot(model_result) ``` -Now we have the results from the model of the fitted parameters, we can predict Leukemia incidence rate at fine-scale (the scale of the covariate data) across New York. The `predict` function takes the model result and predicts both the mean raster surface and predicts and summarises `N` parameter draws, where `N` is set by the user (default 100). The uncertainty is summarirised via the confidence interval set by the user (default 95% CI). +Now we have the results from the model of the fitted parameters, we can predict Leukemia incidence rate at fine-scale (the scale of the covariate data) across New York. The `predict` function takes the model result and predicts both the mean raster surface and predicts and summarises `N` parameter draws, where `N` is set by the user (default 100). The uncertainty is summarised via the confidence interval set by the user (default 95% CI). ```{r, fig.show='hold', eval=isINLA} From d04d7230950e413203622f97f2968d97a6ece964 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Apr 2023 11:14:28 +0100 Subject: [PATCH 068/168] Update CITATION to point to JSS paper. --- inst/CITATION | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/inst/CITATION b/inst/CITATION index 0650da5..ee20286 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,16 +1,16 @@ -citHeader("To cite the disaggregation package in publications, please use:") - -citEntry(entry = "Article", - author = as.person(c( - "Anita K Nandi [aut, cre]", - "Tim C D Lucas [aut]", - "Rohan Arambepola [aut]", - "Peter Gething", - "Dan Weiss" - )), - title = "disaggregation: An R Package for Bayesian Spatial Disaggregation Modelling", - journal = "arxiv", - year = "2020", - url = "https://arxiv.org/abs/2001.04847", - textVersion = "Nandi, A.
K., Lucas, T. C., Arambepola, R., Gething, P., & Weiss, D. J. (2020). disaggregation: An R Package for Bayesian Spatial Disaggregation Modelling. arXiv preprint arXiv:2001.04847." +bibentry(bibtype = "Article", + title = "{disaggregation}: An {R} Package for {B}ayesian Spatial Disaggregation Modeling", + author = c(person(given = c("Anita", "K."), family = "Nandi"), + person(given = c("Tim", "C.", "D."), family = "Lucas"), + person(given = "Rohan", family = "Arambepola"), + person(given = "Peter", family = "Gething"), + person(given = c("Daniel", "J."), family = "Weiss")), + journal = "Journal of Statistical Software", + year = "2023", + volume = "106", + number = "11", + pages = "1--19", + doi = "10.18637/jss.v106.i11", + header = "To cite disaggregation in publications use:" ) + From 634678326594bb48d8ab83db9b0ee412ce9d8d99 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Apr 2023 11:15:35 +0100 Subject: [PATCH 069/168] minor vignette change. --- vignettes/disaggregation.Rmd | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index b63d560..2898e08 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -25,7 +25,13 @@ isINLA <- requireNamespace('INLA', quietly = TRUE) The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterongenous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation). -Install **disggregation** using: +Install **disaggregation** using: + +```r +install.packages('disaggregation') +``` + +or from github using ```r devtools::install_github("aknandi/disaggregation") ``` From 653f7f540d05dabf9d54f21b0c77c58c58718338 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 27 Apr 2023 11:42:12 +0100 Subject: [PATCH 070/168] Update JSS refs. --- DESCRIPTION | 4 ++-- R/fit_model.R | 7 +++++-- man/fit_model.Rd | 9 +++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cb7f293..1f5e78c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,8 +11,8 @@ Authors@R: c( Description: Fits disaggregation regression models using 'TMB' ('Template Model Builder'). When the response data are aggregated to polygon level but the predictor variables are at a higher resolution, these models can be - useful. Regression models with spatial random fields. A useful reference for disaggregation modelling is - Lucas et al. (2019) . + useful. Regression models with spatial random fields. The package is + described in detail in Nandi et al. (2023) <doi:10.18637/jss.v106.i11>. License: MIT + file LICENSE Encoding: UTF-8 LazyData: true diff --git a/R/fit_model.R b/R/fit_model.R index 0f7159d..58cb7ae 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -1,10 +1,11 @@ #' Fit the disaggregation model #' -#' \emph{fit_model} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} and performs a Bayesian disaggregation fit. +#' \emph{fit_model} function takes a \emph{disag_data} object created by +#' \code{\link{prepare_data}} and performs a Bayesian disaggregation fit.
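Once a CITATION file like the one above ships with a release, the rendered reference can be checked from R. A quick sanity check, assuming the installed copy of disaggregation includes this CITATION:

```r
# Print the human-readable citation and its BibTeX form for the package.
citation("disaggregation")
toBibtex(citation("disaggregation"))
```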
#' #' \strong{The model definition} #' -#' The disaggregation model make predictions at the pixel level: +#' The disaggregation model makes predictions at the pixel level: #' \deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} #' #' And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): @@ -66,6 +67,8 @@ #' \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} #' #' @name fit_model +#' @references Nandi et al. (2023) disaggregation: An R Package for Bayesian +#' Spatial Disaggregation Modeling. #' #' @examples #' \dontrun{ diff --git a/man/fit_model.Rd b/man/fit_model.Rd index 0efffc2..1b4500c 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -67,12 +67,13 @@ The list of class \code{disag_model} contains: \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} } \description{ -\emph{fit_model} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} and performs a Bayesian disaggregation fit. +\emph{fit_model} function takes a \emph{disag_data} object created by +\code{\link{prepare_data}} and performs a Bayesian disaggregation fit. } \details{ \strong{The model definition} -The disaggregation model make predictions at the pixel level: +The disaggregation model makes predictions at the pixel level: \deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): @@ -141,3 +142,7 @@ The \emph{silent} argument can be used to publish/suppress verbose output. Defau } } +\references{ Nandi et al. (2023) disaggregation: An R Package for Bayesian Spatial Disaggregation Modeling. } From fb1d0df683e98d7600bb112eba65c0e20ef5673e Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 27 Apr 2023 11:43:09 +0100 Subject: [PATCH 071/168] Ignore a couple of temp files. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d8a4b5a..260b964 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ inst/doc *.so vignettes/disaggregation_cache/* vignettes/disaggregation_files/* +.github/workflows/R-CMD-check-HTML5.archyaml +vignettes/spatio_temporal_disaggregation.Rmd \ No newline at end of file From 252b4bc3a8d6eb50461c454f3ab3225783e0663d Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 27 Apr 2023 11:43:29 +0100 Subject: [PATCH 072/168] Ignore a couple of temp files. --- .Rbuildignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.Rbuildignore b/.Rbuildignore index d96ffe0..4808471 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,5 @@ README.md vignettes/disaggregation_cache/* cran-comments.md ^\.github$ +.github/workflows/R-CMD-check-HTML5.archyaml +vignettes/spatio_temporal_disaggregation.Rmd \ No newline at end of file From b055698f1da2f2a326fa461b4c2679fb6fa569b7 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 27 Apr 2023 11:44:44 +0100 Subject: [PATCH 073/168] Add an ignore file for github, not exactly what it's for.
--- .github/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/.gitignore diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html From 43ed1728b8a893f04c65f2dbec8089faa060df6f Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 28 Apr 2023 18:03:39 +0100 Subject: [PATCH 074/168] Bump to version 0.2 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1f5e78c..c61db06 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: disaggregation Type: Package Title: Disaggregation Modelling -Version: 0.1.4 +Version: 0.2.0 Authors@R: c( person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5087-2494")), person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4694-8107")), From 25219e085677938da3df219da4b5f3658ded20e0 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 13 Jul 2023 20:38:26 +0100 Subject: [PATCH 075/168] Remove rgeos from fit model. --- R/build_mesh.R | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 6857417..151bbdb 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -39,6 +39,8 @@ #' my_mesh <- build_mesh(spdf) #' } #' +#' +#' #' @export build_mesh <- function(shapes, mesh.args = NULL) { @@ -61,13 +63,10 @@ build_mesh <- function(shapes, mesh.args = NULL) { pars[names(mesh.args)] <- mesh.args - outline <- maptools::unionSpatialPolygons(shapes, IDs = rep(1, length(shapes))) - - coords <- list() - for(i in seq_len(length(outline@polygons[[1]]@Polygons))){ - coords[[i]] <- outline@polygons[[1]]@Polygons[[i]]@coords - } - coords <- do.call(rbind, coords) + outline <- sf::st_union(sf::st_as_sf(shapes)) + coords <- sf::st_coordinates(outline) + + + outline.hull <- INLA::inla.nonconvex.hull(coords, convex = pars$convex, From 8431308d2e5b252f8783235b3f1599f3662cbc35 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 13 Jul 2023 20:42:46 +0100 Subject: [PATCH 076/168] Add sf. not sure exactly when I removed rgeos etc. --- DESCRIPTION | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c61db06..b36c7eb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,15 +18,13 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.1 Imports: - maptools, raster, foreach, sp, + sf, parallel, doParallel, - rgeos, splancs, - rgdal, Matrix, stats, TMB, From 12ae1b616ea489370b528e907907657f6d361366 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 13 Jul 2023 20:43:10 +0100 Subject: [PATCH 077/168] Remove gBuffer (from rgeos) from vignette --- vignettes/disaggregation.Rmd | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index f411d5f..c18253b 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -51,6 +51,7 @@ library(dplyr, quietly = TRUE) library(sp, quietly = TRUE) library(raster, quietly = TRUE) library(disaggregation, quietly = TRUE) +library(sf) map <- NYleukemia$spatial.polygon df <- NYleukemia$data @@ -85,10 +86,14 @@ pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell') ``` -To correct small inconsistencies in the polygon geometry, we run the line below +To correct small inconsistencies in the polygon geometry, we run the code below.
+We are a bit in between packages at the moment. +Most of the package is built on sp. But as rgeos has been deprecated we have to +switch the polygons to simple features and back again. ```{r, fig.show='hold'} -polygon_data <- rgeos::gBuffer(polygon_data, byid = TRUE, width = 0) +polygon_data <- sf:::as_Spatial(st_buffer(st_as_sf(polygon_data), dist = 0)) + ``` Now we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified. From 3bbee8b725e322a390d80a5892eb8c44ff492888 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 13 Jul 2023 20:51:04 +0100 Subject: [PATCH 078/168] Fit model once instead of twice. Closes #74 Still gives warning if no convergence. Assuming users will increase iterations as prompted. --- R/fit_model.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index 58cb7ae..67be23c 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -163,6 +163,8 @@ disag_model <- function(data, message('Fitting model. This may be slow.') opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = iterations, trace = 0)) + if(opt$convergence != 0) warning('The model did not converge. Try increasing the number of iterations') + # Get hess control parameters into a list. hess_control <- setup_hess_control(opt, hess_control_parscale, hess_control_ndeps) @@ -172,12 +174,9 @@ # Calc uncertainty using the fixed hessian from above. sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE, hessian.fixed = hess) - message('Fitting model. This may be slow.') - opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = iterations, trace = 0)) sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE) - if(opt$convergence != 0) warning('The model did not converge. Try increasing the number of iterations') model_output <- list(obj = obj, opt = opt, From e8cb0877129446de8a9baef59ea35eb7628ce3e5 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 13 Jul 2023 20:54:35 +0100 Subject: [PATCH 079/168] Add paper to readme. Closes #72 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 96c535d..2497a62 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ Disaggregation [![R-CMD-check](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml) -A package containing useful functions for disaggregation modelling +A package containing useful functions for disaggregation modelling. +An overview of the package is given in [our paper](https://www.jstatsoft.org/article/view/v106i11). Installation ------------ From 65232f82ec653119795b3a0bdd43baacf7ecf8ae Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 13 Jul 2023 21:25:39 +0100 Subject: [PATCH 080/168] Add layer names to pars for plotting.
#69 --- R/fit_model.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/fit_model.R b/R/fit_model.R index 67be23c..c150bae 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -177,6 +177,8 @@ disag_model <- function(data, sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE) + # Rename parameters to match layers + names(opt$par)[names(opt$par) == 'slope'] <- names(data$covariate_rasters) model_output <- list(obj = obj, opt = opt, From 8892132348fb791d57921d08ad1bbd1fc9849267 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 14 Jul 2023 10:41:25 +0100 Subject: [PATCH 081/168] Just leaving a note to remind myself what to fix. --- R/fit_model.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/fit_model.R b/R/fit_model.R index c150bae..f83c4ab 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -178,6 +178,7 @@ disag_model <- function(data, sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE) # Rename parameters to match layers + # Need to change in sd_out as well names(opt$par)[names(opt$par) == 'slope'] <- names(data$covariate_rasters) model_output <- list(obj = obj, From 92bf0cd612aa7539da97e097f2aee09f19131166 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 14 Jul 2023 10:42:28 +0100 Subject: [PATCH 082/168] Trigger build. From b95f47443f19e4ef02d9e7c255625886ba472ec8 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 14 Jul 2023 11:19:59 +0100 Subject: [PATCH 083/168] Copy actions build from inlabru to fix windows build? --- .github/workflows/R-CMD-build.yaml | 37 ++++++++++++++++++------------ 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/.github/workflows/R-CMD-build.yaml b/.github/workflows/R-CMD-build.yaml index 4895de4..01e9167 100644 --- a/.github/workflows/R-CMD-build.yaml +++ b/.github/workflows/R-CMD-build.yaml @@ -4,8 +4,11 @@ on: push: branches: 'build' + pull_request: + branches: + - master -name: R-CMD-build +name: R-CMD-check jobs: R-CMD-check: @@ -17,30 +20,41 @@ jobs: fail-fast: false matrix: config: - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: macOS-12, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} + R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/stable" - - - uses: r-lib/actions/setup-pandoc@v2 + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + extra-repositories: "https://inla.r-inla-download.org/R/testing" - name: Install system dependencies on MacOS (X11, gdal) if: runner.os == 'macOS' run: | brew install --cask xquartz brew install pkg-config - brew install proj@8 + brew install proj@9 brew install gdal + - name: Install system dependencies on Linux (GL) + if: runner.os == 'Linux' + run: | + sudo apt-get update -y && sudo apt-get install -y libglu1-mesa-dev + - uses: r-lib/actions/setup-r-dependencies@v2 with: dependencies: '"all"' @@ -59,10 +73,3 @@ jobs: _R_CHECK_CRAN_INCOMING_REMOTE_: false with: args: 'c("--no-manual", "--as-cran")' - - - name: Upload built package - uses: actions/upload-artifact@v2 - with: - name: pkg - path: /home/runner/work/ - From d51b0789471ca48875fb76e123c3732ddcfe61bb Mon 
Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 14 Jul 2023 15:37:29 +0100 Subject: [PATCH 084/168] Undo previous name change as it breaks prediction. --- R/fit_model.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fit_model.R b/R/fit_model.R index f83c4ab..f3fdcad 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -179,7 +179,7 @@ disag_model <- function(data, # Rename parameters to match layers # Need to change in sd_out as well - names(opt$par)[names(opt$par) == 'slope'] <- names(data$covariate_rasters) + # names(opt$par)[names(opt$par) == 'slope'] <- names(data$covariate_rasters) model_output <- list(obj = obj, opt = opt, From 5d49279f070016c78b78a791deacee3d326c4a34 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 18 Jul 2023 17:09:18 +0100 Subject: [PATCH 085/168] Increase its on test to avoid stupid Matrix error. --- tests/testthat/test-predict-model.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 5da19a9..f855799 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -39,7 +39,7 @@ test_that("Check predict.disag_model function works as expected", { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iterations = 2) + result <- disag_model(test_data, iterations = 100) pred2 <- predict(result) @@ -87,7 +87,7 @@ test_that("Check predict.disag_model function works as expected", { # For a model with no field or iid - result <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE) + result <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE) pred2 <- predict(result) From 953bc37702bdd6cbab0b3025ca555ec756a6d9fb Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 18 Jul 2023 18:31:22 +0100 Subject: [PATCH 086/168] More extra its to avoid errors in tests --- tests/testthat/test-predict-model.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index f855799..735d607 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -119,7 +119,7 @@ test_that("Check predict.disag_model function works with newdata", { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 2) + result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100) newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) pred1 <- predict(result) @@ -155,7 +155,7 @@ test_that('Check that check_newdata works', { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, field = FALSE, iterations = 2) + result <- disag_model(test_data, field = FALSE, iterations = 100) newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) nd1 <- check_newdata(newdata, result) @@ -180,7 +180,7 @@ test_that('Check that setup_objects works', { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iterations = 2) + result <- disag_model(test_data, iterations = 100) objects <- setup_objects(result) @@ -214,7 +214,7 @@ test_that('Check that predict_single_raster works', { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iterations = 2) + result <- disag_model(test_data, iterations = 100) objects <- setup_objects(result) From 06bd041ca4595d9c920dba30ba93360fb84176a5 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 19 Jul 2023 
14:19:10 +0100 Subject: [PATCH 087/168] Remove gdal and rgeos from namespace via document() --- NAMESPACE | 2 -- 1 file changed, 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7fc5bfc..5669eb7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,8 +23,6 @@ export(predict_model) export(predict_uncertainty) export(prepare_data) import(ggplot2) -import(rgdal) -import(rgeos) import(splancs) import(utils) importFrom(doParallel,registerDoParallel) From 1de593ed28c5df737cb6225b13c135eaca051cc5 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 19 Jul 2023 14:19:29 +0100 Subject: [PATCH 088/168] roxygen version. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b36c7eb..e7f4a6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Description: Fits disaggregation regression models using 'TMB' ('Template Model License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 Imports: raster, foreach, From 093d446ee57138b6b142c5a3a38b2f5b88442dc9 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 19 Jul 2023 14:19:59 +0100 Subject: [PATCH 089/168] Some comments about bits that didn't work. --- R/predict.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/predict.R b/R/predict.R index 47a04b0..1b78aad 100644 --- a/R/predict.R +++ b/R/predict.R @@ -157,6 +157,12 @@ predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALS # If we have either of the random effects, we have the jointPrecision matrix. # but if we have neither, we don't get that matrix and should use the # covariance matrix instead + + #CH <- Matrix::Cholesky(as(S, 'dsCMatrix')) + #x <- rmvn.sparse(10, mu, CH, prec=FALSE) ## 10 random draws of x + #d <- dmvn.sparse(x, mu, CH, prec=FALSE) ## densities of the 10 draws + + if(model_output$model_setup$iid | model_output$model_setup$field){ ch <- Matrix::Cholesky(model_output$sd_out$jointPrecision) par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = TRUE) From 4e304401340c581cbe657f53da495f1bd8bee7de Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 19 Jul 2023 14:20:20 +0100 Subject: [PATCH 090/168] Remove @import rgdal and rgeos --- R/prepare_data.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index 2443e9c..85af1bf 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -49,9 +49,7 @@ #' \item{coordsForPrediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. Used to make predictions.} #' \item{startendindex }{A matrix with two columns containing the start and end index of the pixels within each polygon.} #' \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.} -#' @import rgdal #' @import splancs -#' @import rgeos #' @import utils #' @name prepare_data From c9c73c1c1744699c59c636c9f2fdfca7cf47f039 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 19 Jul 2023 14:20:28 +0100 Subject: [PATCH 091/168] Rebuild man pages.
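The commented-out lines in the predict.R diff above gesture at how sparseMVN is used here: parameter draws come from a multivariate normal parameterised by a sparse precision matrix via its Cholesky factor. A self-contained toy sketch follows; the matrix and mean are invented for illustration, and only the call pattern mirrors the package code.

```r
library(Matrix)
library(sparseMVN)

# Small sparse symmetric positive-definite precision matrix.
Q <- forceSymmetric(Matrix(c(4, -1, 0,
                             -1, 4, -1,
                             0, -1, 4), nrow = 3, sparse = TRUE))
mu <- c(0, 1, 2)

# Factor once, then draw; prec = TRUE says Q is a precision, not a covariance.
ch <- Cholesky(Q)
par_draws <- rmvn.sparse(10, mu, ch, prec = TRUE)  # 10 draws, one per row
dim(par_draws)
```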
--- man/build_mesh.Rd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index ea92181..f30a92f 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -48,4 +48,6 @@ pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, my_mesh <- build_mesh(spdf) } + + } From 812b73026af25f372b7c49cdcb6949f7d74995df Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 19 Jul 2023 20:28:38 +0100 Subject: [PATCH 092/168] Rewrite polygon plotting without fortify --- R/plotting.R | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index de26ae7..7497cce 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -142,22 +142,19 @@ plot.disag_prediction <- function(x, ...) { plot_polygon_data <- function(x, names) { # Rename the response variable for plotting - shp <- x - shp@data <- dplyr::rename(shp@data, 'response' = names$response_var) - shp@data <- dplyr::rename(shp@data, 'area_id' = names$id_var) + shp <- sf::st_as_sf(x) + shp <- dplyr::rename(shp, 'response' = names$response_var) + shp <- dplyr::rename(shp, 'area_id' = names$id_var) area_id <- long <- lat <- group <- response <- NULL - stopifnot(inherits(shp, 'SpatialPolygonsDataFrame')) + stopifnot(inherits(shp, 'sf')) - df_fortify <- fortify(shp, region = 'area_id') - - df <- shp@data - df <- dplyr::mutate(df, area_id = as.character(area_id)) - df <- dplyr::left_join(df_fortify, df, by = c('id' = 'area_id')) - - p <- ggplot(df, aes(long, lat, group = group, fill = response)) + - geom_polygon() + - coord_equal() + + + shp <- dplyr::mutate(shp, area_id = as.character(area_id)) + + p <- ggplot(shp, aes(fill = response)) + + geom_sf() + + #coord_equal() + scale_fill_viridis_c(trans = 'identity') return(invisible(p)) From 9399e43ef14c0ce707e384d0ea005fda429c1a1d Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 19 Jul 2023 20:28:52 +0100 Subject: [PATCH 093/168] Minor edits to vignette text. --- vignettes/disaggregation.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index c18253b..95511bc 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -87,7 +87,7 @@ pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell') ``` To correct small inconsistencies in the polygon geometry, we run the code below. -We are a bit in between packages at the moment. +We are a bit in between frameworks at the moment. Most of the package is built on sp. But as rgeos has been deprecated we have to switch the polygons to simple features and back again. From f9f23799129fbf1e68a501cc0dba0b4378082efc Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 19 Jul 2023 20:29:46 +0100 Subject: [PATCH 094/168] More minor edits to vignette text. --- vignettes/disaggregation.Rmd | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 95511bc..534e048 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -118,11 +118,14 @@ data_for_model <- prepare_data(polygon_data, plot(data_for_model) ``` -Now have our data object we are ready to run the model. Here we can specify the likelihood function as gaussian, binomial or poisson, and we can specify the link function as logit, log or identity. The disaggregation model makes predictions at the pixel level: +Now that we have our data object, we are ready to run the model. Here we can specify +the likelihood function as Gaussian, binomial or Poisson, and we can specify +the link function as logit, log or identity. The disaggregation model makes +predictions at the pixel level: $link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$ - where $X$ are the covariates, $GP$ is the gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$): +where $X$ are the covariates, $GP$ is the Gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$): $cases_j = \sum_{i \epsilon j} pred_i \times agg_i$ From 84657ca054652a23285e795394f88b5883b56287 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 20 Jul 2023 10:19:49 +0100 Subject: [PATCH 095/168] Also bump its in models that then predict in plotting tests.
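A small usage sketch of the convergence behaviour these commits deal with: `disag_model` now warns once if `nlminb` reports non-convergence, and the suggested remedy is more iterations. This is illustrative; it assumes a prepared `disag_data` object called `data_for_model`, as in the vignette.

```r
fit <- disag_model(data_for_model, iterations = 100)

# nlminb reports success with a convergence code of 0; refit with a higher
# iteration cap if the warning was raised.
if (fit$opt$convergence != 0) {
  fit <- disag_model(data_for_model, iterations = 1000)
}
```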
--- tests/testthat/test-predict-model.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 735d607..6c8f956 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -39,7 +39,17 @@ test_that("Check predict.disag_model function works as expected", { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iterations = 100) + result <- disag_model(test_data, iterations = 100, + priors = list(priormean_intercept = 0, + priorsd_intercept = 1, + priormean_slope = 0.0, + priorsd_slope = 0.4, + prior_rho_min = 3, + prior_rho_prob = 0.01, + prior_sigma_max = 1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.01, + prior_iideffect_sd_prob = 0.01)) pred2 <- predict(result) From b49e63bb7ca0a17d4e308708e6afdee60b53318a Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 20 Jul 2023 13:33:09 +0100 Subject: [PATCH 098/168] More messing around trying to get models in tests to converge. --- tests/testthat/test-predict-model.R | 48 ++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 6c8f956..1a453dc 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -40,13 +40,15 @@ test_that("Check predict.disag_model function works as expected", { skip_on_cran() result <- disag_model(test_data, iterations = 100, + iid = TRUE, + field = TRUE, priors = list(priormean_intercept = 0, priorsd_intercept = 1, priormean_slope = 0.0, priorsd_slope = 0.4, - prior_rho_min = 3, + prior_rho_min = 1, prior_rho_prob = 0.01, - prior_sigma_max = 1, + prior_sigma_max = 0.1, prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.01, prior_iideffect_sd_prob = 0.01)) @@ -129,7 +131,19 @@ test_that("Check predict.disag_model function works with newdata", { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100) + result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, + iid = TRUE, + field = TRUE, + priors = list(priormean_intercept = 0, + priorsd_intercept = 1, + priormean_slope = 0.0, + priorsd_slope = 0.4, + prior_rho_min = 1, + prior_rho_prob = 0.01, + prior_sigma_max = 0.1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.0001, + prior_iideffect_sd_prob = 0.01)) newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) pred1 <- predict(result) @@ -190,7 +204,19 @@ test_that('Check that setup_objects works', { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iterations = 100) + result <- disag_model(test_data, iterations = 100, + iid = FALSE, + field = TRUE, + priors = list(priormean_intercept = 0, + priorsd_intercept = 1, + priormean_slope = 0.0, + priorsd_slope = 0.4, + prior_rho_min = 1, + prior_rho_prob = 0.01, + prior_sigma_max = 0.1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.01, + prior_iideffect_sd_prob = 0.01)) objects <- setup_objects(result) @@ -224,7 +250,19 @@ test_that('Check that predict_single_raster works', { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iterations = 100) + result <- disag_model(test_data, iterations = 100, + iid = TRUE, + field = TRUE, + priors = list(priormean_intercept = 0, + priorsd_intercept = 1, + priormean_slope = 0.0, + priorsd_slope = 0.4, + prior_rho_min = 1, + prior_rho_prob = 0.01, + 
prior_sigma_max = 0.1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.01, + prior_iideffect_sd_prob = 0.01)) objects <- setup_objects(result) From 7f132dedbe1f6186370463d62fdf499e39e5ec80 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 20 Jul 2023 15:03:35 +0100 Subject: [PATCH 099/168] More messing around trying to get models in tests to converge. --- tests/testthat/test-predict-model.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 1a453dc..cc28d9e 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -132,7 +132,6 @@ test_that("Check predict.disag_model function works with newdata", { skip_on_cran() result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, - iid = TRUE, field = TRUE, priors = list(priormean_intercept = 0, priorsd_intercept = 1, From b72bc1756fe209b9ac2fdc5d9774d023eee3f18f Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 20 Jul 2023 16:10:40 +0100 Subject: [PATCH 100/168] More messing around trying to get models in tests to converge. --- tests/testthat/test-predict-model.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index cc28d9e..149b4dc 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -132,7 +132,6 @@ test_that("Check predict.disag_model function works with newdata", { skip_on_cran() result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, - field = TRUE, priors = list(priormean_intercept = 0, priorsd_intercept = 1, priormean_slope = 0.0, From 2254a84bf23453b3842e8b9ba572f420bc5ed374 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 20 Jul 2023 16:40:28 +0100 Subject: [PATCH 101/168] More messing around trying to get models in tests to converge. --- tests/testthat/test-predict-model.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 149b4dc..dc479aa 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -203,7 +203,7 @@ test_that('Check that setup_objects works', { skip_on_cran() result <- disag_model(test_data, iterations = 100, - iid = FALSE, + iid = TRUE, field = TRUE, priors = list(priormean_intercept = 0, priorsd_intercept = 1, From 7faa620443c992a273f21b57deecfcb3adabc910 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 20 Jul 2023 17:05:50 +0100 Subject: [PATCH 102/168] More messing around trying to get models in tests to converge. --- tests/testthat/test-predict-model.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index dc479aa..c612770 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -46,11 +46,11 @@ test_that("Check predict.disag_model function works as expected", { priorsd_intercept = 1, priormean_slope = 0.0, priorsd_slope = 0.4, - prior_rho_min = 1, + prior_rho_min = 0.1, prior_rho_prob = 0.01, prior_sigma_max = 0.1, prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.01, + prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) pred2 <- predict(result) From 1b63d4f0e341d590480e2910a0f9f3071ef18ac2 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 20 Jul 2023 19:01:48 +0100 Subject: [PATCH 103/168] More messing around trying to get models in tests to converge. 
--- tests/testthat/test-predict-model.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index c612770..8ad8bd2 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -39,14 +39,14 @@ test_that("Check predict.disag_model function works as expected", { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iterations = 100, + result <- disag_model(test_data, iterations = 1000, iid = TRUE, field = TRUE, priors = list(priormean_intercept = 0, - priorsd_intercept = 1, + priorsd_intercept = 0.1, priormean_slope = 0.0, - priorsd_slope = 0.4, - prior_rho_min = 0.1, + priorsd_slope = 0.1, + prior_rho_min = 5, prior_rho_prob = 0.01, prior_sigma_max = 0.1, prior_sigma_prob = 0.01, From 03c940f2596a485921a299414fbe8e5cc00010a2 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Thu, 20 Jul 2023 19:11:24 +0100 Subject: [PATCH 104/168] More messing around trying to get models in tests to converge. --- tests/testthat/test-predict-model.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 8ad8bd2..322a5bf 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -42,6 +42,8 @@ test_that("Check predict.disag_model function works as expected", { result <- disag_model(test_data, iterations = 1000, iid = TRUE, field = TRUE, + family = 'poisson', + link = 'log', priors = list(priormean_intercept = 0, priorsd_intercept = 0.1, priormean_slope = 0.0, From 3660556820ea710105af511f8649065eb91aa9b5 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Jul 2023 09:45:00 +0100 Subject: [PATCH 105/168] Add priors to model fit in plotting tests as well to ensure convergence. --- tests/testthat/test-plotting.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 4d8b8c4..786ffa7 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -105,8 +105,22 @@ test_that("Check plot.disag_prediction function works as expected", { skip_if_not_installed('INLA') skip_on_cran() - fit_result <- disag_model(test_data, iterations = 100) + result <- disag_model(test_data, iterations = 1000, + iid = TRUE, + field = TRUE, + family = 'poisson', + link = 'log', + priors = list(priormean_intercept = 0, + priorsd_intercept = 0.1, + priormean_slope = 0.0, + priorsd_slope = 0.1, + prior_rho_min = 5, + prior_rho_prob = 0.01, + prior_sigma_max = 0.1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.0001, + prior_iideffect_sd_prob = 0.01)) pred <- predict(fit_result) p <- plot(pred) From 283ab16706c8ab9b994ec58058c40f4181afa383 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Jul 2023 09:45:05 +0100 Subject: [PATCH 106/168] Typo. --- R/fit_model.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fit_model.R b/R/fit_model.R index f3fdcad..f74d5af 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -334,7 +334,7 @@ make_model_object <- function(data, } if(family == 'gaussian' & iid) { - warning('You are using both a gaussian likeihood and an iid effect. Using both of these is redundant as they are + warning('You are using both a gaussian likelihood and an iid effect. Using both of these is redundant as they are having the same effect on the model. 
Consider setting iid = FALSE.') } From 83f5b91c61299a1ec4bf9e1d2b5f2db55919466b Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Jul 2023 10:16:17 +0100 Subject: [PATCH 107/168] Stupid error in test. --- tests/testthat/test-plotting.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 786ffa7..7e4e567 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -106,7 +106,7 @@ test_that("Check plot.disag_prediction function works as expected", { skip_on_cran() - result <- disag_model(test_data, iterations = 1000, + fit_result <- disag_model(test_data, iterations = 1000, iid = TRUE, field = TRUE, family = 'poisson', link = 'log', From 00bd0ffcd118c7ad683c8a87a1e10b5547988f39 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Jul 2023 11:08:15 +0100 Subject: [PATCH 108/168] Remove aes_string and instead use .data --- R/plotting.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 7497cce..a816517 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -208,11 +208,11 @@ plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey p <- ggplot2::ggplot(data = d, - ggplot2::aes_string('x', 'y', - colour = 'type', - size = 'type')) + + ggplot2::aes(.data$x, .data$y, + colour = .data$type, + size = .data$type)) + ggplot2::geom_segment(data = segments, - ggplot2::aes_string(x = 'x1', y = 'y1', xend = 'x2', yend = 'y2')) + + ggplot2::aes(x = .data$x1, y = .data$y1, xend = .data$x2, yend = .data$y2)) + ggplot2::geom_point() + ggplot2::theme_minimal() + ggplot2::theme(legend.position = 'none') From 02b36990f201a3764d3074e07e9299eecd5e739a Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Jul 2023 11:17:37 +0100 Subject: [PATCH 109/168] Remove using size for lines in mesh plot. --- R/plotting.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index a816517..7c49eca 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -206,20 +206,22 @@ plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey segments <- rbind(segments, innerouter) - + #size = .data$type p <- ggplot2::ggplot(data = d, ggplot2::aes(.data$x, .data$y, - colour = .data$type, - size = .data$type)) + + colour = .data$type)) + ggplot2::geom_segment(data = segments, - ggplot2::aes(x = .data$x1, y = .data$y1, xend = .data$x2, yend = .data$y2)) + - ggplot2::geom_point() + + ggplot2::aes(x = .data$x1, y = .data$y1, + xend = .data$x2, yend = .data$y2, + linewidth = .data$type)) + + ggplot2::geom_point(aes(size = .data$type)) + ggplot2::theme_minimal() + ggplot2::theme(legend.position = 'none') #stroke p <- p + ggplot2::scale_colour_manual(values = c(col, linecol, 'black', 'black', 'black'), drop = FALSE) + ggplot2::scale_size_manual(values = c(size, lwd, 1.3, 1.3, 0), drop = FALSE) + + ggplot2::scale_linewidth_manual(values = c(size, lwd, 1.3, 1.3, 0), drop = FALSE) + ggtitle(main) return(invisible(p)) From 6fe97a61f82d468440fc302e207a9211c5b4b49a Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Jul 2023 13:39:06 +0100 Subject: [PATCH 110/168] Add proper slope names and split parameters plots to aid comparison of fixed effects.
Closes #69 --- R/plotting.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 7c49eca..d6080b4 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -65,11 +65,23 @@ plot.disag_model <- function(x, ...){ posteriors <- as.data.frame(summary(x$sd_out, select = 'fixed')) posteriors <- dplyr::mutate(posteriors, name = rownames(posteriors)) names(posteriors) <- c('mean', 'sd', 'parameter') - + posteriors$fixed <- grepl('slope', posteriors$parameter) + posteriors$type <- ifelse(posteriors$fixed, 'Slope', 'Other') + + # Check name lengths match before substituting. + lengths_match <- raster::nlayers(x$data$covariate_rasters) == sum(posteriors$fixed) + if(lengths_match){ + posteriors$parameter[grepl('slope', posteriors$parameter)] <- names(x$data$covariate_rasters) + } + fixedeffects <- ggplot() + - geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, ymax = mean + sd), width = 0.2, color = "blue") + + geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, + ymax = mean + sd), + width = 0.2, color = "blue") + geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + - ggtitle("Fixed effects") + facet_wrap( ~ type , scales = 'free') + + coord_flip() + + ggtitle("Parameters (excluding random effects)") report <- x$obj$report() From c786e66be8f0a409eadaf1846fb3454581fe970e Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 21 Jul 2023 15:27:50 +0100 Subject: [PATCH 111/168] Version bump. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e7f4a6f..c6ad584 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: disaggregation Type: Package Title: Disaggregation Modelling -Version: 0.2.0 +Version: 0.2.1 Authors@R: c( person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5087-2494")), person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4694-8107")), From 8295b42bdd743b375579b8153414b7aa5d1b7116 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 5 Sep 2023 16:08:48 +0100 Subject: [PATCH 112/168] Remove flags added in makevars to see what happens. Getting NOTES from CRAN. --- src/Makevars.win | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Makevars.win b/src/Makevars.win index bd0bfc6..e69de29 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1 +0,0 @@ -PKG_CXXFLAGS = -Wa,-mbig-obj \ No newline at end of file From 8caaae79204cd4373c0b582af43674b8eb13255a Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 6 Sep 2023 07:58:55 +0100 Subject: [PATCH 113/168] More messing with tests to avoid numerical errors. 
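Stepping back from the test churn for a moment: the plotting commits above ([PATCH 108/168] and [PATCH 109/168]) follow the standard ggplot2 migration recipe, which is worth seeing in isolation. A self-contained sketch with an invented data frame; only the idioms mirror the package code: the `.data` pronoun replaces `aes_string()`, and `linewidth` (ggplot2 >= 3.4.0) replaces `size` for line geoms.

```r
library(ggplot2)

d <- data.frame(x = rep(1:10, 2), y = c(1:10, 10:1),
                type = rep(c('inner', 'outer'), each = 10))

p <- ggplot(d, aes(.data$x, .data$y, colour = .data$type)) +
  geom_line(aes(linewidth = .data$type)) +  # linewidth, not size, for lines
  geom_point(aes(size = .data$type)) +      # size still applies to points
  scale_linewidth_manual(values = c(0.5, 1.5)) +
  scale_size_manual(values = c(1, 2)) +
  theme_minimal()
```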
--- tests/testthat/test-summary.R | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R index 7b8c574..c913ecf 100644 --- a/tests/testthat/test-summary.R +++ b/tests/testthat/test-summary.R @@ -99,7 +99,17 @@ test_that("Check summary.disag_predictions function works as expected", { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iid = FALSE, iterations = 2) + result <- disag_model(test_data, iid = FALSE, iterations = 100, + list(priormean_intercept = 0, + priorsd_intercept = 0.1, + priormean_slope = 0.0, + priorsd_slope = 0.1, + prior_rho_min = 5, + prior_rho_prob = 0.01, + prior_sigma_max = 0.1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.0001, + prior_iideffect_sd_prob = 0.01)) pred <- predict(result) @@ -119,7 +129,17 @@ test_that("Check print.disag_predictions function works as expected", { skip_if_not_installed('INLA') skip_on_cran() - result <- disag_model(test_data, iid = FALSE, iterations = 2) + result <- disag_model(test_data, iid = FALSE, iterations = 100, + list(priormean_intercept = 0, + priorsd_intercept = 0.1, + priormean_slope = 0.0, + priorsd_slope = 0.1, + prior_rho_min = 5, + prior_rho_prob = 0.01, + prior_sigma_max = 0.1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.0001, + prior_iideffect_sd_prob = 0.01)) pred <- predict(result) From afc8a40bb7d408e2601ba5b6dd80627023df4734 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Tue, 2 May 2023 16:40:40 +0100 Subject: [PATCH 114/168] Start converting vignette to terra and sf --- vignettes/disaggregation.Rmd | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 534e048..3fad507 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -53,27 +53,38 @@ library(raster, quietly = TRUE) library(disaggregation, quietly = TRUE) library(sf) -map <- NYleukemia$spatial.polygon +library(sf) +library(terra) + + +polygons <- sf::st_as_sf(NYleukemia$spatial.polygon) + df <- NYleukemia$data +df <- merge(polygons, df) -polygon_data <- SpatialPolygonsDataFrame(map, df) -polygon_data ``` Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at the resolution of approximately 1km2. 
```{r, fig.show='hold'} -extent_in_km <- 111*(polygon_data@bbox[, 2] - polygon_data@bbox[, 1]) + +bbox <- sf::st_bbox(df) + +extent_in_km <- 111*(bbox[c(3, 4)] - bbox[c(1, 2)]) n_pixels_x <- floor(extent_in_km[[1]]) n_pixels_y <- floor(extent_in_km[[2]]) -r <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y) -r <- raster::setExtent(r, raster::extent(polygon_data)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3)) -r2 <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y) -r2 <- raster::setExtent(r2, raster::extent(polygon_data)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_y), 3)) -cov_stack <- raster::stack(r, r2) -cov_stack <- raster::scale(cov_stack) + +r <- terra::rast(ncols = n_pixels_x, nrows = n_pixels_y) +terra::ext(r) <- terra::ext(df) +values(r) <- sapply(seq(terra::ncell(r)), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3)) +r2 <- terra::rast(ncol = n_pixels_x, nrow = n_pixels_y) +terra::ext(r2) <- terra::ext(df) +values(r2) <- sapply(seq(terra::ncell(r)), function(x) rnorm(1, ceiling(x/n_pixels_y), 3)) + + +cov_stack <- terra::rast(list(r, r2)) +cov_stack <- terra::scale(cov_stack) + ``` We also create a population raster. This is to allow the model to correctly aggregated the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed. From 281fb2f4b8a31429ecaf296932f44cdf4c3aa18f Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 22 May 2023 16:21:35 +0100 Subject: [PATCH 115/168] Vignette preamble now terra. Except gbuffer which Im not sure if I need to replace or not. --- vignettes/disaggregation.Rmd | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 3fad507..3dfeafa 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -48,19 +48,20 @@ We will demonstrate an example of the **disaggregation** package using areal dat ```{r} library(SpatialEpi, quietly = TRUE) library(dplyr, quietly = TRUE) -library(sp, quietly = TRUE) +library(sp, quietly = TRUE) # Don't need to read data. So just here while I learn sf. library(raster, quietly = TRUE) library(disaggregation, quietly = TRUE) -library(sf) - +library(ggplot2) library(sf) library(terra) polygons <- sf::st_as_sf(NYleukemia$spatial.polygon) -df <- NYleukemia$data -df <- merge(polygons, df) +df <- cbind(polygons, NYleukemia$data) + +ggplot() + geom_sf(data = df, aes(fill = cases / population)) + ``` @@ -76,10 +77,16 @@ n_pixels_y <- floor(extent_in_km[[2]]) r <- terra::rast(ncols = n_pixels_x, nrows = n_pixels_y) terra::ext(r) <- terra::ext(df) -values(r) <- sapply(seq(terra::ncell(r)), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3)) + +data_generate <- function(x){ + rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3) +} + +terra::values(r) <- sapply(seq(terra::ncell(r)), data_generate) r2 <- terra::rast(ncol = n_pixels_x, nrow = n_pixels_y) terra::ext(r2) <- terra::ext(df) -values(r2) <- sapply(seq(terra::ncell(r)), function(x) rnorm(1, ceiling(x/n_pixels_y), 3)) +terra::values(r2) <- sapply(seq(terra::ncell(r2)), + function(x) rnorm(1, ceiling(x/n_pixels_y), 3)) cov_stack <- terra::rast(list(r, r2)) @@ -90,10 +97,10 @@ cov_stack <- terra::scale(cov_stack) We also create a population raster. 
This is to allow the model to correctly aggregate the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed. ```{r, fig.show='hold'} -extracted <- raster::extract(r, polygon_data) -n_cells <- sapply(extracted, length) -polygon_data@data$pop_per_cell <- polygon_data@data$population/n_cells -pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell') +extracted <- terra::extract(r, terra::vect(df$geometry), fun = sum) +n_cells <- terra::extract(r, terra::vect(df$geometry), fun = length) +df$pop_per_cell <- df$population/n_cells$lyr.1 +pop_raster <- rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell') ``` From cfd56592b895df72dc2342ab18a431682655b560 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 22 May 2023 16:24:25 +0100 Subject: [PATCH 116/168] Replaced gBuffer with sf equivalent even though I'm not 100% sure it's needed. --- vignettes/disaggregation.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 3dfeafa..a089e70 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -112,6 +112,7 @@ switch the polygons to simple features and back again. ```{r, fig.show='hold'} polygon_data <- sf:::as_Spatial(st_buffer(st_as_sf(polygon_data), dist = 0)) +df <- sf::st_buffer(df, dist = 0) ``` Now we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified. From 7f8197ca1ab02f726d8017c1f0de3f571d3f8fd1 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 22 May 2023 17:00:07 +0100 Subject: [PATCH 117/168] Mid work, working through prepare_data making it terra --- R/extract.R | 16 ++++++------- R/prepare_data.R | 44 ++++++++++++++++++------------------ vignettes/disaggregation.Rmd | 3 ++- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/R/extract.R b/R/extract.R index 5a166f3..05eb555 100644 --- a/R/extract.R +++ b/R/extract.R @@ -57,17 +57,17 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ call. = FALSE) } - shape@data[, id] <- as.character(shape@data[, id]) + shape[, id] <- as.character(shape[, id, drop = TRUE]) i <- NULL # Run extract in parallel. - values <- foreach::foreach(i = seq_along(shape)) %dopar% { - raster::extract(raster, shape[i, ], fun = fun, na.rm = TRUE, cellnumbers = TRUE, ...) + values <- foreach::foreach(i = seq(nrow(shape))) %dopar% { + terra::extract(raster, terra::vect(shape[i, ]), fun = fun, na.rm = TRUE, cells = TRUE, ...) } if(!is.null(fun)){ # If a summary function was given, just bind everything together and add ID column df <- data.frame(do.call(rbind, values)) - if(inherits(shape, 'SpatialPolygonsDataFrame')){ + if(inherits(shape, 'df')){ df <- cbind(ID = as.data.frame(shape)[, id], df) } else{ df <- cbind(ID = names(shape), df) @@ -83,7 +83,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ # Want to make covariates columns, rbind shapes, and add shape and cell id columns.
From 7f8197ca1ab02f726d8017c1f0de3f571d3f8fd1 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 22 May 2023 17:00:07 +0100
Subject: [PATCH 117/168] Mid work: working through prepare_data, making it
 terra
---
 R/extract.R                  | 16 ++++++-------
 R/prepare_data.R             | 44 ++++++++++++++++++------------------
 vignettes/disaggregation.Rmd |  3 ++-
 3 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/R/extract.R b/R/extract.R
index 5a166f3..05eb555 100644
--- a/R/extract.R
+++ b/R/extract.R
@@ -57,17 +57,17 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
          call. = FALSE)
   }
 
-  shape@data[, id] <- as.character(shape@data[, id])
+  shape[, id] <- as.character(shape[, id, drop = TRUE])
 
   i <- NULL
   # Run extract in parallel.
-  values <- foreach::foreach(i = seq_along(shape)) %dopar% {
-    raster::extract(raster, shape[i, ], fun = fun, na.rm = TRUE, cellnumbers = TRUE, ...)
-  }
+  values <- foreach::foreach(i = seq(nrow(shape))) %dopar% {
+    terra::extract(raster, terra::vect(shape[i, ]), fun = fun, na.rm = TRUE, cells = TRUE, ...)
+  }
 
   if(!is.null(fun)){
     # If a summary function was given, just bind everything together and add ID column
     df <- data.frame(do.call(rbind, values))
-    if(inherits(shape, 'SpatialPolygonsDataFrame')){
+    if(inherits(shape, 'df')){
       df <- cbind(ID = as.data.frame(shape)[, id], df)
     } else{
       df <- cbind(ID = names(shape), df)
@@ -83,7 +83,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
   # Want to make covariates columns, rbind shapes, and add shape and cell id columns.
 
   # list of vectors, one for each covariate
-  values_id <- lapply(seq_along(values), function(x) data.frame(shape@data[, id][x], values[[x]][[1]]))
+  values_id <- lapply(seq_along(values), function(x) data.frame(shape[, id, drop = TRUE][x], values[[x]][[1]]))
 
   df <- do.call(rbind, values_id)
 
@@ -101,7 +101,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
 #' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
 #' not exist), this column will contain NAs.
 #'
-#' @param shape A SpatialPolygons object containing response data.
+#' @param shape An sf object containing response data.
 #' @param id_var Name of column in shape object with the polygon id. Default 'area_id'.
 #' @param response_var Name of column in shape object with the response data. Default 'response'.
 #' @param sample_size_var For survey data, name of column in SpatialPolygonsDataFrame object (if it exists) with the sample size data. Default NULL.
@@ -132,10 +132,10 @@
 
 getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) {
 
   if(is.null(sample_size_var)) {
-    polygon_df <- shape@data[, c(id_var, response_var)]
+    polygon_df <- shape[, c(id_var, response_var), drop = TRUE]
     polygon_df$N <- rep(NA, nrow(polygon_df))
   } else {
-    polygon_df <- shape@data[, c(id_var, response_var, sample_size_var)]
+    polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE]
   }
 
   names(polygon_df) <- c('area_id', 'response', 'N')
 
diff --git a/R/prepare_data.R b/R/prepare_data.R
index 85af1bf..7c4d75f 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -26,7 +26,7 @@
 #' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero
 #' and sets covariate NA pixels to the median value for that covariate.
 #'
-#' @param polygon_shapefile SpatialPolygonsDataFrame containing at least two columns: one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).
+#' @param x An sf object containing at least three columns: one with the geometries, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).
 #' @param covariate_rasters RasterStack of covariate rasters to be used in the model.
 #' @param aggregation_raster Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.
 #' @param id_var Name of column in SpatialPolygonsDataFrame object with the polygon id.
@@ -40,7 +40,7 @@
 #' @return A list is returned of class \code{disag_data}.
 #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
 #' The list of class \code{disag_data} contains:
-#' \item{polygon_shapefile }{The SpatialPolygonsDataFrame used as an input.}
+#' \item{x }{The sf object used as an input.}
 #' \item{covariate_rasters }{The RasterStack used as an input.}
 #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). 
Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} @@ -75,7 +75,7 @@ #' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) #' cov_rasters <- raster::stack(r, r2) #' -#' test_data <- prepare_data(polygon_shapefile = spdf, +#' test_data <- prepare_data(x = spdf, #' covariate_rasters = cov_rasters) #' } #' @@ -83,7 +83,7 @@ #' #' -prepare_data <- function(polygon_shapefile, +prepare_data <- function(x, covariate_rasters, aggregation_raster = NULL, id_var = 'area_id', @@ -94,24 +94,24 @@ prepare_data <- function(polygon_shapefile, makeMesh = TRUE, ncores = 2) { - stopifnot(inherits(polygon_shapefile, 'SpatialPolygonsDataFrame')) - stopifnot(inherits(covariate_rasters, 'Raster')) - if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'Raster')) + stopifnot(inherits(x, 'sf')) + stopifnot(inherits(covariate_rasters, 'SpatRaster')) + if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'SpatRaster')) stopifnot(inherits(id_var, 'character')) stopifnot(inherits(response_var, 'character')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) # Check for NAs in response data - na_rows <- is.na(polygon_shapefile@data[, response_var]) + na_rows <- is.na(x[, response_var, drop = TRUE]) if(sum(na_rows) != 0) { if(na.action) { - polygon_shapefile <- polygon_shapefile[!na_rows, ] + x <- x[!na_rows, ] } else { stop('There are NAs in the response data. Please deal with these, or set na.action = TRUE') } } - polygon_data <- getPolygonData(polygon_shapefile, id_var, response_var, sample_size_var) + polygon_data <- getPolygonData(x, id_var, response_var, sample_size_var) # Save raster layer names so we can reassign it to make sure names don't change. @@ -120,15 +120,15 @@ prepare_data <- function(polygon_shapefile, # If no aggregation raster is given, use a 'unity' raster if(is.null(aggregation_raster)) { aggregation_raster <- covariate_rasters[[1]] - aggregation_raster <- raster::setValues(aggregation_raster, rep(1, raster::ncell(aggregation_raster))) + terra::values(aggregation_raster) <- rep(1, terra::ncell(aggregation_raster)) } names(aggregation_raster) <- 'aggregation_raster' - covariate_rasters <- raster::addLayer(covariate_rasters, aggregation_raster) + covariate_rasters <- c(covariate_rasters, aggregation_raster) cl <- parallel::makeCluster(ncores) doParallel::registerDoParallel(cl) - covariate_data <- parallelExtract(covariate_rasters, polygon_shapefile, fun = NULL, id = id_var) + covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) parallel::stopCluster(cl) foreach::registerDoSEQ() @@ -167,14 +167,14 @@ prepare_data <- function(polygon_shapefile, mesh <- NULL message("Cannot build mesh as INLA is not installed. If you need a spatial field in your model, you must install INLA.") } else { - mesh <- build_mesh(polygon_shapefile, mesh.args) + mesh <- build_mesh(x, mesh.args) } } else { mesh <- NULL message("A mesh is not being built. 
You will not be able to run a spatial model without a mesh.")
   }
 
-  disag_data <- list(polygon_shapefile = polygon_shapefile,
+  disag_data <- list(x = x,
                      shapefile_names = list(id_var = id_var, response_var = response_var),
                      covariate_rasters = covariate_rasters,
                      polygon_data = polygon_data,
@@ -193,13 +193,13 @@
 
 #' Function to fit the disaggregation model
 #'
-#' @param polygon_shapefile SpatialPolygonDataFrame containing the response data
-#' @param shapefile_names List of 2: polygon id variable name and response variable name from polygon_shapefile
+#' @param x SpatialPolygonsDataFrame containing the response data
+#' @param shapefile_names List of 2: polygon id variable name and response variable name from x
 #' @param covariate_rasters RasterStack of covariates
 #' @param polygon_data data.frame with two columns: polygon id and response
 #' @param covariate_data data.frame with cell id, polygon id and covariate columns
 #' @param aggregation_pixels vector with value of aggregation raster at each pixel
-#' @param coordsForFit coordinates of the covariate data points within the polygons in polygon_shapefile
+#' @param coordsForFit coordinates of the covariate data points within the polygons in x
 #' @param coordsForPrediction coordinates of the covariate data points in the whole raster extent
 #' @param startendindex matrix containing the start and end index for each polygon
 #' @param mesh inla.mesh object to use in the fit
@@ -207,7 +207,7 @@
 #' @return A list is returned of class \code{disag_data}.
 #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
 #' The list of class \code{disag_data} contains:
-#' \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.}
+#' \item{x }{The SpatialPolygonsDataFrame used as an input.}
 #' \item{covariate_rasters }{The RasterStack used as an input.}
 #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.}
 #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. 
Each row represents a pixel in a polygon.}
@@ -222,7 +222,7 @@ prepare_data <- function(x,
 #' @export
 
-as.disag_data <- function(polygon_shapefile,
+as.disag_data <- function(x,
                           shapefile_names,
                           covariate_rasters,
                           polygon_data,
@@ -233,7 +233,7 @@ as.disag_data <- function(polygon_shapefile,
                           startendindex,
                           mesh = NULL) {
 
-  stopifnot(inherits(polygon_shapefile, 'SpatialPolygonsDataFrame'))
+  stopifnot(inherits(x, 'SpatialPolygonsDataFrame'))
   stopifnot(inherits(shapefile_names, 'list'))
   stopifnot(inherits(covariate_rasters, c('RasterBrick', 'RasterStack')))
   stopifnot(inherits(polygon_data, 'data.frame'))
@@ -246,7 +246,7 @@ as.disag_data <- function(polygon_shapefile,
     stopifnot(inherits(mesh, 'inla.mesh'))
   }
 
-  disag_data <- list(polygon_shapefile = polygon_shapefile,
+  disag_data <- list(x = x,
                      shapefile_names = shapefile_names,
                      covariate_rasters = covariate_rasters,
                      polygon_data = polygon_data,
diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index a089e70..8d2944b 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -91,6 +91,7 @@ terra::values(r2) <- sapply(seq(terra::ncell(r2)),
 
 cov_stack <- terra::rast(list(r, r2))
 cov_stack <- terra::scale(cov_stack)
+names(cov_stack) <- c('layer1', 'layer2')
 
 ```
 
@@ -120,7 +121,7 @@ Now we have set up the data we can use the `prepare_data` function to create the
 The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.nonconvex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html).
 
 ```{r, fig.show='hold', eval= isINLA}
-data_for_model <- prepare_data(polygon_data,
+data_for_model <- prepare_data(x = df,
                                cov_stack,
                                pop_raster,
                                response_var = 'cases',
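(The hunk above stops at the diff's trailing context, mid-call. As a hedged illustration of how the mesh parameters described in the vignette text are supplied, here is a sketch; the `mesh.args` values below are made up for illustration and are not taken from the patch:)

```r
# Illustrative values only: the mesh.args names match the vignette text,
# but these particular numbers are not from the patch.
data_for_model <- prepare_data(x = df,
                               cov_stack,
                               pop_raster,
                               response_var = 'cases',
                               mesh.args = list(max.edge = c(0.7, 8),
                                                cut = 0.05,
                                                offset = c(1, 2)))
```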
From 93b65cd6614c011caa94ccfe5d531d8133693bb Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 22 May 2023 17:00:43 +0100
Subject: [PATCH 118/168] Old change, but adding what changes were made for
 0.2.0.
---
 cran-comments.md | 71 ++++++++++++++++++++++++++++++------------------
 1 file changed, 45 insertions(+), 26 deletions(-)

diff --git a/cran-comments.md b/cran-comments.md
index 1892954..6d8f73b 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,15 +1,8 @@
 ## Update
-This is a package update (version 0.1.4). The changes in this version are:
+This is a package update (version 0.2.0). The only real change in this version
+is updating references to our Journal of Statistical Software paper that is in
+press.
 
-* Change maintainer. Anita Nandi has emailed to confirm. Anita has moved industry and no longer has time to maintain this package.
-
-* Fixed mistake in model definition. We were adjusting the jacobian for a change of variables incorrectly.
-
-* Fixed predictions in models with no field
-
-* Better documentation for priors.
-
-* redocument to fix html5 issues.
@@ -23,29 +16,55 @@ Ubuntu 20, R devel
 
 ## R CMD check results
 There were no ERRORs or WARNINGs.
-There were 3 NOTEs:
+There were 3 NOTES.
+
+
+* checking CRAN incoming feasibility ... [14s] NOTE
+Maintainer: 'Tim Lucas '
+
+Possibly misspelled words in DESCRIPTION:
+  Nandi (15:28)
+
+Suggests or Enhances not in mainstream repositories:
+  INLA
+Availability using Additional_repositories specification:
+  INLA yes https://inla.r-inla-download.org/R/stable
+
+Found the following (possibly) invalid DOIs:
+  DOI: 10.18637/jss.v106.i11
+    From: DESCRIPTION
+          inst/CITATION
+    Status: 404
+    Message: Not Found
+
+
+Examples with CPU (user + system) or elapsed time > 10s
+               user system elapsed
+getPolygonData 9.89   0.17   10.08
+
+
+
+Response: Anita Nandi's name is spelled correctly. The INLA availability
+issue is the same as previous submissions. The DOI is for our new Journal
+of Statistical Software paper and has been reserved but not registered yet.
+
+
+* checking package dependencies ... NOTE
+Package suggested but not available for checking: 'INLA'
 
-  Suggests or Enhances not in mainstream repositories:
-    INLA
-  Availability using Additional_repositories specification:
-    INLA yes https://inla.r-inla-download.org/R/stable
+Response: Same as above.
 
-  The package uses INLA, my understanding of this NOTE is that it is fine.
 
-* checking installed package size ... NOTE
-    installed size is 12.8Mb
-    sub-directories of 1Mb or more:
-      libs 12.5Mb
+* checking examples ... [16s] NOTE
+Examples with CPU (user + system) or elapsed time > 10s
+               user system elapsed
+getPolygonData 9.89   0.17   10.08
 
-    Packages based on C++ can have large compiled libraries. This is as small as it can be, hope that is ok. I got a similar, but slightly different note when using R CMD check compared to devtools::check(). The gist was the same though.
 
-* checking compilation flags used ... NOTE
-    Compilation used the following non-portable flag(s):
-      '-Wa,-mbig-obj'
-
-    To compile large C++ source files on Windows a compilation flag is needed
+Response: As this is only just over the 10 second limit, we hope it is ok. We
+have done our best to make the examples small throughout.
 
 ## Downstream dependencies

From 7c3ee57551c1fc8d6f6c04ffc7fa88dd79cc7575 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Wed, 31 May 2023 15:53:45 +0100
Subject: [PATCH 119/168] Switch parallelExtract to 1 core terra. I can't get
 terra to work in parallel so just doing this for now.
---
 R/extract.R      | 24 +++++-------------------
 R/prepare_data.R |  6 +-----
 2 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/R/extract.R b/R/extract.R
index 05eb555..9137adf 100644
--- a/R/extract.R
+++ b/R/extract.R
@@ -52,21 +52,14 @@
 
 parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
 
-  if (!requireNamespace("foreach", quietly = TRUE)) {
-    stop("foreach needed for this function to work. Please install it.",
-         call. = FALSE)
-  }
-
   shape[, id] <- as.character(shape[, id, drop = TRUE])
 
-  i <- NULL
   # Run extract in parallel.
-  values <- foreach::foreach(i = seq(nrow(shape))) %dopar% {
-    terra::extract(raster, terra::vect(shape[i, ]), fun = fun, na.rm = TRUE, cells = TRUE, ...)
-  }
+  values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...)
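# (Aside, kept out of the patch: when fun is NULL, terra::extract() on a
# polygon SpatVector with cells = TRUE returns a single data.frame with an ID
# column giving the polygon index, one column per raster layer, and a final
# 'cell' column; that is the shape the branches below unpack.)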
+ if(!is.null(fun)){ # If a summary function was given, just bind everything together and add ID column - df <- data.frame(do.call(rbind, values)) + df <- values if(inherits(shape, 'df')){ df <- cbind(ID = as.data.frame(shape)[, id], df) } else{ @@ -78,15 +71,8 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ return(df) } else { - # If no summary was given we get a list of length n.shapes - # each entry in the list is a dataframe with n.covariates columns - # Want to make covariates columns, rbind shapes, and add shape and cell id columns. - - # list of vectors, one for each covariate - values_id <- lapply(seq_along(values), function(x) data.frame(shape[, id, drop = TRUE][x], values[[x]][[1]])) - - - df <- do.call(rbind, values_id) + df <- values[, 2:(ncol(values) - 1)] + df <- cbind(values$ID, values$cell, df) names(df) <- c(id, 'cellid', names(raster)) return(df) diff --git a/R/prepare_data.R b/R/prepare_data.R index 7c4d75f..c04e46d 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -126,12 +126,8 @@ prepare_data <- function(x, covariate_rasters <- c(covariate_rasters, aggregation_raster) - cl <- parallel::makeCluster(ncores) - doParallel::registerDoParallel(cl) covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) - parallel::stopCluster(cl) - foreach::registerDoSEQ() - + covariate_rasters <- raster::dropLayer(covariate_rasters, raster::nlayers(covariate_rasters)) names(covariate_rasters) <- cov_names From b8770df81bd24055d89e6de0f9241eb6a1cf4871 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 31 May 2023 16:13:01 +0100 Subject: [PATCH 120/168] Currently broken. raster extract wasnt adding proper polygon id column. --- .Rbuildignore | 18 +- .github/workflows/R-CMD-check-HTML5.yaml | 116 ++-- .../workflows/R-CMD-check-no-suggests.yaml | 190 +++---- .gitignore | 20 +- NAMESPACE | 1 + R/build_mesh.R | 50 +- R/extract.R | 14 +- R/fit_model.R | 282 +++++----- R/matching.R | 106 ++-- R/plotting.R | 127 ++--- R/predict.R | 225 ++++---- R/prepare_data.R | 4 +- README.md | 1 - cran-comments.md | 142 ++--- man/build_mesh.Rd | 8 +- man/fit_model.Rd | 296 +++++----- man/getStartendindex.Rd | 84 +-- man/make_model_object.Rd | 248 ++++----- man/plot.disag_data.Rd | 48 +- man/plot.disag_model.Rd | 44 +- man/plot.disag_prediction.Rd | 44 +- src/disaggregation.cpp | 524 +++++++++--------- tests/testthat/test-fit-model.R | 336 +++++------ tests/testthat/test-predict-model.R | 106 ++-- vignettes/disaggregation.Rmd | 4 +- 25 files changed, 1521 insertions(+), 1517 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 4808471..3ad7674 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,10 +1,10 @@ -^.*\.Rproj$ -^\.Rproj\.user$ -^\.git* -README.md -.travis.yml -vignettes/disaggregation_cache/* -cran-comments.md -^\.github$ -.github/workflows/R-CMD-check-HTML5.archyaml +^.*\.Rproj$ +^\.Rproj\.user$ +^\.git* +README.md +.travis.yml +vignettes/disaggregation_cache/* +cran-comments.md +^\.github$ +.github/workflows/R-CMD-check-HTML5.archyaml vignettes/spatio_temporal_disaggregation.Rmd \ No newline at end of file diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index e651e70..a4c492c 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -1,58 +1,58 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
-# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -on: - push: - branches: [html5] - pull_request: - branches: [html5] - -name: R-CMD-check-html5 - - -jobs: - HTML5-check: - runs-on: ubuntu-latest - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/stable" - - - - name: Install pdflatex - run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install system dependencies on MacOS (X11, gdal) - if: runner.os == 'macOS' - run: | - brew install --cask xquartz - brew install pkg-config - brew install proj@8 - brew install gdal - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"all"' - extra-packages: | - rcmdcheck - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - uses: r-lib/actions/check-r-package@v2 - with: - args: '"--as-cran"' - build_args: 'character()' - #error-on: '"note"' +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: [html5] + pull_request: + branches: [html5] + +name: R-CMD-check-html5 + + +jobs: + HTML5-check: + runs-on: ubuntu-latest + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/stable" + + + - name: Install pdflatex + run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@8 + brew install gdal + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"all"' + extra-packages: | + rcmdcheck + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + with: + args: '"--as-cran"' + build_args: 'character()' + #error-on: '"note"' diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml index e4db198..cea02e9 100644 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -1,96 +1,96 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -# -# Largely copied from: https://github.com/inlabru-org/inlabru/blob/devel/.github/workflows/R-CMD-check-no-suggests.yaml -# Want to test without suggests to ensure things don't fail on cran when INLA isn't there. 
- -on: - push: - branches: - '**' - pull_request: - branches: - - devel - - master - -name: R-CMD-check-no-suggests - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - # - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/testing" - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install system dependencies on MacOS (X11, gdal) - if: runner.os == 'macOS' - run: | - brew install --cask xquartz - brew install pkg-config - brew install proj@9 - brew install gdal - - - name: Has inla? Check. - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - shell: Rscript {0} - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"hard"' - extra-packages: | - rcmdcheck - testthat - - - name: Has inla? Check, and remove. - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - if ("INLA" %in% pkgs) { - remove.packages("INLA") - } - shell: Rscript {0} - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - uses: r-lib/actions/check-r-package@v2 - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - _R_CHECK_FORCE_SUGGESTS_: false - with: - build_args: 'c("--no-manual", "--no-build-vignettes")' - args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' - - - +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# +# Largely copied from: https://github.com/inlabru-org/inlabru/blob/devel/.github/workflows/R-CMD-check-no-suggests.yaml +# Want to test without suggests to ensure things don't fail on cran when INLA isn't there. + +on: + push: + branches: + '**' + pull_request: + branches: + - devel + - master + +name: R-CMD-check-no-suggests + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + # - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/testing" + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@9 + brew install gdal + + - name: Has inla? Check. 
+ run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + shell: Rscript {0} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + extra-packages: | + rcmdcheck + testthat + + - name: Has inla? Check, and remove. + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + if ("INLA" %in% pkgs) { + remove.packages("INLA") + } + shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + with: + build_args: 'c("--no-manual", "--no-build-vignettes")' + args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' + + + \ No newline at end of file diff --git a/.gitignore b/.gitignore index 260b964..eef5be0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,11 @@ -inst/doc -.Rproj.user -.Rhistory -.Rproj -.RData -*.o -*.so -vignettes/disaggregation_cache/* -vignettes/disaggregation_files/* -.github/workflows/R-CMD-check-HTML5.archyaml +inst/doc +.Rproj.user +.Rhistory +.Rproj +.RData +*.o +*.so +vignettes/disaggregation_cache/* +vignettes/disaggregation_files/* +.github/workflows/R-CMD-check-HTML5.archyaml vignettes/spatio_temporal_disaggregation.Rmd \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index 5669eb7..b298c2b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,3 +33,4 @@ importFrom(stats,cor) importFrom(stats,quantile) importFrom(stats,sd) useDynLib(disaggregation) + diff --git a/R/build_mesh.R b/R/build_mesh.R index 151bbdb..71946c9 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -1,28 +1,28 @@ #' Build mesh for disaggregaton model -#' +#' #' \emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field. #' -#' The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary -#' and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest +#' The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary +#' and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest #' and having a small region outside with a coarser mesh to avoid edge effects. -#' +#' #' Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution}, #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, #' with the names meaning the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. -#' +#' #' Defaults are: #' pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)). -#' +#' #' @param shapes shapefile covering the region under investigation. #' @param mesh.args list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution}, #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, #' with the parameters having the same meaning as in the INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. 
#' #' @return An inla.mesh object -#' +#' #' @name build_mesh #' -#' @examples +#' @examples #' \dontrun{ #' polygons <- list() #' for(i in 1:100) { @@ -31,11 +31,11 @@ #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row #' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) #' } -#' +#' #' polys <- do.call(raster::spPolygons, polygons) #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) #' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' +#' #' my_mesh <- build_mesh(spdf) #' } #' @@ -47,38 +47,38 @@ build_mesh <- function(shapes, mesh.args = NULL) { stopifnot(inherits(shapes, 'SpatialPolygons')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) - + limits <- sp::bbox(shapes) hypotenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) maxedge <- hypotenuse/10 - - + + pars <- list(convex = -0.01, concave = -0.5, resolution = 300, - max.edge = c(maxedge, maxedge * 2), - cut = 0.1, + max.edge = c(maxedge, maxedge * 2), + cut = 0.1, offset = c(hypotenuse / 10, hypotenuse / 10)) - + pars[names(mesh.args)] <- mesh.args outline <- sf::st_union(sf::st_as_sf(shapes)) coords <- sf::st_coordinates(outline) - - - outline.hull <- INLA::inla.nonconvex.hull(coords, - convex = pars$convex, + + + outline.hull <- INLA::inla.nonconvex.hull(coords, + convex = pars$convex, concave = pars$concave, resolution = pars$resolution) - - mesh <- INLA::inla.mesh.2d( + + mesh <- INLA::inla.mesh.2d( boundary = outline.hull, - max.edge = pars$max.edge, - cut = pars$cut, + max.edge = pars$max.edge, + cut = pars$cut, offset = pars$offset) - + return(mesh) } diff --git a/R/extract.R b/R/extract.R index 9137adf..db6a302 100644 --- a/R/extract.R +++ b/R/extract.R @@ -58,6 +58,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...) 
if(!is.null(fun)){ + # If a summary function was given, just bind everything together and add ID column df <- values if(inherits(shape, 'df')){ @@ -66,13 +67,12 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ df <- cbind(ID = names(shape), df) id <- 'id' } - names(df) <- c(id, names(raster)) - return(df) + } else { df <- values[, 2:(ncol(values) - 1)] - df <- cbind(values$ID, values$cell, df) + df <- cbind(as.data.frame(shape)[, id], values$cell, df) names(df) <- c(id, 'cellid', names(raster)) return(df) @@ -171,13 +171,13 @@ getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { - stopifnot(inherits(cov_rasters, c('RasterStack', 'RasterBrick'))) + stopifnot(inherits(cov_rasters, 'SpatRaster')) if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric')) points_raster <- cov_rasters[[1]] - points_raster[is.na(raster::values(points_raster))] <- -9999 - raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE) - coords <- raster_pts@coords + points_raster[is.na(terra::values(points_raster))] <- -9999 + raster_pts <- terra::as.points(points_raster) + coords <- terra::crds(raster_pts) # If specified, only retain certain pixel ids if(!is.null(selectIds)) { diff --git a/R/fit_model.R b/R/fit_model.R index f74d5af..28df455 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -1,46 +1,46 @@ #' Fit the disaggregation model -#' -#' \emph{fit_model} function takes a \emph{disag_data} object created by +#' +#' \emph{fit_model} function takes a \emph{disag_data} object created by #' \code{\link{prepare_data}} and performs a Bayesian disaggregation fit. -#' +#' #' \strong{The model definition} -#' +#' #' The disaggregation model makes predictions at the pixel level: #' \deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} -#' +#' #' And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): #' \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} #' \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} -#' +#' #' The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): #' \itemize{ -#' \item Gaussian: -#' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where +#' \item Gaussian: +#' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where #' \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} #' \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. -#' \item Binomial: +#' \item Binomial: #' For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. #' \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. -#' \item Poisson: +#' \item Poisson: #' \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. #' } -#' -#' Specify priors for the regression parameters, field and iid effect as a single list. 
Hyperpriors for the field -#' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +#' +#' Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field +#' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field #' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect -#' -#' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -#' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +#' +#' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +#' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. #' The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. #' These are specified as strings. -#' +#' #' The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. -#' +#' #' The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. -#' +#' #' The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. -#' +#' #' #' @param data disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting #' @param priors list of prior values @@ -49,28 +49,28 @@ #' @param iterations number of iterations to run the optimisation for #' @param field logical. Flag the spatial field on or off #' @param iid logical. Flag the iid effect on or off -#' @param hess_control_parscale Argument to scale parameters during the calculation of the Hessian. +#' @param hess_control_parscale Argument to scale parameters during the calculation of the Hessian. #' Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details. -#' @param hess_control_ndeps Argument to control step sizes during the calculation of the Hessian. -#' Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. -#' Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. +#' @param hess_control_ndeps Argument to control step sizes during the calculation of the Hessian. +#' Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. +#' Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. #' See \code{\link[stats]{optimHess}} for details. #' @param silent logical. Suppress verbose output. -#' -#' @return A list is returned of class \code{disag_model}. -#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. +#' +#' @return A list is returned of class \code{disag_model}. +#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. 
#' The list of class \code{disag_model} contains: -#' \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} -#' \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} +#' \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} +#' \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} #' \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} #' \item{data }{The \emph{disag_data} object used as an input to the model.} #' \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} -#' +#' #' @name fit_model -#' @references Nanda et al. (2023) disaggregation: An R Package for Bayesian +#' @references Nanda et al. (2023) disaggregation: An R Package for Bayesian #' Spatial Disaggregation Modeling. #' -#' @examples +#' @examples #' \dontrun{ #' polygons <- list() #' for(i in 1:100) { @@ -79,11 +79,11 @@ #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row #' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) #' } -#' +#' #' polys <- do.call(raster::spPolygons, polygons) #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) #' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' +#' #' r <- raster::raster(ncol=20, nrow=20) #' r <- raster::setExtent(r, raster::extent(spdf)) #' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) @@ -91,139 +91,139 @@ #' r2 <- raster::setExtent(r2, raster::extent(spdf)) #' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) #' cov_rasters <- raster::stack(r, r2) -#' +#' #' cl <- parallel::makeCluster(2) #' doParallel::registerDoParallel(cl) -#' test_data <- prepare_data(polygon_shapefile = spdf, +#' test_data <- prepare_data(polygon_shapefile = spdf, #' covariate_rasters = cov_rasters) #' parallel::stopCluster(cl) #' foreach::registerDoSEQ() -#' +#' #' result <- fit_model(test_data, iterations = 2) #' } -#' +#' #' @export -fit_model <- function(data, - priors = NULL, - family = 'gaussian', - link = 'identity', - iterations = 100, - field = TRUE, +fit_model <- function(data, + priors = NULL, + family = 'gaussian', + link = 'identity', + iterations = 100, + field = TRUE, iid = TRUE, hess_control_parscale = NULL, hess_control_ndeps = 1e-4, silent = TRUE) { - + .Deprecated(new = 'disag_model', msg = "'fit_model' will be removed in the next version. 
Please use 'disag_model' instead") - - model_output <- disag_model(data, - priors = priors, - family = family, - link = link, - iterations = iterations, - field = field, + + model_output <- disag_model(data, + priors = priors, + family = family, + link = link, + iterations = iterations, + field = field, iid = iid, hess_control_parscale = hess_control_parscale, hess_control_ndeps = hess_control_ndeps, silent = silent) - + return(model_output) - - + + } #' @export #' @rdname fit_model -disag_model <- function(data, - priors = NULL, - family = 'gaussian', - link = 'identity', - iterations = 100, - field = TRUE, +disag_model <- function(data, + priors = NULL, + family = 'gaussian', + link = 'identity', + iterations = 100, + field = TRUE, iid = TRUE, hess_control_parscale = NULL, hess_control_ndeps = 1e-4, silent = TRUE) { - - + + stopifnot(inherits(data, 'disag_data')) if(!is.null(priors)) stopifnot(inherits(priors, 'list')) stopifnot(inherits(iterations, 'numeric')) - - obj <- make_model_object(data = data, - priors = priors, - family = family, - link = link, - field = field, + + obj <- make_model_object(data = data, + priors = priors, + family = family, + link = link, + field = field, iid = iid, silent = silent) - + message('Fitting model. This may be slow.') opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = iterations, trace = 0)) - + if(opt$convergence != 0) warning('The model did not converge. Try increasing the number of iterations') - + # Get hess control parameters into a list. hess_control <- setup_hess_control(opt, hess_control_parscale, hess_control_ndeps) - + # Calculate the hessian hess <- stats::optimHess(opt$par, fn = obj$fn, gr = obj$gr, control = hess_control) - + # Calc uncertainty using the fixed hessian from above. sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE, hessian.fixed = hess) - + sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE) - + # Rename parameters to match layers # Need to change in sd_out as well # names(opt$par)[names(opt$par) == 'slope'] <- names(data$covariate_rasters) - + model_output <- list(obj = obj, opt = opt, sd_out = sd_out, data = data, model_setup = list(family = family, link = link, field = field, iid = iid)) - + class(model_output) <- c('disag_model', 'list') - + return(model_output) } #' Create the TMB model object for the disaggregation model -#' -#' \emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} +#' +#' \emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} #' and creates a TMB model object to be used in fitting. 
-#' +#' #' \strong{The model definition} -#' +#' #' The disaggregation model make predictions at the pixel level: #' \deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} -#' +#' #' And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): #' \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} #' \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} -#' +#' #' The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): #' \itemize{ -#' \item Gaussian: -#' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where +#' \item Gaussian: +#' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where #' \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} #' \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. -#' \item Binomial: +#' \item Binomial: #' For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. #' \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. -#' \item Poisson: +#' \item Poisson: #' \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. #' } -#' -#' Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field -#' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +#' +#' Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field +#' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field #' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. -#' +#' #' The precise names and default values for these priors are: #' \itemize{ #' \item priormean_intercept: 0 @@ -237,18 +237,18 @@ disag_model <- function(data, #' \item prior_iideffect_sd_max: 0.1 #' \item prior_iideffect_sd_prob: 0.01 #' } -#' -#' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -#' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +#' +#' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +#' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. #' The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. #' These are specified as strings. -#' +#' #' The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. -#' +#' #' The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. 
-#' +#' #' The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE. -#' +#' #' #' @param data disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting #' @param priors list of prior values @@ -257,12 +257,12 @@ disag_model <- function(data, #' @param field logical. Flag the spatial field on or off #' @param iid logical. Flag the iid effect on or off #' @param silent logical. Suppress verbose output. -#' +#' #' @return The TMB model object returned by \code{\link[TMB]{MakeADFun}}. -#' +#' #' @name make_model_object #' -#' @examples +#' @examples #' \dontrun{ #' polygons <- list() #' for(i in 1:100) { @@ -271,11 +271,11 @@ disag_model <- function(data, #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row #' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) #' } -#' +#' #' polys <- do.call(raster::spPolygons, polygons) #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) #' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' +#' #' r <- raster::raster(ncol=20, nrow=20) #' r <- raster::setExtent(r, raster::extent(spdf)) #' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) @@ -283,36 +283,36 @@ disag_model <- function(data, #' r2 <- raster::setExtent(r2, raster::extent(spdf)) #' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) #' cov_rasters <- raster::stack(r, r2) -#' +#' #' cl <- parallel::makeCluster(2) #' doParallel::registerDoParallel(cl) -#' test_data <- prepare_data(polygon_shapefile = spdf, +#' test_data <- prepare_data(polygon_shapefile = spdf, #' covariate_rasters = cov_rasters) #' parallel::stopCluster(cl) #' foreach::registerDoSEQ() -#' +#' #' result <- make_model_object(test_data) #' } -#' +#' #' @export -#' +#' -make_model_object <- function(data, - priors = NULL, - family = 'gaussian', - link = 'identity', - field = TRUE, +make_model_object <- function(data, + priors = NULL, + family = 'gaussian', + link = 'identity', + field = TRUE, iid = TRUE, silent = TRUE) { - - + + # Check that binomial model has sample_size values supplied if(family == 'binomial') { if(sum(is.na(data$polygon_data$N)) != 0) { stop("There are NAs in the sample sizes. These must be supplied for a binomial likelihood") } } - + if(family == 'gaussian') { family_id = 0 } else if(family == 'binomial') { @@ -322,7 +322,7 @@ make_model_object <- function(data, } else { stop(paste(family, "is not a valid likelihood")) } - + if(link == 'logit') { link_id = 0 } else if(link == 'log') { @@ -332,38 +332,38 @@ make_model_object <- function(data, } else { stop(paste(link, "is not a valid link function")) } - + if(family == 'gaussian' & iid) { - warning('You are using both a gaussian likelihood and an iid effect. Using both of these is redundant as they are + warning('You are using both a gaussian likelihood and an iid effect. Using both of these is redundant as they are having the same effect on the model. 
Consider setting iid = FALSE.') } - + if(is.null(data$mesh)) { stop('Your data object must contain an INLA mesh.') } - + nu = 1 # Sort out mesh bits - spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] + spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A n_s <- nrow(spde$M0) - + cov_matrix <- as.matrix(data$covariate_data[, -c(1:2)]) - # If we have exactly one column we don't have to transpose. Sure this + # If we have exactly one column we don't have to transpose. Sure this # this could be cleaner but I don't know how. if(ncol(cov_matrix) == 1){ cov_matrix <- as.matrix(apply(cov_matrix, 1, as.numeric)) } else { cov_matrix <- t(apply(cov_matrix, 1, as.numeric)) } - + # Construct sensible default field hyperpriors limits <- sp::bbox(data$polygon_shapefile) hypontenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) prior_rho <- hypontenuse/3 - + prior_sigma <- sd(data$polygon_data$response/mean(data$polygon_data$response)) - + # Default priors if they are not specified default_priors <- list(priormean_intercept = 0, priorsd_intercept = 10.0, @@ -375,7 +375,7 @@ make_model_object <- function(data, prior_sigma_prob = 0.1, prior_iideffect_sd_max = 0.1, prior_iideffect_sd_prob = 0.01) - + # Replace with any specified priors if(!is.null(priors)) { final_priors <- default_priors @@ -397,7 +397,7 @@ make_model_object <- function(data, } else { final_priors <- default_priors } - + parameters <- list(intercept = -5, slope = rep(0, ncol(cov_matrix)), log_tau_gaussian = 8, @@ -406,7 +406,7 @@ make_model_object <- function(data, log_sigma = 0, log_rho = 4, nodemean = rep(0, n_s)) - + input_data <- list(x = cov_matrix, aggregation_values = data$aggregation_pixels, Apixel = Apix, @@ -419,9 +419,9 @@ make_model_object <- function(data, nu = nu, field = as.integer(field), iid = as.integer(iid)) - + input_data <- c(input_data, final_priors) - + tmb_map <- list() if(!field) { tmb_map <- c(tmb_map, list(log_sigma = as.factor(NA), @@ -435,7 +435,7 @@ make_model_object <- function(data, if(family_id != 0) { # if not gaussian do not need a dispersion in likelihood tmb_map <- c(tmb_map, list(log_tau_gaussian = as.factor(NA))) } - + random_effects <- c() if(field) { random_effects <- c(random_effects, 'nodemean') @@ -443,15 +443,15 @@ make_model_object <- function(data, if(iid) { random_effects <- c(random_effects, 'iideffect') } - + obj <- TMB::MakeADFun( - data = input_data, + data = input_data, parameters = parameters, map = tmb_map, random = random_effects, silent = silent, DLL = "disaggregation") - + return(obj) } @@ -468,7 +468,7 @@ setup_hess_control <- function(opt,hess_control_parscale, hess_control_ndeps){ hess_control$parscale <- hess_control_parscale } # hess_control_ndeps can either be length 1 (default) or correct length vecot. - if(length(hess_control_ndeps) == 1){ + if(length(hess_control_ndeps) == 1){ hess_control$ndeps <- rep(hess_control_ndeps, length(opt$par)) } else { if(length(hess_control_ndeps) != length(opt$par)){ diff --git a/R/matching.R b/R/matching.R index f2b7825..18684c3 100644 --- a/R/matching.R +++ b/R/matching.R @@ -1,53 +1,53 @@ -#' Function to match pixels to their corresponding polygon -#' -#' From the covariate data and polygon data, the function matches the polygon id between the two to find -#' which pixels from the covariate data are contained in each of the polygons. 
-#' -#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned -#' by \code{getPolygonData} function). -#' -#' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in -#' covariate data that corresponds to that polygon, the second column is the index of the last row in -#' covariate data that corresponds to that polygon. -#' -#' @param covariates data.frame with each covariate as a column an and id column. -#' @param polygon_data data.frame with polygon id and response data. -#' @param id_var string with the name of the column in the covariate data.frame containing the polygon id. -#' -#' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in -#' covariate data that corresponds to that polygon, the second column is the index of the last row in -#' covariate data that corresponds to that polygon. -#' -#' @name getStartendindex -#' -#' @examples { -#' covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5)) -#' response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA)) -#' getStartendindex(covs, response, 'area_id') -#' } -#' -#' -#' @export - -getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') { - - stopifnot(ncol(polygon_data) == 3) - stopifnot(ncol(covariates) >= 2) - stopifnot(nrow(covariates) > nrow(polygon_data)) - stopifnot(sum(polygon_data$area_id %in% covariates[, id_var]) == nrow(polygon_data)) - - # Create startendindex matrix - # This defines which pixels in the matrix are associated with which polygon. - startendindex <- lapply(unique(covariates[, id_var]), function(x) range(which(covariates[, id_var] == x))) - - startendindex <- do.call(rbind, startendindex) - - whichindices <- match(polygon_data$area_id, unique(covariates[, id_var])) - - # c++ is zero indexed. - startendindex <- startendindex[whichindices, ] - 1L - - return(startendindex) -} - +#' Function to match pixels to their corresponding polygon +#' +#' From the covariate data and polygon data, the function matches the polygon id between the two to find +#' which pixels from the covariate data are contained in each of the polygons. +#' +#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, +#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned +#' by \code{getPolygonData} function). +#' +#' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in +#' covariate data that corresponds to that polygon, the second column is the index of the last row in +#' covariate data that corresponds to that polygon. +#' +#' @param covariates data.frame with each covariate as a column an and id column. +#' @param polygon_data data.frame with polygon id and response data. +#' @param id_var string with the name of the column in the covariate data.frame containing the polygon id. +#' +#' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in +#' covariate data that corresponds to that polygon, the second column is the index of the last row in +#' covariate data that corresponds to that polygon. 
+#' +#' @name getStartendindex +#' +#' @examples { +#' covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5)) +#' response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA)) +#' getStartendindex(covs, response, 'area_id') +#' } +#' +#' +#' @export + +getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') { + + stopifnot(ncol(polygon_data) == 3) + stopifnot(ncol(covariates) >= 2) + stopifnot(nrow(covariates) > nrow(polygon_data)) + stopifnot(sum(polygon_data$area_id %in% covariates[, id_var]) == nrow(polygon_data)) + + # Create startendindex matrix + # This defines which pixels in the matrix are associated with which polygon. + startendindex <- lapply(unique(covariates[, id_var]), function(x) range(which(covariates[, id_var] == x))) + + startendindex <- do.call(rbind, startendindex) + + whichindices <- match(polygon_data$area_id, unique(covariates[, id_var])) + + # c++ is zero indexed. + startendindex <- startendindex[whichindices, ] - 1L + + return(startendindex) +} + diff --git a/R/plotting.R b/R/plotting.R index d6080b4..d1ed93b 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,90 +1,91 @@ +<<<<<<< HEAD #' Plot input data for disaggregation #' #' Plotting function for class \emph{disag_data} (the input data for disaggregation). -#' +#' #' Produces three plots: polygon response data, covariate rasters and INLA mesh. #' #' @param x Object of class \emph{disag_data} to be plotted. #' @param which If a subset of plots is required, specify a subset of the numbers 1:3 #' @param ... Further arguments to \emph{plot} function. -#' +#' #' @return A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) -#' +#' #' @import ggplot2 #' @method plot disag_data -#' +#' #' @export plot.disag_data <- function(x, which = c(1,2,3), ...) { - + plots <- list() titles <- c() - + if(1 %in% which) { plots$polygon <- plot_polygon_data(x$polygon_shapefile, x$shapefile_names) titles <- c(titles, 'Polygon response data') } - + if(2 %in% which) { stopifnot(inherits(x$covariate_rasters, c('RasterStack', 'RasterBrick'))) plots$covariates <- sp::spplot(x$covariate_rasters) titles <- c(titles, 'Covariate rasters') } - + if(3 %in% which & !is.null(x$mesh)) { stopifnot(inherits(x$mesh, 'inla.mesh')) plots$mesh <- plot_mesh(x$mesh) titles <- c(titles, 'INLA mesh for spatial field') } - + print(cowplot::plot_grid(plotlist = plots, labels = titles, label_size = 10)) - + return(invisible(plots)) } #' Plot results of fitted model #' #' Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). -#' +#' #' Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. -#' +#' #' @param x Object of class \emph{disag_model} to be plotted. #' @param ... Further arguments to \emph{plot} function. 
-#' -#' @return A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot -#' +#' +#' @return A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot +#' #' @import ggplot2 #' @method plot disag_model -#' +#' #' @export plot.disag_model <- function(x, ...){ - + parameter <- sd <- obs <- pred <- NULL posteriors <- as.data.frame(summary(x$sd_out, select = 'fixed')) posteriors <- dplyr::mutate(posteriors, name = rownames(posteriors)) names(posteriors) <- c('mean', 'sd', 'parameter') posteriors$fixed <- grepl('slope', posteriors$parameter) posteriors$type <- ifelse(posteriors$fixed, 'Slope', 'Other') - + # Check name lengths match before substituting. lengths_match <- raster::nlayers(x$data$covariate_rasters) == sum(posteriors$fixed) if(lengths_match){ posteriors$parameter[grepl('slope', posteriors$parameter)] <- names(x$data$covariate_rasters) } - - fixedeffects <- ggplot() + - geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, - ymax = mean + sd), - width = 0.2, color = "blue") + - geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + - facet_wrap( ~ type , scales = 'free') + + + fixedeffects <- ggplot() + + geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, + ymax = mean + sd), + width = 0.2, color = "blue") + + geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + + facet_wrap( ~ type , scales = 'free') + coord_flip() + ggtitle("Parameters (excluding random effects)") - + report <- x$obj$report() - + # Form of the observed and predicted results depends on the likelihood function used if(x$model_setup$family == 'gaussian') { observed_data = report$polygon_response_data/report$reportnormalisation @@ -99,33 +100,33 @@ plot.disag_model <- function(x, ...){ predicted_data = report$reportprediction_rate title <- 'In sample performance: incidence rate' } - + data <- data.frame(obs = observed_data, pred = predicted_data) - - obspred <- ggplot(data, aes(x = obs, y = pred)) + - geom_point() + - geom_abline(intercept = 0, slope = 1, color = 'blue') + + + obspred <- ggplot(data, aes(x = obs, y = pred)) + + geom_point() + + geom_abline(intercept = 0, slope = 1, color = 'blue') + ggtitle(title) - + plots <- list(fixedeffects, obspred) print(cowplot::plot_grid(plotlist = plots)) - + return(invisible(plots)) } #' Plot mean and uncertainty predictions from the disaggregation model results #' #' Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). -#' +#' #' Produces raster plots of the mean prediction, and the lower and upper confidence intervals. #' #' @param x Object of class \emph{disag_prediction} to be plotted. #' @param ... Further arguments to \emph{plot} function. -#' +#' #' @return A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. -#' +#' #' @method plot disag_prediction -#' +#' #' @export @@ -133,11 +134,11 @@ plot.disag_prediction <- function(x, ...) { rasters_to_plot <- raster::stack(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci) names(rasters_to_plot) <- c('mean prediction', 'lower CI', 'upper CI') - + plots <- sp::spplot(rasters_to_plot) - + print(plots) - + return(invisible(plots)) } @@ -146,9 +147,9 @@ plot.disag_prediction <- function(x, ...) 
{ # # @param x Object to be plotted # @param names list of 2 names: polygon id variable and response variable names -# +# # @return A ggplot of the polygon data -# +# # @name plot_polygon_data plot_polygon_data <- function(x, names) { @@ -157,18 +158,18 @@ plot_polygon_data <- function(x, names) { shp <- sf::st_as_sf(x) shp <- dplyr::rename(shp, 'response' = names$response_var) shp <- dplyr::rename(shp, 'area_id' = names$id_var) - + area_id <- long <- lat <- group <- response <- NULL stopifnot(inherits(shp, 'sf')) - - shp <- dplyr::mutate(shp, area_id = as.character(area_id)) - p <- ggplot(shp, aes(fill = response)) + + shp <- dplyr::mutate(shp, area_id = as.character(area_id)) + + p <- ggplot(shp, aes(fill = response)) + geom_sf() + #coord_equal() + scale_fill_viridis_c(trans = 'identity') - + return(invisible(p)) } @@ -179,24 +180,24 @@ plot_polygon_data <- function(x, names) { # @param lwd Line width # @param linecol The colour for the mesh edges # @param size size Size of data points -# +# # @name plot_mesh plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey', size = 1.2) { - + mesh <- x # extract point data d <- data.frame(x = mesh$loc[, 1], y = mesh$loc[, 2], type = 'evertices') levels(d$type) <- c('evertices', 'adata') d[mesh$idx$loc, 'type'] <- 'adata' - # extract lines data. + # extract lines data. # mesh$graph$tv column 1, 2, 3 are points in triangles. # Therefore need 1 to 2, 2 to 3 and 3 to 1. - idx = rbind(mesh$graph$tv[, 1:2, drop = FALSE], - mesh$graph$tv[, 2:3, drop = FALSE], + idx = rbind(mesh$graph$tv[, 1:2, drop = FALSE], + mesh$graph$tv[, 2:3, drop = FALSE], mesh$graph$tv[, c(3, 1), drop = FALSE]) segments <- data.frame(mesh$loc[idx[, 1], 1:2], mesh$loc[idx[, 2], 1:2], type = 'bsegments') - + innerouter <- data.frame(mesh$loc[mesh$segm$bnd$idx[, 1], 1:2], mesh$loc[mesh$segm$bnd$idx[, 2], 1:2], type = 'cbinding', stringsAsFactors = FALSE) @@ -211,19 +212,19 @@ plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey #innerouter[nrow(innerouter), 5] <- 'dinternal' innerouter$type = factor(innerouter$type, levels = c('dinternal', 'cbinding')) } - - + + names(segments) <- c('x1', 'y1', 'x2', 'y2', 'type') names(innerouter) <- c('x1', 'y1', 'x2', 'y2', 'type') - + segments <- rbind(segments, innerouter) - + #size = .data$type - p <- ggplot2::ggplot(data = d, - ggplot2::aes(.data$x, .data$y, + p <- ggplot2::ggplot(data = d, + ggplot2::aes(.data$x, .data$y, colour = .data$type)) + - ggplot2::geom_segment(data = segments, - ggplot2::aes(x = .data$x1, y = .data$y1, + ggplot2::geom_segment(data = segments, + ggplot2::aes(x = .data$x1, y = .data$y1, xend = .data$x2, yend = .data$y2, linewidth = .data$type)) + ggplot2::geom_point(aes(size = .data$type)) + @@ -235,6 +236,6 @@ plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey ggplot2::scale_size_manual(values = c(size, lwd, 1.3, 1.3, 0), drop = FALSE) + ggplot2::scale_linewidth_manual(values = c(size, lwd, 1.3, 1.3, 0), drop = FALSE) + ggtitle(main) - + return(invisible(p)) } diff --git a/R/predict.R b/R/predict.R index 1b78aad..4e9c759 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1,80 +1,81 @@ +<<<<<<< HEAD #' Predict mean and uncertainty from the disaggregation model result -#' -#' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and +#' +#' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and #' predicts mean and 
uncertainty maps. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' To predict over a different spatial extent to that used in the model, +#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -#' -#' For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated -#' are given by the arguments \emph{N} and \emph{CI} respectively. -#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' +#' For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated +#' are given by the arguments \emph{N} and \emph{CI} respectively. +#' #' @param object disag_model object returned by disag_model function. -#' @param newdata If NULL, predictions are made using the data in model_output. -#' If this is a raster stack or brick, predictions will be made over this data. +#' @param newdata If NULL, predictions are made using the data in model_output. +#' If this is a raster stack or brick, predictions will be made over this data. #' @param predict_iid logical. If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @param N Number of realisations. Default: 100. #' @param CI Confidence interval to be calculated from the realisations. Default: 0.95. #' @param ... Further arguments passed to or from other methods. #' -#' @return An object of class \emph{disag_prediction} which consists of a list of two objects: +#' @return An object of class \emph{disag_prediction} which consists of a list of two objects: #' \item{mean_prediction }{List of: #' \itemize{ #' \item \emph{prediction} Raster of mean predictions based. #' \item \emph{field} Raster of the field component of the linear predictor. #' \item \emph{iid} Raster of the iid component of the linear predictor. #' \item \emph{covariates} Raster of the covariate component of the linear predictor. -#' }} +#' }} #' \item{uncertainty_prediction: }{List of: #' \itemize{ #' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. #' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. -#' }} +#' }} #' #' #' @method predict disag_model #' -#' @examples +#' @examples #' \dontrun{ #' predict(fit_result) #' } -#' +#' #' @export predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95, ...) { - + mean_prediction <- predict_model(object, newdata = newdata, predict_iid) - + uncertainty_prediction <- predict_uncertainty(object, newdata = newdata, predict_iid, N, CI) - + prediction <- list(mean_prediction = mean_prediction, uncertainty_prediction = uncertainty_prediction) - + class(prediction) <- c('disag_prediction', 'list') - + return(prediction) } #' Function to predict mean from the model result -#' -#' \emph{predict_model} function takes a \emph{disag_model} object created by -#' \emph{disaggregation::disag_model} and predicts mean maps. 
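
As a usage sketch of the predict method documented above (fit_result and new_covs are placeholder objects, in the spirit of the package's own \dontrun examples):

```r
# Predict over the fitting data, then over a new covariate RasterStack that
# carries the same covariate names as the fit (new_covs is hypothetical).
pred <- predict(fit_result, predict_iid = FALSE, N = 100, CI = 0.95)
raster::plot(pred$mean_prediction$prediction)
raster::plot(pred$uncertainty_prediction$predictions_ci)
pred_new <- predict(fit_result, newdata = new_covs)
```
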
-#' +#' +#' \emph{predict_model} function takes a \emph{disag_model} object created by +#' \emph{disaggregation::disag_model} and predicts mean maps. +#' #' Function returns rasters of the mean predictions as well as the covariate and field contributions #' to the linear predictor. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' To predict over a different spatial extent to that used in the model, +#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' #' @param model_output disag_model object returned by disag_model function -#' @param newdata If NULL, predictions are made using the data in model_output. +#' @param newdata If NULL, predictions are made using the data in model_output. #' If this is a raster stack or brick, predictions will be made over this data. Default NULL. #' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @@ -85,84 +86,84 @@ predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = #' \item \emph{iid} Raster of the iid component of the linear predictor. #' \item \emph{covariates} Raster of the covariate component of the linear predictor. #' } -#' +#' #' @name predict_model #' -#' @examples +#' @examples #' \dontrun{ #' predict_model(result) #' } -#' +#' #' @export predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { - + objects_for_prediction <- setup_objects(model_output, newdata = newdata, predict_iid) - + pars <- model_output$obj$env$last.par.best pars <- split(pars, names(pars)) - - prediction <- predict_single_raster(pars, + + prediction <- predict_single_raster(pars, objects_for_prediction, - link_function = model_output$model_setup$link) - + link_function = model_output$model_setup$link) + return(prediction) - + } #' Function to predict uncertainty from the model result -#' -#' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by -#' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. -#' +#' +#' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by +#' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. +#' #' Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' To predict over a different spatial extent to that used in the model, +#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. 
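
The credible-interval step described here reduces to per-pixel quantiles across the N realisations; a minimal sketch of that step with a plain matrix standing in for the RasterStack (toy numbers, not the package API):

```r
N <- 100; CI <- 0.95
realisations <- matrix(rnorm(10 * N), nrow = 10)   # 10 pixels, N draws each
probs <- c((1 - CI) / 2, 1 - (1 - CI) / 2)         # 0.025 and 0.975
ci <- t(apply(realisations, 1, stats::quantile, probs = probs, na.rm = TRUE))
colnames(ci) <- c('lower CI', 'upper CI')          # one row per pixel
```
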
-#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' #' The number of the realisations and the size of the confidence interval to be calculated. -#' are given by the arguments \emph{N} and \emph{CI} respectively. -#' +#' are given by the arguments \emph{N} and \emph{CI} respectively. +#' #' @param model_output disag_model object returned by disag_model function. -#' @param newdata If NULL, predictions are made using the data in model_output. +#' @param newdata If NULL, predictions are made using the data in model_output. #' If this is a raster stack or brick, predictions will be made over this data. Default NULL. #' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @param N number of realisations. Default: 100. #' @param CI confidence interval. Default: 0.95. -#' +#' #' @return The uncertainty prediction, which is a list of: #' \itemize{ #' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. #' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. #' } -#' +#' #' @name predict_uncertainty #' -#' @examples +#' @examples #' \dontrun{ #' predict_uncertainty(result) #' } -#' +#' #' @export predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95) { - + objects_for_prediction <- setup_objects(model_output, newdata = newdata, predict_iid) - + parameters <- model_output$obj$env$last.par.best - + # If we have either of the random effects, we have the jointPrecision matrix. # but if we have neither, we don't get that matrix and should use the # covariance matrix instead - + #CH <- Matrix::Cholesky(as(S, 'dsCMatrix')) #x <- rmvn.sparse(10, mu, CH, prec=FALSE) ## 10 random draws of x #d <- dmvn.sparse(x, mu, CH, prec=FALSE) ## densities of the 10 draws - - + + if(model_output$model_setup$iid | model_output$model_setup$field){ ch <- Matrix::Cholesky(model_output$sd_out$jointPrecision) par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = TRUE) @@ -171,47 +172,47 @@ predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALS ch <- Matrix::Cholesky(covariance_matrix) par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = FALSE) } - + predictions <- list() - + for(r in seq_len(N)) { - + p <- split(par_draws[r, ], names(parameters)) - - prediction_result <- predict_single_raster(p, + + prediction_result <- predict_single_raster(p, objects_for_prediction, - link_function = model_output$model_setup$link) - + link_function = model_output$model_setup$link) + predictions[[r]] <- prediction_result$prediction } predictions <- raster::stack(predictions) - + probs <- c((1 - CI) / 2, 1 - (1 - CI) / 2) predictions_ci <- raster::calc(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE)) names(predictions_ci) <- c('lower CI', 'upper CI') - + uncertainty <- list(realisations = predictions, predictions_ci = predictions_ci) - + return(uncertainty) } # Get coordinates from raster # -# @param data disag_data object -# +# @param data disag_data object +# # @return A matrix of the coordinates of the raster -# +# # @name getCoords getCoords <- function(data) { - + points_raster <- data$covariate_rasters[[1]] points_raster[is.na(points_raster)] <- -9999 raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE) coords <- 
raster_pts@coords - + return(coords) } @@ -219,18 +220,18 @@ getCoords <- function(data) { # # @param mesh mesh used in the model fitting # @param coords coordinates extracted from raster -# +# # @return An Amatrix object for the field -# +# # @name getAmatrix getAmatrix <- function(mesh, coords) { - - spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] - n_s <- nrow(spde$M0) - + + spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] + n_s <- nrow(spde$M0) + Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A - + return(Amatrix) } @@ -242,7 +243,7 @@ check_newdata <- function(newdata, model_output){ if(!is.null(newdata)){ if(!(inherits(newdata, c('RasterStack', 'RasterBrick', 'RasterLayer')))){ stop('newdata should be NULL or a RasterStack or a RasterBrick') - } + } if(!all(names(model_output$data$covariate_rasters) %in% names(newdata))){ stop('All covariates used to fit the model must be in newdata') } @@ -254,26 +255,26 @@ check_newdata <- function(newdata, model_output){ # Function to setup covariates, field and iid objects for prediction setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { - + newdata <- check_newdata(newdata, model_output) - + # Pull out original data data <- model_output$data - + # Decide which covariates to predict over if(is.null(newdata)){ covariates <- data$covariate_rasters } else { covariates <- newdata } - + data$covariate_rasters <- covariates - + # If there is no iid effect in the model, it cannot be predicted if(!model_output$model_setup$iid) { predict_iid <- FALSE } - + if(model_output$model_setup$field) { if(is.null(newdata)) { coords <- data$coordsForPrediction @@ -285,19 +286,19 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { } else { field_objects <- NULL } - + if(predict_iid) { tmp_shp <- model_output$data$polygon_shapefile tmp_shp@data <- data.frame(area_id = factor(model_output$data$polygon_data$area_id)) - shapefile_raster <- raster::rasterize(tmp_shp, - model_output$data$covariate_rasters, + shapefile_raster <- raster::rasterize(tmp_shp, + model_output$data$covariate_rasters, field = 'area_id') shapefile_ids <- raster::unique(shapefile_raster) iid_objects <- list(shapefile_raster = shapefile_raster, shapefile_ids = shapefile_ids) } else { iid_objects <- NULL } - + return(list(covariates = covariates, field_objects = field_objects, iid_objects = iid_objects)) @@ -305,24 +306,24 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { # Function to take model parameters and predict a single raster predict_single_raster <- function(model_parameters, objects, link_function) { - + # Create linear predictor covs_by_betas <- list() for(i in seq_len(raster::nlayers(objects$covariates))){ covs_by_betas[[i]] <- model_parameters$slope[i] * objects$covariates[[i]] } - + cov_by_betas <- raster::stack(covs_by_betas) if(raster::nlayers(cov_by_betas) > 1){ sum_cov_by_betas <- sum(cov_by_betas) - } else { + } else { # With only 1 covariate, there's nothing to sum. Do this to avoid warnings. 
sum_cov_by_betas <- cov_by_betas } cov_contribution <- sum_cov_by_betas + model_parameters$intercept - - linear_pred <- cov_contribution - + + linear_pred <- cov_contribution + if(!is.null(objects$field_objects)){ # Extract field values field <- (objects$field_objects$Amatrix %*% model_parameters$nodemean)[, 1] @@ -331,12 +332,12 @@ predict_single_raster <- function(model_parameters, objects, link_function) { } else { field_ras <- NULL } - + if(!is.null(objects$iid_objects)) { iid_ras <- objects$iid_objects$shapefile_raster iideffect_sd <- 1/sqrt(exp(model_parameters$iideffect_log_tau)) for(i in seq_along(model_parameters$iideffect)) { - iid_ras@data@values[which(objects$iid_objects$shapefile_raster@data@values == objects$iid_objects$shapefile_ids[i])] <- + iid_ras@data@values[which(objects$iid_objects$shapefile_raster@data@values == objects$iid_objects$shapefile_ids[i])] <- model_parameters$iideffect[i] na_pixels <- which(is.na(iid_ras@data@values)) na_iid_values <- stats::rnorm(length(na_pixels), 0, iideffect_sd) @@ -355,7 +356,7 @@ predict_single_raster <- function(model_parameters, objects, link_function) { } else { iid_ras <- NULL } - + if(link_function == 'logit') { prediction_ras <- 1 / (1 + exp(-1 * linear_pred)) } else if(link_function == 'log') { @@ -363,11 +364,11 @@ predict_single_raster <- function(model_parameters, objects, link_function) { } else if(link_function == 'identity') { prediction_ras <- linear_pred } - - predictions <- list(prediction = prediction_ras, + + predictions <- list(prediction = prediction_ras, field = field_ras, iid = iid_ras, covariates = cov_contribution) - + return(predictions) } diff --git a/R/prepare_data.R b/R/prepare_data.R index c04e46d..0d6831e 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -128,7 +128,9 @@ prepare_data <- function(x, covariate_rasters <- c(covariate_rasters, aggregation_raster) covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) - covariate_rasters <- raster::dropLayer(covariate_rasters, raster::nlayers(covariate_rasters)) + # Remove the aggregation raster + covariate_rasters <- covariate_rasters[[seq(nlyr(covariate_rasters) - 1)]] + names(covariate_rasters) <- cov_names aggregation_pixels <- as.numeric(covariate_data[ , ncol(covariate_data)]) diff --git a/README.md b/README.md index 2497a62..cc08ed4 100644 --- a/README.md +++ b/README.md @@ -109,4 +109,3 @@ Summary functions for input data and model results summary(data_for_model) summary(model_result) ``` - diff --git a/cran-comments.md b/cran-comments.md index 6d8f73b..1cdb171 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,71 +1,71 @@ -## Update -This is a package update (version 0.2.0). The only real change in this version -is updating references to our Journal of Statistical Science paper that is in -press. - - - - -## Test environments -Windows, R release -Ubuntu 20, R release -Ubuntu 20, r Oldrel -Ubuntu 20, R devel - - -## R CMD check results -There were no ERRORs or WARNINGs. - -There were 3 NOTES. - - -* checking CRAN incoming feasibility ... 
[14s] NOTE -Maintainer: 'Tim Lucas ' - -Possibly misspelled words in DESCRIPTION: - Nandi (15:28) - -Suggests or Enhances not in mainstream repositories: - INLA -Availability using Additional_repositories specification: - INLA yes https://inla.r-inla-download.org/R/stable - -Found the following (possibly) invalid DOIs: - DOI: 10.18637/jss.v106.i11 - From: DESCRIPTION - inst/CITATION - Status: 404 - Message: Not Found - - -Examples with CPU (user + system) or elapsed time > 10s - user system elapsed -getPolygonData 9.89 0.17 10.08 - - - -Response: Anita Nandi's name is spelled correctly. The INLA availability -issue is the same as previous submissions. The doi is for our new Journal -of the Statistical Society paper and has been reserved but not registered yet. - - - -* checking package dependencies ... NOTE -Package suggested but not available for checking: 'INLA' - -Response: Same as above. - - -* checking examples ... [16s] NOTE -Examples with CPU (user + system) or elapsed time > 10s - user system elapsed -getPolygonData 9.89 0.17 10.08 - - - -Response: As this is only just over the 10 second limit we hope it is ok. We -have done our best to make the examples small throughout. - - -## Downstream dependencies -There are currently no downstream dependencies for this package +## Update +This is a package update (version 0.2.0). The only real change in this version +is updating references to our Journal of Statistical Science paper that is in +press. + + + + +## Test environments +Windows, R release +Ubuntu 20, R release +Ubuntu 20, r Oldrel +Ubuntu 20, R devel + + +## R CMD check results +There were no ERRORs or WARNINGs. + +There were 3 NOTES. + + +* checking CRAN incoming feasibility ... [14s] NOTE +Maintainer: 'Tim Lucas ' + +Possibly misspelled words in DESCRIPTION: + Nandi (15:28) + +Suggests or Enhances not in mainstream repositories: + INLA +Availability using Additional_repositories specification: + INLA yes https://inla.r-inla-download.org/R/stable + +Found the following (possibly) invalid DOIs: + DOI: 10.18637/jss.v106.i11 + From: DESCRIPTION + inst/CITATION + Status: 404 + Message: Not Found + + +Examples with CPU (user + system) or elapsed time > 10s + user system elapsed +getPolygonData 9.89 0.17 10.08 + + + +Response: Anita Nandi's name is spelled correctly. The INLA availability +issue is the same as previous submissions. The doi is for our new Journal +of the Statistical Society paper and has been reserved but not registered yet. + + + +* checking package dependencies ... NOTE +Package suggested but not available for checking: 'INLA' + +Response: Same as above. + + +* checking examples ... [16s] NOTE +Examples with CPU (user + system) or elapsed time > 10s + user system elapsed +getPolygonData 9.89 0.17 10.08 + + + +Response: As this is only just over the 10 second limit we hope it is ok. We +have done our best to make the examples small throughout. + + +## Downstream dependencies +There are currently no downstream dependencies for this package diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index f30a92f..5e17944 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -1,3 +1,4 @@ +<<<<<<< HEAD % Generated by roxygen2: do not edit by hand % Please edit documentation in R/build_mesh.R \name{build_mesh} @@ -20,8 +21,8 @@ An inla.mesh object \emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field. 
} \details{ -The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary -and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest +The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary +and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution}, @@ -48,6 +49,5 @@ pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, my_mesh <- build_mesh(spdf) } - - } + diff --git a/man/fit_model.Rd b/man/fit_model.Rd index 1b4500c..c1dec35 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -1,148 +1,148 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fit_model.R -\name{fit_model} -\alias{fit_model} -\alias{disag_model} -\title{Fit the disaggregation model} -\usage{ -fit_model( - data, - priors = NULL, - family = "gaussian", - link = "identity", - iterations = 100, - field = TRUE, - iid = TRUE, - hess_control_parscale = NULL, - hess_control_ndeps = 1e-04, - silent = TRUE -) - -disag_model( - data, - priors = NULL, - family = "gaussian", - link = "identity", - iterations = 100, - field = TRUE, - iid = TRUE, - hess_control_parscale = NULL, - hess_control_ndeps = 1e-04, - silent = TRUE -) -} -\arguments{ -\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} - -\item{priors}{list of prior values} - -\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} - -\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} - -\item{iterations}{number of iterations to run the optimisation for} - -\item{field}{logical. Flag the spatial field on or off} - -\item{iid}{logical. Flag the iid effect on or off} - -\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. -Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.} - -\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. -Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. -Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. -See \code{\link[stats]{optimHess}} for details.} - -\item{silent}{logical. Suppress verbose output.} -} -\value{ -A list is returned of class \code{disag_model}. -The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. -The list of class \code{disag_model} contains: - \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} - \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} - \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} - \item{data }{The \emph{disag_data} object used as an input to the model.} - \item{model_setup }{A list of information on the model setup. 
Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} -} -\description{ -\emph{fit_model} function takes a \emph{disag_data} object created by -\code{\link{prepare_data}} and performs a Bayesian disaggregation fit. -} -\details{ -\strong{The model definition} - -The disaggregation model makes predictions at the pixel level: -\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} - -And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): -\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} -\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} - -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): -\itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where - \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} - \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: - For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. - \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: - \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. -} - -Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field -where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect - -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. -The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. -These are specified as strings. - -The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. - -The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. - -The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. 
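
A toy numeric check of the aggregation formulas above, for a single polygon j containing three pixels (all numbers invented for illustration):

```r
pred_i  <- c(0.2, 0.5, 0.3)      # pixel-level predictions
agg_i   <- c(100, 200, 700)      # aggregation raster values, e.g. population
cases_j <- sum(pred_i * agg_i)   # 20 + 100 + 210 = 330
rate_j  <- cases_j / sum(agg_i)  # 330 / 1000 = 0.33
```
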
-} -\examples{ -\dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_rasters) - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - result <- fit_model(test_data, iterations = 2) - } - -} -\references{ -Nanda et al. (2023) disaggregation: An R Package for Bayesian -Spatial Disaggregation Modeling. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_model.R +\name{fit_model} +\alias{fit_model} +\alias{disag_model} +\title{Fit the disaggregation model} +\usage{ +fit_model( + data, + priors = NULL, + family = "gaussian", + link = "identity", + iterations = 100, + field = TRUE, + iid = TRUE, + hess_control_parscale = NULL, + hess_control_ndeps = 1e-04, + silent = TRUE +) + +disag_model( + data, + priors = NULL, + family = "gaussian", + link = "identity", + iterations = 100, + field = TRUE, + iid = TRUE, + hess_control_parscale = NULL, + hess_control_ndeps = 1e-04, + silent = TRUE +) +} +\arguments{ +\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} + +\item{priors}{list of prior values} + +\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} + +\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} + +\item{iterations}{number of iterations to run the optimisation for} + +\item{field}{logical. Flag the spatial field on or off} + +\item{iid}{logical. Flag the iid effect on or off} + +\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. +Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.} + +\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. +Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. +Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. +See \code{\link[stats]{optimHess}} for details.} + +\item{silent}{logical. Suppress verbose output.} +} +\value{ +A list is returned of class \code{disag_model}. +The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. 
+The list of class \code{disag_model} contains: + \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} + \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} + \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} + \item{data }{The \emph{disag_data} object used as an input to the model.} + \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} +} +\description{ +\emph{fit_model} function takes a \emph{disag_data} object created by +\code{\link{prepare_data}} and performs a Bayesian disaggregation fit. +} +\details{ +\strong{The model definition} + +The disaggregation model makes predictions at the pixel level: +\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} + +And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): +\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} +\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} + +The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): +\itemize{ + \item Gaussian: + If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where + \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} + \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. + \item Binomial: + For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. + \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. + \item Poisson: + \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. +} + +Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field +are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect + +The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. +These are specified as strings. + +The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. + +The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. + +The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. 
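
A similar toy check of the Gaussian polygon-level dispersion defined above: with four equally weighted pixels the polygon-level sd is half the pixel-level sd, since sqrt(sum(agg^2)) / sum(agg) = 2/4:

```r
sigma   <- 2
agg_i   <- c(1, 1, 1, 1)
sigma_j <- sigma * sqrt(sum(agg_i^2)) / sum(agg_i)  # 2 * sqrt(4) / 4 = 1
```
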
+} +\examples{ +\dontrun{ + polygons <- list() + for(i in 1:100) { + row <- ceiling(i/10) + col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + } + + polys <- do.call(raster::spPolygons, polygons) + response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) + spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) + + r <- raster::raster(ncol=20, nrow=20) + r <- raster::setExtent(r, raster::extent(spdf)) + r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) + r2 <- raster::raster(ncol=20, nrow=20) + r2 <- raster::setExtent(r2, raster::extent(spdf)) + r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) + cov_rasters <- raster::stack(r, r2) + + cl <- parallel::makeCluster(2) + doParallel::registerDoParallel(cl) + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_rasters) + parallel::stopCluster(cl) + foreach::registerDoSEQ() + + result <- fit_model(test_data, iterations = 2) + } + +} +\references{ +Nanda et al. (2023) disaggregation: An R Package for Bayesian +Spatial Disaggregation Modeling. +} diff --git a/man/getStartendindex.Rd b/man/getStartendindex.Rd index 4d35f90..6848ada 100644 --- a/man/getStartendindex.Rd +++ b/man/getStartendindex.Rd @@ -1,42 +1,42 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/matching.R -\name{getStartendindex} -\alias{getStartendindex} -\title{Function to match pixels to their corresponding polygon} -\usage{ -getStartendindex(covariates, polygon_data, id_var = "area_id") -} -\arguments{ -\item{covariates}{data.frame with each covariate as a column an and id column.} - -\item{polygon_data}{data.frame with polygon id and response data.} - -\item{id_var}{string with the name of the column in the covariate data.frame containing the polygon id.} -} -\value{ -A matrix with two columns and one row for each polygon. The first column is the index of the first row in -covariate data that corresponds to that polygon, the second column is the index of the last row in -covariate data that corresponds to that polygon. -} -\description{ -From the covariate data and polygon data, the function matches the polygon id between the two to find -which pixels from the covariate data are contained in each of the polygons. -} -\details{ -Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -and another data.frame containing polygon data with a polygon id, response and sample size column (as returned -by \code{getPolygonData} function). - -Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in -covariate data that corresponds to that polygon, the second column is the index of the last row in -covariate data that corresponds to that polygon. 
-}
-\examples{
-{
- covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5))
- response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA))
- getStartendindex(covs, response, 'area_id')
-}
-
-
-}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/matching.R
+\name{getStartendindex}
+\alias{getStartendindex}
+\title{Function to match pixels to their corresponding polygon}
+\usage{
+getStartendindex(covariates, polygon_data, id_var = "area_id")
+}
+\arguments{
+\item{covariates}{data.frame with each covariate as a column and an id column.}
+
+\item{polygon_data}{data.frame with polygon id and response data.}
+
+\item{id_var}{string with the name of the column in the covariate data.frame containing the polygon id.}
+}
+\value{
+A matrix with two columns and one row for each polygon. The first column is the index of the first row in
+covariate data that corresponds to that polygon, the second column is the index of the last row in
+covariate data that corresponds to that polygon.
+}
+\description{
+From the covariate data and polygon data, the function matches the polygon id between the two to find
+which pixels from the covariate data are contained in each of the polygons.
+}
+\details{
+Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate,
+and another data.frame containing polygon data with a polygon id, response and sample size column (as returned
+by \code{getPolygonData} function).
+
+Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in
+covariate data that corresponds to that polygon, the second column is the index of the last row in
+covariate data that corresponds to that polygon.
+}
+\examples{
+{
+ covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5))
+ response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA))
+ getStartendindex(covs, response, 'area_id')
+}
+
+
+}
diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd
index 8348828..b040041 100644
--- a/man/make_model_object.Rd
+++ b/man/make_model_object.Rd
@@ -1,124 +1,124 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fit_model.R
-\name{make_model_object}
-\alias{make_model_object}
-\title{Create the TMB model object for the disaggregation model}
-\usage{
-make_model_object(
-  data,
-  priors = NULL,
-  family = "gaussian",
-  link = "identity",
-  field = TRUE,
-  iid = TRUE,
-  silent = TRUE
-)
-}
-\arguments{
-\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting}
-
-\item{priors}{list of prior values}
-
-\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}}
-
-\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}}
-
-\item{field}{logical. Flag the spatial field on or off}
-
-\item{iid}{logical. Flag the iid effect on or off}
-
-\item{silent}{logical. Suppress verbose output.}
-}
-\value{
-The TMB model object returned by \code{\link[TMB]{MakeADFun}}.
-}
-\description{
-\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}}
-and creates a TMB model object to be used in fitting.
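
The returned object follows the standard TMB pattern; a condensed sketch of how disag_model consumes it downstream (input_data, parameters and tmb_map stand in for the lists built internally in R/fit_model.R, and getJointPrecision is assumed from the later use of sd_out$jointPrecision in predict_uncertainty):

```r
obj <- TMB::MakeADFun(data = input_data, parameters = parameters,
                      map = tmb_map, random = c('nodemean', 'iideffect'),
                      silent = TRUE, DLL = 'disaggregation')
opt <- stats::nlminb(obj$par, obj$fn, obj$gr,
                     control = list(iter.max = 100, trace = 0))
sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE)
```
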
-} -\details{ -\strong{The model definition} - -The disaggregation model make predictions at the pixel level: -\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} - -And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): -\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} -\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} - -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): -\itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where - \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} - \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: - For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. - \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: - \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. -} - -Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field -where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. - -The precise names and default values for these priors are: -\itemize{ -\item priormean_intercept: 0 -\item priorsd_intercept: 10.0 -\item priormean_slope: 0.0 -\item priorsd_slope: 0.5 -\item prior_rho_min: A third the length of the diagonal of the bounding box. -\item prior_rho_prob: 0.1 -\item prior_sigma_max: sd(response/mean(response)) -\item prior_sigma_prob: 0.1 -\item prior_iideffect_sd_max: 0.1 -\item prior_iideffect_sd_prob: 0.01 -} - -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. -The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. -These are specified as strings. - -The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. - -The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. - -The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE. 
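
Putting the prior names above into practice, a sketch of a user-supplied priors list (values invented; as the fragment in R/fit_model.R suggests, fitting starts from default_priors and replaces whichever names are supplied):

```r
priors <- list(priormean_intercept = 0, priorsd_intercept = 2,
               priormean_slope = 0, priorsd_slope = 0.5,
               prior_rho_min = 1, prior_rho_prob = 0.1,      # P(rho < 1) = 0.1
               prior_sigma_max = 1, prior_sigma_prob = 0.1,  # P(sigma > 1) = 0.1
               prior_iideffect_sd_max = 0.1, prior_iideffect_sd_prob = 0.01)
result <- disag_model(test_data, priors = priors)
```
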
-} -\examples{ -\dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_rasters) - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - result <- make_model_object(test_data) - } - -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_model.R +\name{make_model_object} +\alias{make_model_object} +\title{Create the TMB model object for the disaggregation model} +\usage{ +make_model_object( + data, + priors = NULL, + family = "gaussian", + link = "identity", + field = TRUE, + iid = TRUE, + silent = TRUE +) +} +\arguments{ +\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} + +\item{priors}{list of prior values} + +\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} + +\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} + +\item{field}{logical. Flag the spatial field on or off} + +\item{iid}{logical. Flag the iid effect on or off} + +\item{silent}{logical. Suppress verbose output.} +} +\value{ +The TMB model object returned by \code{\link[TMB]{MakeADFun}}. +} +\description{ +\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} +and creates a TMB model object to be used in fitting. +} +\details{ +\strong{The model definition} + +The disaggregation model make predictions at the pixel level: +\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} + +And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): +\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} +\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} + +The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): +\itemize{ + \item Gaussian: + If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where + \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} + \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. + \item Binomial: + For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. 
+ \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. + \item Poisson: + \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. +} + +Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field +are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. + +The precise names and default values for these priors are: +\itemize{ +\item priormean_intercept: 0 +\item priorsd_intercept: 10.0 +\item priormean_slope: 0.0 +\item priorsd_slope: 0.5 +\item prior_rho_min: A third the length of the diagonal of the bounding box. +\item prior_rho_prob: 0.1 +\item prior_sigma_max: sd(response/mean(response)) +\item prior_sigma_prob: 0.1 +\item prior_iideffect_sd_max: 0.1 +\item prior_iideffect_sd_prob: 0.01 +} + +The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. +These are specified as strings. + +The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. + +The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. + +The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE. +} +\examples{ +\dontrun{ + polygons <- list() + for(i in 1:100) { + row <- ceiling(i/10) + col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + } + + polys <- do.call(raster::spPolygons, polygons) + response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) + spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) + + r <- raster::raster(ncol=20, nrow=20) + r <- raster::setExtent(r, raster::extent(spdf)) + r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) + r2 <- raster::raster(ncol=20, nrow=20) + r2 <- raster::setExtent(r2, raster::extent(spdf)) + r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) + cov_rasters <- raster::stack(r, r2) + + cl <- parallel::makeCluster(2) + doParallel::registerDoParallel(cl) + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_rasters) + parallel::stopCluster(cl) + foreach::registerDoSEQ() + + result <- make_model_object(test_data) + } + +} diff --git a/man/plot.disag_data.Rd b/man/plot.disag_data.Rd index bc4f774..f541c61 100644 --- a/man/plot.disag_data.Rd +++ b/man/plot.disag_data.Rd @@ -1,24 +1,24 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plot.disag_data} -\alias{plot.disag_data} -\title{Plot input data for disaggregation} -\usage{ -\method{plot}{disag_data}(x, which = c(1, 2, 3), ...) 
-} -\arguments{ -\item{x}{Object of class \emph{disag_data} to be plotted.} - -\item{which}{If a subset of plots is required, specify a subset of the numbers 1:3} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) -} -\description{ -Plotting function for class \emph{disag_data} (the input data for disaggregation). -} -\details{ -Produces three plots: polygon response data, covariate rasters and INLA mesh. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_data} +\alias{plot.disag_data} +\title{Plot input data for disaggregation} +\usage{ +\method{plot}{disag_data}(x, which = c(1, 2, 3), ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_data} to be plotted.} + +\item{which}{If a subset of plots is required, specify a subset of the numbers 1:3} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) +} +\description{ +Plotting function for class \emph{disag_data} (the input data for disaggregation). +} +\details{ +Produces three plots: polygon response data, covariate rasters and INLA mesh. +} diff --git a/man/plot.disag_model.Rd b/man/plot.disag_model.Rd index 6fbcab3..421e14c 100644 --- a/man/plot.disag_model.Rd +++ b/man/plot.disag_model.Rd @@ -1,22 +1,22 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plot.disag_model} -\alias{plot.disag_model} -\title{Plot results of fitted model} -\usage{ -\method{plot}{disag_model}(x, ...) -} -\arguments{ -\item{x}{Object of class \emph{disag_model} to be plotted.} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot -} -\description{ -Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). -} -\details{ -Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_model} +\alias{plot.disag_model} +\title{Plot results of fitted model} +\usage{ +\method{plot}{disag_model}(x, ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_model} to be plotted.} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot +} +\description{ +Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). +} +\details{ +Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. +} diff --git a/man/plot.disag_prediction.Rd b/man/plot.disag_prediction.Rd index 5942bf3..213027c 100644 --- a/man/plot.disag_prediction.Rd +++ b/man/plot.disag_prediction.Rd @@ -1,22 +1,22 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plot.disag_prediction} -\alias{plot.disag_prediction} -\title{Plot mean and uncertainty predictions from the disaggregation model results} -\usage{ -\method{plot}{disag_prediction}(x, ...) 
-} -\arguments{ -\item{x}{Object of class \emph{disag_prediction} to be plotted.} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. -} -\description{ -Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). -} -\details{ -Produces raster plots of the mean prediction, and the lower and upper confidence intervals. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_prediction} +\alias{plot.disag_prediction} +\title{Plot mean and uncertainty predictions from the disaggregation model results} +\usage{ +\method{plot}{disag_prediction}(x, ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_prediction} to be plotted.} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. +} +\description{ +Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). +} +\details{ +Produces raster plots of the mean prediction, and the lower and upper confidence intervals. +} diff --git a/src/disaggregation.cpp b/src/disaggregation.cpp index 210e595..d476bbd 100644 --- a/src/disaggregation.cpp +++ b/src/disaggregation.cpp @@ -1,262 +1,262 @@ -// -// Author: Anita Nandi -// Date: 2019-02-14 - -// Data: Spatial field mesh and matrices, polygon data, covariate pixel data - - -#define TMB_LIB_INIT R_init_disaggregation -#include - -template -Type objective_function::operator()() -{ - - using namespace R_inla; - using namespace density; - using namespace Eigen; - - // ------------------------------------------------------------------------ // - // Spatial field data - // ------------------------------------------------------------------------ // - - // The A matrices are for projecting the mesh to a point for the pixel and point data respectively. - DATA_SPARSE_MATRIX(Apixel); - DATA_STRUCT(spde, spde_t); - - // ------------------------------------------------------------------------ // - // Polygon level data - // ------------------------------------------------------------------------ // - - // Covariate pixel data - DATA_MATRIX(x); - - // two col matrix with start end indices for each shape case. - DATA_IARRAY(startendindex); - - // Shape data. Cases and region id. - DATA_VECTOR(polygon_response_data); - DATA_VECTOR(response_sample_size); - - // Use to aggreagte pixel response values to polygon level - DATA_VECTOR(aggregation_values); - - // ------------------------------------------------------------------------ // - // Likelihood and link functions - // ------------------------------------------------------------------------ // - - DATA_INTEGER(family); - DATA_INTEGER(link); - - // ------------------------------------------------------------------------ // - // Parameters - // ------------------------------------------------------------------------ // - - PARAMETER(intercept); - PARAMETER_VECTOR(slope); - - DATA_SCALAR(priormean_intercept); - DATA_SCALAR(priorsd_intercept); - DATA_SCALAR(priormean_slope); - DATA_SCALAR(priorsd_slope); - - // Priors for likelihood - PARAMETER(log_tau_gaussian); - Type tau_gaussian = exp(log_tau_gaussian); - Type gaussian_sd = 1 / sqrt(tau_gaussian); - - // INLA defines a loggamma prior on log tau. - // We evaluate a gamma prior on tau, but the parameters are - // therefore the same. 
- Type prior_gamma_shape = 1; - Type prior_gamma_rate = 5e-05; - - PARAMETER_VECTOR(iideffect); - PARAMETER(iideffect_log_tau); - Type iideffect_tau = exp(iideffect_log_tau); - Type iideffect_sd = 1 / sqrt(iideffect_tau); - - Type iideffect_mean = 0.0; - - // Priors on iid random effect for polygons - DATA_SCALAR(prior_iideffect_sd_max); - DATA_SCALAR(prior_iideffect_sd_prob); - - // spde hyperparameters - PARAMETER(log_sigma); - PARAMETER(log_rho); - Type sigma = exp(log_sigma); - Type rho = exp(log_rho); - - // Priors on spde hyperparameters - DATA_SCALAR(prior_rho_min); - DATA_SCALAR(prior_rho_prob); - DATA_SCALAR(prior_sigma_max); - DATA_SCALAR(prior_sigma_prob); - - // Convert hyperparameters to natural scale - DATA_SCALAR(nu); - Type kappa = sqrt(8.0) / rho; - - // Random effect parameters - PARAMETER_VECTOR(nodemean); - - // Model component flags - DATA_INTEGER(field); - DATA_INTEGER(iid); - - // Number of polygons - int n_polygons = polygon_response_data.size(); - // Number of pixels - int n_pixels = x.rows(); - - Type nll = 0.0; - - // ------------------------------------------------------------------------ // - // Likelihood from priors - // ------------------------------------------------------------------------ // - - nll -= dnorm(intercept, priormean_intercept, priorsd_intercept, true); - for (int s = 0; s < slope.size(); s++) { - nll -= dnorm(slope[s], priormean_slope, priorsd_slope, true); - } - - if(iid) { - // Likelihood of hyperparameter of polygon iid random effect. - // From https://projecteuclid.org/euclid.ss/1491465621 (Eqn 3.3) - Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max; - Type log_pcdensity_iid = log(lambda / 2) - (3/2)*iideffect_log_tau - lambda * pow(iideffect_tau, -1/2); - // log(iideffect_sd) from the Jacobian - nll -= log_pcdensity_iid + iideffect_log_tau; - - // Likelihood of random effect for polygons - for(int p = 0; p < iideffect.size(); p++) { - nll -= dnorm(iideffect[p], iideffect_mean, iideffect_sd, true); - } - } - - // Likelihood from the gaussian prior. - // log(prec) ~ loggamma - // prec ~ gamma - if(family == 0) { - nll -= dgamma(tau_gaussian, prior_gamma_shape, prior_gamma_rate, true); - } - - if(field) { - // Likelihood of hyperparameters for field. - // From https://www.tandfonline.com/doi/full/10.1080/01621459.2017.1415907 (Theorem 2.6) - Type lambdatilde1 = -log(prior_rho_prob) * prior_rho_min; - Type lambdatilde2 = -log(prior_sigma_prob) / prior_sigma_max; - Type log_pcdensity = log(lambdatilde1) + log(lambdatilde2) - 2*log_rho - lambdatilde1 * pow(rho, -1) - lambdatilde2 * sigma; - // log_rho and log_sigma from the Jacobian - nll -= log_pcdensity + log_rho + log_sigma; - - // Build spde matrix - SparseMatrix Q = Q_spde(spde, kappa); - - // From Lindgren (2011) https://doi.org/10.1111/j.1467-9868.2011.00777.x, see equation for the marginal variance - Type scaling_factor = sqrt(exp(lgamma(nu)) / (exp(lgamma(nu + 1)) * 4 * M_PI * pow(kappa, 2*nu))); - - // Likelihood of the random field. 
- nll += SCALE(GMRF(Q), sigma / scaling_factor)(nodemean); - } - - Type nll_priors = nll; - - // ------------------------------------------------------------------------ // - // Likelihood from data - // ------------------------------------------------------------------------ // - - vector pixel_linear_pred(n_pixels); - pixel_linear_pred = intercept + x * slope; - - if(field) { - // Calculate field for pixel data - vector linear_pred_field(n_pixels); - linear_pred_field = Apixel * nodemean; - pixel_linear_pred += linear_pred_field.array(); - } - - // recalculate startendindices to be in the form start, n - startendindex.col(1) = startendindex.col(1) - startendindex.col(0) + 1; - - Type polygon_response; - Type normalised_polygon_response; - Type normalisation_total; - Type pred_polygoncases; - Type pred_polygonrate; - Type polygon_sd; - vector pixel_pred; - vector numerator_pixels; - vector normalisation_pixels; - vector reportnormalisation(n_polygons); - vector reportprediction_cases(n_polygons); - vector reportprediction_rate(n_polygons); - vector reportnll(n_polygons); - vector reportpolygonsd(n_polygons); - - // For each shape get pixel predictions within and aggregate to polygon level - for (int polygon = 0; polygon < n_polygons; polygon++) { - - // Get pixel level predictions (rate) - pixel_pred = pixel_linear_pred.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array(); - if(iid) { - pixel_pred += iideffect[polygon]; - } - // Use correct link function - if(link == 0) { - pixel_pred = invlogit(pixel_pred); - } else if(link == 1) { - pixel_pred = exp(pixel_pred); - } else if(link == 2){ - // Don't need to do anything, i.e. pixel_pred = pixel_pred; - } else { - error("Link function not implemented."); - } - - // Aggregate to polygon prediction - numerator_pixels = pixel_pred * aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array(); - normalisation_pixels = aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)); - normalisation_total = sum(normalisation_pixels); - pred_polygoncases = sum(numerator_pixels); - pred_polygonrate = pred_polygoncases/normalisation_total; - - reportnormalisation[polygon] = normalisation_total; - reportprediction_cases[polygon] = pred_polygoncases; - reportprediction_rate[polygon] = pred_polygonrate; - - // Use correct likelihood function - if(family == 0) { - // Scale the pixel sd to polygon level - polygon_sd = gaussian_sd * sqrt((normalisation_pixels * normalisation_pixels).sum()) / normalisation_total; - reportpolygonsd[polygon] = polygon_sd; - // Calculate normal likelihood in rate space - polygon_response = polygon_response_data(polygon); - normalised_polygon_response = polygon_response/normalisation_total; - nll -= dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); - reportnll[polygon] = -dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); - } else if(family == 1) { - nll -= dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); - reportnll[polygon] = -dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); - } else if(family == 2) { - nll -= dpois(polygon_response_data[polygon], pred_polygoncases, true); - reportnll[polygon] = -dpois(polygon_response_data[polygon], pred_polygoncases, true); - } else { - error("Likelihood not implemented."); - } - - } - - REPORT(reportprediction_cases); - REPORT(reportprediction_rate); - REPORT(reportnormalisation); - 
REPORT(reportnll); - REPORT(polygon_response_data); - REPORT(nll_priors); - REPORT(nll); - if(family == 0) { - REPORT(reportpolygonsd); - } - - return nll; -} +// +// Author: Anita Nandi +// Date: 2019-02-14 + +// Data: Spatial field mesh and matrices, polygon data, covariate pixel data + + +#define TMB_LIB_INIT R_init_disaggregation +#include + +template +Type objective_function::operator()() +{ + + using namespace R_inla; + using namespace density; + using namespace Eigen; + + // ------------------------------------------------------------------------ // + // Spatial field data + // ------------------------------------------------------------------------ // + + // The A matrices are for projecting the mesh to a point for the pixel and point data respectively. + DATA_SPARSE_MATRIX(Apixel); + DATA_STRUCT(spde, spde_t); + + // ------------------------------------------------------------------------ // + // Polygon level data + // ------------------------------------------------------------------------ // + + // Covariate pixel data + DATA_MATRIX(x); + + // two col matrix with start end indices for each shape case. + DATA_IARRAY(startendindex); + + // Shape data. Cases and region id. + DATA_VECTOR(polygon_response_data); + DATA_VECTOR(response_sample_size); + + // Use to aggreagte pixel response values to polygon level + DATA_VECTOR(aggregation_values); + + // ------------------------------------------------------------------------ // + // Likelihood and link functions + // ------------------------------------------------------------------------ // + + DATA_INTEGER(family); + DATA_INTEGER(link); + + // ------------------------------------------------------------------------ // + // Parameters + // ------------------------------------------------------------------------ // + + PARAMETER(intercept); + PARAMETER_VECTOR(slope); + + DATA_SCALAR(priormean_intercept); + DATA_SCALAR(priorsd_intercept); + DATA_SCALAR(priormean_slope); + DATA_SCALAR(priorsd_slope); + + // Priors for likelihood + PARAMETER(log_tau_gaussian); + Type tau_gaussian = exp(log_tau_gaussian); + Type gaussian_sd = 1 / sqrt(tau_gaussian); + + // INLA defines a loggamma prior on log tau. + // We evaluate a gamma prior on tau, but the parameters are + // therefore the same. 
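The comment above records that INLA places a loggamma prior on log(tau) while the code evaluates a gamma prior on tau with the same shape and rate. A quick R check of why the parameters carry over (a sketch; the shape and rate mirror the constants defined just below, and the added log(tau) term is the Jacobian of the change of variables):

    # Density of log(tau) when tau ~ Gamma(shape, rate), on the log scale,
    # equals the Gamma log-density of tau plus log(tau).
    shape <- 1
    rate <- 5e-05
    tau <- 2.5
    lhs <- dgamma(tau, shape = shape, rate = rate, log = TRUE) + log(tau)
    rhs <- shape * log(rate) - lgamma(shape) + shape * log(tau) - rate * tau
    all.equal(lhs, rhs)  # TRUE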
+ Type prior_gamma_shape = 1; + Type prior_gamma_rate = 5e-05; + + PARAMETER_VECTOR(iideffect); + PARAMETER(iideffect_log_tau); + Type iideffect_tau = exp(iideffect_log_tau); + Type iideffect_sd = 1 / sqrt(iideffect_tau); + + Type iideffect_mean = 0.0; + + // Priors on iid random effect for polygons + DATA_SCALAR(prior_iideffect_sd_max); + DATA_SCALAR(prior_iideffect_sd_prob); + + // spde hyperparameters + PARAMETER(log_sigma); + PARAMETER(log_rho); + Type sigma = exp(log_sigma); + Type rho = exp(log_rho); + + // Priors on spde hyperparameters + DATA_SCALAR(prior_rho_min); + DATA_SCALAR(prior_rho_prob); + DATA_SCALAR(prior_sigma_max); + DATA_SCALAR(prior_sigma_prob); + + // Convert hyperparameters to natural scale + DATA_SCALAR(nu); + Type kappa = sqrt(8.0) / rho; + + // Random effect parameters + PARAMETER_VECTOR(nodemean); + + // Model component flags + DATA_INTEGER(field); + DATA_INTEGER(iid); + + // Number of polygons + int n_polygons = polygon_response_data.size(); + // Number of pixels + int n_pixels = x.rows(); + + Type nll = 0.0; + + // ------------------------------------------------------------------------ // + // Likelihood from priors + // ------------------------------------------------------------------------ // + + nll -= dnorm(intercept, priormean_intercept, priorsd_intercept, true); + for (int s = 0; s < slope.size(); s++) { + nll -= dnorm(slope[s], priormean_slope, priorsd_slope, true); + } + + if(iid) { + // Likelihood of hyperparameter of polygon iid random effect. + // From https://projecteuclid.org/euclid.ss/1491465621 (Eqn 3.3) + Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max; + Type log_pcdensity_iid = log(lambda / 2) - (3/2)*iideffect_log_tau - lambda * pow(iideffect_tau, -1/2); + // log(iideffect_sd) from the Jacobian + nll -= log_pcdensity_iid + iideffect_log_tau; + + // Likelihood of random effect for polygons + for(int p = 0; p < iideffect.size(); p++) { + nll -= dnorm(iideffect[p], iideffect_mean, iideffect_sd, true); + } + } + + // Likelihood from the gaussian prior. + // log(prec) ~ loggamma + // prec ~ gamma + if(family == 0) { + nll -= dgamma(tau_gaussian, prior_gamma_shape, prior_gamma_rate, true); + } + + if(field) { + // Likelihood of hyperparameters for field. + // From https://www.tandfonline.com/doi/full/10.1080/01621459.2017.1415907 (Theorem 2.6) + Type lambdatilde1 = -log(prior_rho_prob) * prior_rho_min; + Type lambdatilde2 = -log(prior_sigma_prob) / prior_sigma_max; + Type log_pcdensity = log(lambdatilde1) + log(lambdatilde2) - 2*log_rho - lambdatilde1 * pow(rho, -1) - lambdatilde2 * sigma; + // log_rho and log_sigma from the Jacobian + nll -= log_pcdensity + log_rho + log_sigma; + + // Build spde matrix + SparseMatrix Q = Q_spde(spde, kappa); + + // From Lindgren (2011) https://doi.org/10.1111/j.1467-9868.2011.00777.x, see equation for the marginal variance + Type scaling_factor = sqrt(exp(lgamma(nu)) / (exp(lgamma(nu + 1)) * 4 * M_PI * pow(kappa, 2*nu))); + + // Likelihood of the random field. 
+ nll += SCALE(GMRF(Q), sigma / scaling_factor)(nodemean); + } + + Type nll_priors = nll; + + // ------------------------------------------------------------------------ // + // Likelihood from data + // ------------------------------------------------------------------------ // + + vector pixel_linear_pred(n_pixels); + pixel_linear_pred = intercept + x * slope; + + if(field) { + // Calculate field for pixel data + vector linear_pred_field(n_pixels); + linear_pred_field = Apixel * nodemean; + pixel_linear_pred += linear_pred_field.array(); + } + + // recalculate startendindices to be in the form start, n + startendindex.col(1) = startendindex.col(1) - startendindex.col(0) + 1; + + Type polygon_response; + Type normalised_polygon_response; + Type normalisation_total; + Type pred_polygoncases; + Type pred_polygonrate; + Type polygon_sd; + vector pixel_pred; + vector numerator_pixels; + vector normalisation_pixels; + vector reportnormalisation(n_polygons); + vector reportprediction_cases(n_polygons); + vector reportprediction_rate(n_polygons); + vector reportnll(n_polygons); + vector reportpolygonsd(n_polygons); + + // For each shape get pixel predictions within and aggregate to polygon level + for (int polygon = 0; polygon < n_polygons; polygon++) { + + // Get pixel level predictions (rate) + pixel_pred = pixel_linear_pred.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array(); + if(iid) { + pixel_pred += iideffect[polygon]; + } + // Use correct link function + if(link == 0) { + pixel_pred = invlogit(pixel_pred); + } else if(link == 1) { + pixel_pred = exp(pixel_pred); + } else if(link == 2){ + // Don't need to do anything, i.e. pixel_pred = pixel_pred; + } else { + error("Link function not implemented."); + } + + // Aggregate to polygon prediction + numerator_pixels = pixel_pred * aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array(); + normalisation_pixels = aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)); + normalisation_total = sum(normalisation_pixels); + pred_polygoncases = sum(numerator_pixels); + pred_polygonrate = pred_polygoncases/normalisation_total; + + reportnormalisation[polygon] = normalisation_total; + reportprediction_cases[polygon] = pred_polygoncases; + reportprediction_rate[polygon] = pred_polygonrate; + + // Use correct likelihood function + if(family == 0) { + // Scale the pixel sd to polygon level + polygon_sd = gaussian_sd * sqrt((normalisation_pixels * normalisation_pixels).sum()) / normalisation_total; + reportpolygonsd[polygon] = polygon_sd; + // Calculate normal likelihood in rate space + polygon_response = polygon_response_data(polygon); + normalised_polygon_response = polygon_response/normalisation_total; + nll -= dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); + reportnll[polygon] = -dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); + } else if(family == 1) { + nll -= dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); + reportnll[polygon] = -dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); + } else if(family == 2) { + nll -= dpois(polygon_response_data[polygon], pred_polygoncases, true); + reportnll[polygon] = -dpois(polygon_response_data[polygon], pred_polygoncases, true); + } else { + error("Likelihood not implemented."); + } + + } + + REPORT(reportprediction_cases); + REPORT(reportprediction_rate); + REPORT(reportnormalisation); + 
REPORT(reportnll); + REPORT(polygon_response_data); + REPORT(nll_priors); + REPORT(nll); + if(family == 0) { + REPORT(reportpolygonsd); + } + + return nll; +} diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 5cefeb8..37a9b73 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -1,168 +1,168 @@ - -context("Fitting model") - -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - -test_that("disag_model produces errors when expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - expect_error(disag_model(list())) - expect_error(disag_model(test_data, iterations = 'iterations')) - expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) - expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) - expect_error(disag_model(test_data, family = 'banana')) - expect_error(disag_model(test_data, link = 'apple')) - -}) - -test_that("disag_model behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iterations = 2, iid = FALSE) - - expect_is(result, 'disag_model') - expect_equal(length(result), 5) - expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - - - -}) - - - - -test_that("disag_model with 1 covariate behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - test_data2 <- test_data - test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] - test_data2$covariate_data <- test_data2$covariate_data[, 1:3] - - result <- disag_model(test_data2, iterations = 2, iid = FALSE) - - expect_is(result, 'disag_model') - expect_equal(length(result), 5) - - # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. 
- expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - - # Confirm only two covariates were fitted. - expect_equal(sum(names(result$opt$par) == 'slope'), 1) - -}) -test_that("user defined model setup is working as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - binom_data <- prepare_data(polygon_shapefile = spdf_binom, - covariate_rasters = cov_stack, - sample_size_var = 'sample_size') - - result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') - result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') - result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') - - expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) - - expect_is(result2, 'disag_model') - expect_equal(length(result2), 5) - expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) - expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) - expect_false(result2$model_setup$field) - expect_true(result2$model_setup$iid) - expect_equal(result2$model_setup$family, 'poisson') - expect_equal(result2$model_setup$link, 'log') - - expect_is(result3, 'disag_model') - expect_equal(length(result3), 5) - expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) - expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) - expect_true(result3$model_setup$field) - expect_false(result3$model_setup$iid) - expect_equal(result3$model_setup$family, 'binomial') - expect_equal(result3$model_setup$link, 'logit') - - expect_is(result4, 'disag_model') - expect_equal(length(result4), 5) - expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) - expect_equal(unique(names(result4$sd_out$par.random)), NULL) - expect_false(result4$model_setup$field) - expect_false(result4$model_setup$iid) - expect_equal(result4$model_setup$family, 'gaussian') - expect_equal(result4$model_setup$link, 'identity') -}) - -test_that("make_model_object behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - result <- make_model_object(test_data) - - expect_is(result, 'list') - expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) - -}) - -test_that("setup_hess_control behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - obj <- make_model_object(test_data) - - opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) - - hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) - - expect_is(hess_control, 'list') - expect_equal(length(hess_control$parscale), length(opt$par)) - expect_equal(length(hess_control$ndeps), length(opt$par)) - -}) - + +context("Fitting model") + +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) +} + +polys <- 
do.call(raster::spPolygons, polygons) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) + +spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) + +# Create raster stack +r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +r <- raster::setExtent(r, raster::extent(spdf)) +r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +r2 <- raster::setExtent(r2, raster::extent(spdf)) +r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- raster::stack(r, r2) + +if(identical(Sys.getenv("NOT_CRAN"), "true")) { + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) +} else { + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack, + makeMesh = FALSE) +} + +test_that("disag_model produces errors when expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + expect_error(disag_model(list())) + expect_error(disag_model(test_data, iterations = 'iterations')) + expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) + expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) + expect_error(disag_model(test_data, family = 'banana')) + expect_error(disag_model(test_data, link = 'apple')) + +}) + +test_that("disag_model behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + result <- disag_model(test_data, iterations = 2, iid = FALSE) + + expect_is(result, 'disag_model') + expect_equal(length(result), 5) + expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4) + expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) + + + +}) + + + + +test_that("disag_model with 1 covariate behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + test_data2 <- test_data + test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] + test_data2$covariate_data <- test_data2$covariate_data[, 1:3] + + result <- disag_model(test_data2, iterations = 2, iid = FALSE) + + expect_is(result, 'disag_model') + expect_equal(length(result), 5) + + # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. + expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) + expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) + + # Confirm only two covariates were fitted. 
+ expect_equal(sum(names(result$opt$par) == 'slope'), 1) + +}) +test_that("user defined model setup is working as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + binom_data <- prepare_data(polygon_shapefile = spdf_binom, + covariate_rasters = cov_stack, + sample_size_var = 'sample_size') + + result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') + result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') + result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') + + expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) + + expect_is(result2, 'disag_model') + expect_equal(length(result2), 5) + expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) + expect_false(result2$model_setup$field) + expect_true(result2$model_setup$iid) + expect_equal(result2$model_setup$family, 'poisson') + expect_equal(result2$model_setup$link, 'log') + + expect_is(result3, 'disag_model') + expect_equal(length(result3), 5) + expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) + expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) + expect_true(result3$model_setup$field) + expect_false(result3$model_setup$iid) + expect_equal(result3$model_setup$family, 'binomial') + expect_equal(result3$model_setup$link, 'logit') + + expect_is(result4, 'disag_model') + expect_equal(length(result4), 5) + expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(unique(names(result4$sd_out$par.random)), NULL) + expect_false(result4$model_setup$field) + expect_false(result4$model_setup$iid) + expect_equal(result4$model_setup$family, 'gaussian') + expect_equal(result4$model_setup$link, 'identity') +}) + +test_that("make_model_object behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + result <- make_model_object(test_data) + + expect_is(result, 'list') + expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) + +}) + +test_that("setup_hess_control behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + obj <- make_model_object(test_data) + + opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) + + hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) + + expect_is(hess_control, 'list') + expect_equal(length(hess_control$parscale), length(opt$par)) + expect_equal(length(hess_control$ndeps), length(opt$par)) + +}) + diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 322a5bf..fab8f87 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -26,19 +26,19 @@ r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_s cov_stack <- raster::stack(r, r2) if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, + test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack) } else { - test_data <- prepare_data(polygon_shapefile = spdf, + test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack, makeMesh = FALSE) } test_that("Check predict.disag_model 
function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iterations = 1000, iid = TRUE, field = TRUE, @@ -56,18 +56,18 @@ test_that("Check predict.disag_model function works as expected", { prior_iideffect_sd_prob = 0.01)) pred2 <- predict(result) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_is(pred2$mean_prediction$prediction, 'Raster') expect_is(pred2$mean_prediction$field, 'Raster') expect_true(is.null(pred2$mean_prediction$iid)) expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) @@ -77,11 +77,11 @@ test_that("Check predict.disag_model function works as expected", { expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) pred2 <- predict(result, predict_iid = TRUE, N = 10) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) @@ -89,7 +89,7 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$mean_prediction$field, 'Raster') expect_is(pred2$mean_prediction$iid, 'Raster') expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) @@ -97,25 +97,25 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 10) expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) - - + + # For a model with no field or iid - + result <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE) - + pred2 <- predict(result) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_is(pred2$mean_prediction$prediction, 'Raster') expect_true(is.null(pred2$mean_prediction$field)) expect_true(is.null(pred2$mean_prediction$iid)) expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) @@ -123,16 +123,16 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 100) expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) - + }) test_that("Check predict.disag_model function works with newdata", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, 
priors = list(priormean_intercept = 0, priorsd_intercept = 1, @@ -144,15 +144,15 @@ test_that("Check predict.disag_model function works with newdata", { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) - + newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) pred1 <- predict(result) pred2 <- predict(result, newdata, predict_iid = TRUE, N = 5) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) @@ -160,7 +160,7 @@ test_that("Check predict.disag_model function works with newdata", { expect_true(is.null(pred2$mean_prediction$field)) expect_is(pred2$mean_prediction$iid, 'Raster') expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) @@ -168,23 +168,23 @@ test_that("Check predict.disag_model function works with newdata", { expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 5) expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) - + expect_false(identical(raster::extent(pred1$mean_prediction$prediction), raster::extent(pred2$mean_prediction$prediction))) expect_false(identical(raster::extent(pred1$uncertainty_prediction$realisations), raster::extent(pred2$uncertainty_prediction$realisations))) - + }) test_that('Check that check_newdata works', { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, field = FALSE, iterations = 100) - + newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) nd1 <- check_newdata(newdata, result) expect_is(nd1, 'RasterBrick') - + nn <- newdata[[1]] names(nn) <- 'extra_uneeded' newdata2 <- raster::stack(newdata, nn) @@ -192,18 +192,18 @@ test_that('Check that check_newdata works', { newdata3 <- newdata[[1]] expect_error(check_newdata(newdata3, result), 'All covariates') - + newdata4 <- result$data$covariate_data expect_error(check_newdata(newdata4, result), 'newdata should be NULL or') - - + + }) test_that('Check that setup_objects works', { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iterations = 100, iid = TRUE, field = TRUE, @@ -217,9 +217,9 @@ test_that('Check that setup_objects works', { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.01, prior_iideffect_sd_prob = 0.01)) - + objects <- setup_objects(result) - + expect_is(objects, 'list') expect_equal(length(objects), 3) expect_equal(names(objects), c('covariates', 'field_objects', 'iid_objects')) @@ -228,28 +228,28 @@ test_that('Check that setup_objects works', { newdata <- raster::crop(raster::stack(r, r2), c(0, 180, -90, 90)) objects2 <- setup_objects(result, newdata) - + expect_is(objects2, 'list') expect_equal(length(objects2), 3) expect_equal(names(objects2), c('covariates', 'field_objects', 'iid_objects')) expect_is(objects2$field_objects, 'list') expect_true(is.null(objects$iid_objects)) - + objects3 <- setup_objects(result, predict_iid = TRUE) - + expect_is(objects3, 'list') expect_equal(length(objects3), 3) expect_equal(names(objects3), c('covariates', 'field_objects', 'iid_objects')) 
expect_is(objects3$field_objects, 'list') expect_is(objects3$iid_objects, 'list') - + }) test_that('Check that predict_single_raster works', { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iterations = 100, iid = TRUE, field = TRUE, @@ -263,16 +263,16 @@ test_that('Check that predict_single_raster works', { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.01, prior_iideffect_sd_prob = 0.01)) - + objects <- setup_objects(result) - + pars <- result$obj$env$last.par.best pars <- split(pars, names(pars)) - - pred2 <- predict_single_raster(pars, + + pred2 <- predict_single_raster(pars, objects = objects, link_function = result$model_setup$link) - + expect_is(pred2, 'list') expect_equal(length(pred2), 4) expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates')) @@ -280,13 +280,13 @@ test_that('Check that predict_single_raster works', { expect_is(pred2$field, 'Raster') expect_true(is.null(pred2$iid)) expect_is(pred2$covariates, 'Raster') - + objects2 <- setup_objects(result, predict_iid = TRUE) - - pred2 <- predict_single_raster(pars, + + pred2 <- predict_single_raster(pars, objects = objects2, link_function = result$model_setup$link) - + expect_is(pred2, 'list') expect_equal(length(pred2), 4) expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates')) @@ -294,7 +294,7 @@ test_that('Check that predict_single_raster works', { expect_is(pred2$field, 'Raster') expect_is(pred2$iid, 'Raster') expect_is(pred2$covariates, 'Raster') - + }) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 8d2944b..5967ce2 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -122,8 +122,8 @@ The user can also control the parameters of the mesh that is used to create the ```{r, fig.show='hold', eval= isINLA} data_for_model <- prepare_data(x = df, - cov_stack, - pop_raster, + covariate_rasters = cov_stack, + aggregation_raster = pop_raster, response_var = 'cases', id_var = 'censustract.FIPS', mesh.args = list(cut = 0.01, From f1497a424c6abf98382dfde9169eea3ec56aa1db Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 31 May 2023 19:54:46 +0100 Subject: [PATCH 121/168] Fix parallelExtract rows id issue. 
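The one-line fix below hinges on how the extracted values are keyed: the values table has one row per extracted cell, with an ID column recording which polygon each cell belongs to, while the shape attribute table has one row per polygon. Indexing the attribute table by values$ID expands it to cell level; the old code bound the two together by position, which misaligns whenever polygons cover more than one cell. A toy illustration (the objects here are stand-ins, not the package's):

    values <- data.frame(ID = c(1, 1, 2, 2, 2),        # polygon id for each extracted cell
                         cell = c(10, 11, 20, 21, 22))
    shape_df <- data.frame(area_id = c("a", "b"))      # one row per polygon
    shape_df[values$ID, "area_id"]                     # one id per cell: "a" "a" "b" "b" "b"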
--- R/extract.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/extract.R b/R/extract.R index db6a302..0db30bc 100644 --- a/R/extract.R +++ b/R/extract.R @@ -72,7 +72,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ } else { df <- values[, 2:(ncol(values) - 1)] - df <- cbind(as.data.frame(shape)[, id], values$cell, df) + df <- cbind(as.data.frame(shape)[values$ID, id], values$cell, df) names(df) <- c(id, 'cellid', names(raster)) return(df) From 2f74d59fe1de94504b6e3633e89cc8f0123e8201 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 31 May 2023 20:31:48 +0100 Subject: [PATCH 122/168] Convert build_mesh to sf --- R/build_mesh.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 71946c9..7c3a1da 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -40,16 +40,16 @@ #' } #' #' -#' #' @export build_mesh <- function(shapes, mesh.args = NULL) { - stopifnot(inherits(shapes, 'SpatialPolygons')) +<<<<<<< HEAD + stopifnot(inherits(shapes, 'sf')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) - limits <- sp::bbox(shapes) - hypotenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) + limits <- sf::st_bbox(shapes) + hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2) maxedge <- hypotenuse/10 @@ -65,7 +65,9 @@ build_mesh <- function(shapes, mesh.args = NULL) { outline <- sf::st_union(sf::st_as_sf(shapes)) coords <- sf::st_coordinates(outline) - + #no sure which is needed + # outline <- st_sf(sf::st_union(sf::st_convex_hull(shapes))) + # coords <- sf::st_coordinates(outline)[, c('X', 'Y')] outline.hull <- INLA::inla.nonconvex.hull(coords, @@ -79,6 +81,5 @@ build_mesh <- function(shapes, mesh.args = NULL) { cut = pars$cut, offset = pars$offset) - return(mesh) } From 74aacfac14b23eff94e8bd08e4d0ea76c96f7844 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 31 May 2023 21:18:12 +0100 Subject: [PATCH 123/168] Fix plot.disag_data --- R/plotting.R | 7 ++++--- R/prepare_data.R | 4 ++-- vignettes/disaggregation.Rmd | 3 --- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index d1ed93b..02f36e2 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,8 +1,8 @@ -<<<<<<< HEAD #' Plot input data for disaggregation #' #' Plotting function for class \emph{disag_data} (the input data for disaggregation). #' +>>>>>>> Fix plot.disag_data #' Produces three plots: polygon response data, covariate rasters and INLA mesh. #' #' @param x Object of class \emph{disag_data} to be plotted. @@ -22,12 +22,12 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) { titles <- c() if(1 %in% which) { - plots$polygon <- plot_polygon_data(x$polygon_shapefile, x$shapefile_names) + plots$polygon <- plot_polygon_data(x$x, x$shapefile_names) titles <- c(titles, 'Polygon response data') } if(2 %in% which) { - stopifnot(inherits(x$covariate_rasters, c('RasterStack', 'RasterBrick'))) + stopifnot(inherits(x$covariate_rasters, c('SpatRaster'))) plots$covariates <- sp::spplot(x$covariate_rasters) titles <- c(titles, 'Covariate rasters') } @@ -118,6 +118,7 @@ plot.disag_model <- function(x, ...){ #' #' Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). #' +#' #' Produces raster plots of the mean prediction, and the lower and upper confidence intervals. #' #' @param x Object of class \emph{disag_prediction} to be plotted. 
diff --git a/R/prepare_data.R b/R/prepare_data.R index 0d6831e..94b98ff 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -40,8 +40,8 @@ #' @return A list is returned of class \code{disag_data}. #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. #' The list of class \code{disag_data} contains: -#' \item{x }{The SpatialPolygonDataFrame used as an input.} -#' \item{covariate_rasters }{The RasterStack used as an input.} +#' \item{x }{The sf object used as an input.} +#' \item{covariate_rasters }{The SpatRaster used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 5967ce2..7f4f223 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -48,14 +48,11 @@ We will demonstrate an example of the **disaggregation** package using areal dat ```{r} library(SpatialEpi, quietly = TRUE) library(dplyr, quietly = TRUE) -library(sp, quietly = TRUE) # Don't need to read data. So just here while I learn sf. -library(raster, quietly = TRUE) library(disaggregation, quietly = TRUE) library(ggplot2) library(sf) library(terra) - polygons <- sf::st_as_sf(NYleukemia$spatial.polygon) df <- cbind(polygons, NYleukemia$data) From 405cf740d2777141701901db89143b5e95f4e081 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 2 Jun 2023 16:43:46 +0100 Subject: [PATCH 124/168] Continue updating vignette --- vignettes/disaggregation.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 7f4f223..1e74593 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -98,7 +98,7 @@ We also create a population raster. This is to allow the model to correctly aggr extracted <- terra::extract(r, terra::vect(df$geometry), fun = sum) n_cells <- terra::extract(r, terra::vect(df$geometry), fun = length) df$pop_per_cell <- df$population/n_cells$lyr.1 -pop_raster <- rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell') +pop_raster <- terra::rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell') ``` From c21747605c2f8e26aec120e63f6a8de9162152da Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 14:48:58 +0100 Subject: [PATCH 125/168] Change imports in description to match new backends. --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c6ad584..3001c52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,10 +18,8 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 Imports: - raster, foreach, sp, - sf, parallel, doParallel, splancs, @@ -32,6 +30,8 @@ Imports: ggplot2, cowplot, sparseMVN, + terra, + sf, utils Additional_repositories: https://inla.r-inla-download.org/R/stable Suggests: @@ -45,3 +45,4 @@ LinkingTo: RcppEigen SystemRequirements: GNU make VignetteBuilder: knitr +>>>>>>> Change imports in description to match new backends. 
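The DESCRIPTION change above completes the swap of raster/sp for terra/sf in the package dependencies. As a reference for the surrounding patches, a sketch of the main call-for-call replacements they use (toy raster; the terra names are taken from the diffs):

    library(terra)

    # was: r <- raster::raster(ncol = 10, nrow = 10); r[] <- rnorm(100)
    r <- terra::rast(ncols = 10, nrows = 10, vals = rnorm(100))

    # was: s <- raster::stack(r, r2); raster::nlayers(s)
    s <- c(r, r * 2)
    terra::nlyr(s)    # 2

    # was: raster::extent(s)
    terra::ext(s)

    # was: raster::values(r); terra returns a matrix unless mat = FALSE
    head(terra::values(r, mat = FALSE))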
From 0f9cbbc3cdea511742ef4cd530974d12ed512b9e Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 17:03:06 +0100 Subject: [PATCH 126/168] Fix small bug in extract. --- R/extract.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/extract.R b/R/extract.R index 0db30bc..75d1e16 100644 --- a/R/extract.R +++ b/R/extract.R @@ -175,7 +175,7 @@ extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric')) points_raster <- cov_rasters[[1]] - points_raster[is.na(terra::values(points_raster))] <- -9999 + points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 raster_pts <- terra::as.points(points_raster) coords <- terra::crds(raster_pts) From 2cbd6f6f76760c04f8ce1473ef6ae6a8ce9472d8 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 17:34:46 +0100 Subject: [PATCH 127/168] Update some docs to use sf. --- R/build_mesh.R | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 7c3a1da..7bc3eec 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -24,19 +24,25 @@ #' #' @examples #' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' my_mesh <- build_mesh(spdf) +#' polygons <- list() +#' for(i in 1:14) { +#' row <- ceiling(i/10) +#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row +#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), +#' c(xmin,ymin), c(xmin, ymax)) +#' } +#' +#' +#' +#' +#' +#' polys <- sf::st_sfc(sf::st_polygon(polygons)) +#' response_df <- data.frame(area_id = 1:100, +#' response = runif(100, min = 0, max = 10)) +#' spdf <- sf::st_sf(polys, response_df) +#' +#' my_mesh <- build_mesh(spdf) #' } #' #' @@ -44,7 +50,6 @@ build_mesh <- function(shapes, mesh.args = NULL) { -<<<<<<< HEAD stopifnot(inherits(shapes, 'sf')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) From 63f063324e2f092ee91a0dbf2ae0bca449d6d3cb Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 17:35:20 +0100 Subject: [PATCH 128/168] And some tidyup. --- R/build_mesh.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 7bc3eec..0c9c16d 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -13,7 +13,7 @@ #' Defaults are: #' pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)). #' -#' @param shapes shapefile covering the region under investigation. +#' @param shapes sf covering the region under investigation. #' @param mesh.args list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution}, #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, #' with the parameters having the same meaning as in the INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. 
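The parameter documentation above lists default values for mesh.args. A usage sketch overriding a subset of them (spdf is assumed to be the sf object from the example; the values shown simply restate the documented defaults):

    my_mesh <- build_mesh(spdf,
                          mesh.args = list(max.edge = c(3.0, 8),
                                           cut = 0.4,
                                           offset = c(1, 15)))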
@@ -31,11 +31,6 @@ #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row #' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), #' c(xmin,ymin), c(xmin, ymax)) -#' } -#' -#' -#' -#' #' #' polys <- sf::st_sfc(sf::st_polygon(polygons)) #' response_df <- data.frame(area_id = 1:100, From 0e53f67985fbf426b3bd52655be7ba72907f8831 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 21:08:45 +0100 Subject: [PATCH 129/168] Remove raster from some print and summary functions. --- R/summary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/summary.R b/R/summary.R index 2a744e1..59e1dbf 100644 --- a/R/summary.R +++ b/R/summary.R @@ -116,7 +116,7 @@ print.disag_model <- function(x, ...){ summary.disag_data <- function(object, ...) { n_polygons <- nrow(object$polygon_shapefile) - n_covariates <- raster::nlayers(object$covariate_rasters) + n_covariates <- terra::nlyr(object$covariate_rasters) cat(paste("They data contains", n_polygons, "polygons and", nrow(object$covariate_data), "pixels\n")) @@ -157,7 +157,7 @@ summary.disag_data <- function(object, ...) { print.disag_data <- function(x, ...){ n_polygons <- nrow(x$polygon_shapefile) - n_covariates <- raster::nlayers(x$covariate_rasters) + n_covariates <- terra::nlyr(x$covariate_rasters) cat(paste("They data contains", n_polygons, "polygons and", nrow(x$covariate_data), "pixels\n")) From 783ad21d76cea8203c9518792579db493fab131f Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 21:09:22 +0100 Subject: [PATCH 130/168] Remove raster from some print and summary functions. --- R/summary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/summary.R b/R/summary.R index 59e1dbf..87ce78c 100644 --- a/R/summary.R +++ b/R/summary.R @@ -187,7 +187,7 @@ print.disag_data <- function(x, ...){ summary.disag_prediction <- function(object, ...) { - number_realisations <- raster::nlayers(object$uncertainty_prediction$realisations) + number_realisations <- terra::nlyr(object$uncertainty_prediction$realisations) max_mean <- max(object$mean_prediction$prediction@data@values) min_mean <- min(object$mean_prediction$prediction@data@values) max_iqr <- max((object$uncertainty_prediction$predictions_ci[[2]] - object$uncertainty_prediction$predictions_ci[[1]])@data@values) @@ -238,7 +238,7 @@ print.disag_prediction <- function(x, ...){ if(!is.null(x$mean_prediction$field)) cat('field ') if(!is.null(x$mean_prediction$iid)) cat('iid ') cat('\n\n') - cat(paste0('There are ', raster::nlayers(x$uncertainty_prediction$realisations), ' uncertainty realisations')) + cat(paste0('There are ', terra::nlyr(x$uncertainty_prediction$realisations), ' uncertainty realisations')) return(invisible(x)) } \ No newline at end of file From 66d6bb80576c30a3e8614aa28d5b6289ce496e80 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 21:29:24 +0100 Subject: [PATCH 131/168] First pass at removing raster from predict. Currently makes massive numbers. 
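The port below swaps the raster prediction helpers for terra equivalents: rasterFromXYZ becomes terra::rast(type = 'xyz'), rasterize gains a terra::vect() conversion, and extent/merge move to terra::ext()/terra::merge(). A sketch of the xyz and rasterize replacements on toy inputs (the coordinates and polygon are illustrative):

    library(terra)
    library(sf)

    # was: raster::rasterFromXYZ(cbind(coords, field))
    xyz <- cbind(expand.grid(x = 1:3, y = 1:3), z = runif(9))
    field_ras <- terra::rast(xyz, type = "xyz")

    # was: raster::rasterize(spdf, r, field = 'area_id'); terra wants a SpatVector
    poly <- sf::st_sf(area_id = 1L,
                      geometry = sf::st_sfc(sf::st_polygon(list(
                        rbind(c(1, 1), c(3, 1), c(3, 3), c(1, 3), c(1, 1))))))
    id_ras <- terra::rasterize(terra::vect(poly), field_ras, field = "area_id")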
--- R/predict.R | 55 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/R/predict.R b/R/predict.R index 4e9c759..5033bfd 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1,4 +1,3 @@ -<<<<<<< HEAD #' Predict mean and uncertainty from the disaggregation model result #' #' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and @@ -213,7 +212,7 @@ getCoords <- function(data) { raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE) coords <- raster_pts@coords - return(coords) + return(coords) } # Get Amatrix for field @@ -236,13 +235,12 @@ getAmatrix <- function(mesh, coords) { } - # Helper to check and sort out new raster data. check_newdata <- function(newdata, model_output){ if(is.null(newdata)) return(NULL) if(!is.null(newdata)){ - if(!(inherits(newdata, c('RasterStack', 'RasterBrick', 'RasterLayer')))){ - stop('newdata should be NULL or a RasterStack or a RasterBrick') + if(!(inherits(newdata, c('SpatRaster')))){ + stop('newdata should be NULL or a SpatRaster') } if(!all(names(model_output$data$covariate_rasters) %in% names(newdata))){ stop('All covariates used to fit the model must be in newdata') @@ -288,17 +286,18 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { } if(predict_iid) { - tmp_shp <- model_output$data$polygon_shapefile - tmp_shp@data <- data.frame(area_id = factor(model_output$data$polygon_data$area_id)) - shapefile_raster <- raster::rasterize(tmp_shp, + tmp_shp <- model_output$data$x + tmp_shp <- dplyr::bind_cols(tmp_shp, + area_id = + factor(model_output$data$polygon_data$area_id)) + shapefile_raster <- terra::rasterize(tmp_shp, model_output$data$covariate_rasters, field = 'area_id') - shapefile_ids <- raster::unique(shapefile_raster) + shapefile_ids <- terra::unique(shapefile_raster) iid_objects <- list(shapefile_raster = shapefile_raster, shapefile_ids = shapefile_ids) } else { iid_objects <- NULL } - return(list(covariates = covariates, field_objects = field_objects, iid_objects = iid_objects)) @@ -309,12 +308,12 @@ predict_single_raster <- function(model_parameters, objects, link_function) { # Create linear predictor covs_by_betas <- list() - for(i in seq_len(raster::nlayers(objects$covariates))){ + for(i in seq_len(terra::nlyr(objects$covariates))){ covs_by_betas[[i]] <- model_parameters$slope[i] * objects$covariates[[i]] } - cov_by_betas <- raster::stack(covs_by_betas) - if(raster::nlayers(cov_by_betas) > 1){ + cov_by_betas <- terra::rast(covs_by_betas) + if(terra::nlyr(cov_by_betas) > 1){ sum_cov_by_betas <- sum(cov_by_betas) } else { # With only 1 covariate, there's nothing to sum. Do this to avoid warnings. 
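Since check_newdata above now accepts only a SpatRaster, callers still holding legacy raster objects need a one-line coercion before predicting. A sketch, where `new_stack` is a hypothetical RasterStack from older code:

    newdata <- terra::rast(new_stack)  # terra coerces Raster* objects directly
    # the name check that predict then performs:
    all(names(model_output$data$covariate_rasters) %in% names(newdata))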
@@ -327,7 +326,9 @@ predict_single_raster <- function(model_parameters, objects, link_function) { if(!is.null(objects$field_objects)){ # Extract field values field <- (objects$field_objects$Amatrix %*% model_parameters$nodemean)[, 1] - field_ras <- raster::rasterFromXYZ(cbind(objects$field_objects$coords, field)) + field_ras <- terra::rast(cbind(objects$field_objects$coords, field), + type = 'xyz', + crs = crs(linear_pred)) linear_pred <- linear_pred + field_ras } else { field_ras <- NULL @@ -336,28 +337,34 @@ predict_single_raster <- function(model_parameters, objects, link_function) { if(!is.null(objects$iid_objects)) { iid_ras <- objects$iid_objects$shapefile_raster iideffect_sd <- 1/sqrt(exp(model_parameters$iideffect_log_tau)) + # todo for(i in seq_along(model_parameters$iideffect)) { - iid_ras@data@values[which(objects$iid_objects$shapefile_raster@data@values == objects$iid_objects$shapefile_ids[i])] <- + targetvals <- terra::values(objects$iid_objects$shapefile_raster, + dataframe = FALSE, mat = FALSE) + whichvals <- which(targetvals == objects$iid_objects$shapefile_ids[1, i]) + values(iid_ras)[whichvals] <- model_parameters$iideffect[i] - na_pixels <- which(is.na(iid_ras@data@values)) + na_pixels <- which(is.na(values(iid_ras, dataframe = FALSE, mat = FALSE))) na_iid_values <- stats::rnorm(length(na_pixels), 0, iideffect_sd) - iid_ras@data@values[na_pixels] <- na_iid_values + values(iid_ras)[na_pixels] <- na_iid_values } - if(raster::extent(iid_ras) != raster::extent(linear_pred)) { + if(terra::ext(iid_ras) != terra::ext(linear_pred)) { # Extent of prediction space is different to the original model. Keep any overlapping iid values but predict to the new extent raster_new_extent <- linear_pred - raster_new_extent@data@values <- NA - iid_ras <- raster::merge(iid_ras, raster_new_extent, ext = raster::extent(raster_new_extent)) - missing_pixels <- which(is.na(iid_ras@data@values)) + values(raster_new_extent) <- NA + # iid_ras <- terra::merge(iid_ras, raster_new_extent, ext = terra::ext(raster_new_extent)) + # NOt sure why we no longer need the ext argument + iid_ras <- terra::merge(iid_ras, raster_new_extent) + missing_pixels <- which(is.na(values(iid_ras, dataframe = FALSE, mat = FALSE))) missing_iid_values <- stats::rnorm(length(missing_pixels), 0, iideffect_sd) - iid_ras@data@values[missing_pixels] <- missing_iid_values + values(iid_ras)[missing_pixels] <- missing_iid_values } linear_pred <- linear_pred + iid_ras } else { iid_ras <- NULL } - if(link_function == 'logit') { + if(link_function == 'logit') { prediction_ras <- 1 / (1 + exp(-1 * linear_pred)) } else if(link_function == 'log') { prediction_ras <- exp(linear_pred) @@ -365,7 +372,7 @@ predict_single_raster <- function(model_parameters, objects, link_function) { prediction_ras <- linear_pred } - predictions <- list(prediction = prediction_ras, + predictions <- list(prediction = prediction_ras, field = field_ras, iid = iid_ras, covariates = cov_contribution) From e334b3dec4b38b3807e4e49c311875d581010ae0 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 21:30:07 +0100 Subject: [PATCH 132/168] Line endings. 
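Two details worth flagging in the iid block of PATCH 131 above: terra::values() is re-read from shapefile_raster on every loop iteration, and `shapefile_ids[1, i]` indexes across columns even though terra::unique() on a single-layer raster returns a one-column data.frame with the ids down the rows. A hedged sketch that hoists the read and indexes by row, under that assumption:

    targetvals <- terra::values(objects$iid_objects$shapefile_raster, mat = FALSE)
    newvals <- targetvals  # copy of the rasterised area ids
    for (i in seq_along(model_parameters$iideffect)) {
      newvals[which(targetvals == objects$iid_objects$shapefile_ids[i, 1])] <-
        model_parameters$iideffect[i]
    }
    na_pixels <- which(is.na(newvals))  # cells outside every polygon
    newvals[na_pixels] <- stats::rnorm(length(na_pixels), 0, iideffect_sd)
    terra::values(iid_ras) <- newvals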
--- R/build_mesh.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 0c9c16d..0485ff2 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -31,6 +31,7 @@ #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row #' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), #' c(xmin,ymin), c(xmin, ymax)) +#' } #' #' polys <- sf::st_sfc(sf::st_polygon(polygons)) #' response_df <- data.frame(area_id = 1:100, @@ -40,7 +41,6 @@ #' my_mesh <- build_mesh(spdf) #' } #' -#' #' @export build_mesh <- function(shapes, mesh.args = NULL) { @@ -63,12 +63,10 @@ build_mesh <- function(shapes, mesh.args = NULL) { pars[names(mesh.args)] <- mesh.args - outline <- sf::st_union(sf::st_as_sf(shapes)) - coords <- sf::st_coordinates(outline) - #no sure which is needed - # outline <- st_sf(sf::st_union(sf::st_convex_hull(shapes))) - # coords <- sf::st_coordinates(outline)[, c('X', 'Y')] + #outline <- maptools::unionSpatialPolygons(shapes_old, IDs = rep(1, length(shapes_old))) + outline <- st_sf(sf::st_union(sf::st_convex_hull(shapes))) + coords <- sf::st_coordinates(outline)[, c('X', 'Y')] outline.hull <- INLA::inla.nonconvex.hull(coords, convex = pars$convex, @@ -81,5 +79,6 @@ build_mesh <- function(shapes, mesh.args = NULL) { cut = pars$cut, offset = pars$offset) + return(mesh) } From cb40b8161d216fbd114306b5b4011d74fa556639 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 21:30:49 +0100 Subject: [PATCH 133/168] line endings. --- R/extract.R | 384 +++++++++++++++++++++++++------------------------- R/fit_model.R | 7 +- 2 files changed, 196 insertions(+), 195 deletions(-) diff --git a/R/extract.R b/R/extract.R index 75d1e16..808f0cb 100644 --- a/R/extract.R +++ b/R/extract.R @@ -1,192 +1,192 @@ -#' Parallel extraction of raster stack by shape file. -#' -#' Parallelisation is performed across rasters, not shapes. -#' So this function is only useful if you are extracting -#' data from many raster layers. -#' As the overhead for parallel computation in windows is high -#' it only makes sense to parallelise in this way. -#' -#' -#' @param raster A RasterBrick or RasterStack object. -#' @param shape A SpatialPolygons object. -#' @param fun The function used to aggregate the pixel data. If NULL, raw pixel data is returned. -#' @param id Name of column in shape object to be used to bind an ID column to output. -#' @param ... Other arguments to raster::extract. 
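On the outline change in PATCH 132 above: both variants exist only to hand boundary coordinates to INLA::inla.nonconvex.hull, but taking per-polygon convex hulls before the union coarsens the boundary, while a plain union traces it exactly; note also that st_sf() is called there without its sf:: prefix, so it relies on sf being attached. A sketch of the two candidate outlines, assuming `shapes` is an sf object:

    outline_exact <- sf::st_union(shapes)                                 # exact outer boundary
    outline_hull  <- sf::st_sf(sf::st_union(sf::st_convex_hull(shapes)))  # hulled variant used above
    coords <- sf::st_coordinates(outline_hull)[, c('X', 'Y')]             # what inla.nonconvex.hull consumes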
-#' -#' @return A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack -#' -#' @importFrom foreach %dopar% -#' @importFrom parallel stopCluster -#' @importFrom parallel makeCluster -#' @importFrom doParallel registerDoParallel -#' -#' @export -#' @examples -#' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id') -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() -#' } - -parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ - - shape[, id] <- as.character(shape[, id, drop = TRUE]) - - # Run extract in parallel. - values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...) - - if(!is.null(fun)){ - - # If a summary function was given, just bind everything together and add ID column - df <- values - if(inherits(shape, 'df')){ - df <- cbind(ID = as.data.frame(shape)[, id], df) - } else{ - df <- cbind(ID = names(shape), df) - id <- 'id' - } - names(df) <- c(id, names(raster)) - return(df) - - } else { - df <- values[, 2:(ncol(values) - 1)] - df <- cbind(as.data.frame(shape)[values$ID, id], values$cell, df) - names(df) <- c(id, 'cellid', names(raster)) - - return(df) - } - -} - - -#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame -#' -#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does -#' not exist), this column will contain NAs. -#' -#' @param shape A sf object containing response data. -#' @param id_var Name of column in shape object with the polygon id. Default 'area_id'. -#' @param response_var Name of column in shape object with the response data. Default 'response'. -#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL. -#' -#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does -#' not exist), this column will contain NAs. 
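The minus and plus blocks in this hunk are content-identical: PATCH 133 normalises line endings in R/extract.R, so git re-emits the whole file. With a recent git the noise can be suppressed when reviewing, e.g.:

    git show --ignore-cr-at-eol cb40b81 -- R/extract.R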
-#' -#' @export -#' @examples { -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' getPolygonData(spdf, id_var = 'area_id', response_var = 'response') -#' } -#' -#' - -getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) { - - if(is.null(sample_size_var)) { - polygon_df <- shape[, c(id_var, response_var), drop = TRUE] - polygon_df$N <- rep(NA, nrow(polygon_df)) - } else { - polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE] - } - - names(polygon_df) <- c('area_id', 'response', 'N') - - return(polygon_df) -} - - -#' Get a RasterStack of covariates from a folder containing .tif files -#' -#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter. -#' -#' @param directory Filepath to the directory containing the rasters. -#' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif . -#' @param shape An object with an extent that the rasters will be cropped to. -#' -#' @return A RasterStack of the raster files in the directory -#' -#' @export -#' @examples -#' \dontrun{ -#' getCovariateRasters('/home/rasters', '.tif$', shape) -#' } -#' - -getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { - - stopifnot(dir.exists(directory)) - - covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE) - stopifnot(length(covariate_files) != 0) - - covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x)) - covariate_stack <- raster::stack(covariate_rasters) - - covariate_stack <- raster::crop(covariate_stack, shape) - - return(covariate_stack) -} - -# Extract coordinates from raster to use constructing the INLA mesh -# -# @param cov_rasters RasterStack of the covariate rasters. -# @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters. -# -# @return A matrix containing the coordinates used to make the mesh - -extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { - - stopifnot(inherits(cov_rasters, 'SpatRaster')) - if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric')) - - points_raster <- cov_rasters[[1]] - points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 - raster_pts <- terra::as.points(points_raster) - coords <- terra::crds(raster_pts) - - # If specified, only retain certain pixel ids - if(!is.null(selectIds)) { - coords <- coords[selectIds, ] - } - - return(coords) - -} - - - +#' Parallel extraction of raster stack by shape file. +#' +#' Parallelisation is performed across rasters, not shapes. +#' So this function is only useful if you are extracting +#' data from many raster layers. +#' As the overhead for parallel computation in windows is high +#' it only makes sense to parallelise in this way. +#' +#' +#' @param raster A RasterBrick or RasterStack object. +#' @param shape A SpatialPolygons object. +#' @param fun The function used to aggregate the pixel data. If NULL, raw pixel data is returned. 
+#' @param id Name of column in shape object to be used to bind an ID column to output. +#' @param ... Other arguments to raster::extract. +#' +#' @return A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack +#' +#' @importFrom foreach %dopar% +#' @importFrom parallel stopCluster +#' @importFrom parallel makeCluster +#' @importFrom doParallel registerDoParallel +#' +#' @export +#' @examples +#' \dontrun{ +#' polygons <- list() +#' for(i in 1:100) { +#' row <- ceiling(i/10) +#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row +#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) +#' } +#' +#' polys <- do.call(raster::spPolygons, polygons) +#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +#' +#' r <- raster::raster(ncol=20, nrow=20) +#' r <- raster::setExtent(r, raster::extent(spdf)) +#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) +#' r2 <- raster::raster(ncol=20, nrow=20) +#' r2 <- raster::setExtent(r2, raster::extent(spdf)) +#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) +#' cov_rasters <- raster::stack(r, r2) +#' +#' cl <- parallel::makeCluster(2) +#' doParallel::registerDoParallel(cl) +#' result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id') +#' parallel::stopCluster(cl) +#' foreach::registerDoSEQ() +#' } + +parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ + + shape[, id] <- as.character(shape[, id, drop = TRUE]) + + # Run extract in parallel. + values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...) + + if(!is.null(fun)){ + + # If a summary function was given, just bind everything together and add ID column + df <- values + if(inherits(shape, 'df')){ + df <- cbind(ID = as.data.frame(shape)[, id], df) + } else{ + df <- cbind(ID = names(shape), df) + id <- 'id' + } + names(df) <- c(id, names(raster)) + return(df) + + } else { + df <- values[, 2:(ncol(values) - 1)] + df <- cbind(as.data.frame(shape)[values$ID, id], values$cell, df) + names(df) <- c(id, 'cellid', names(raster)) + + return(df) + } + +} + + +#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame +#' +#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +#' not exist), this column will contain NAs. +#' +#' @param shape A sf object containing response data. +#' @param id_var Name of column in shape object with the polygon id. Default 'area_id'. +#' @param response_var Name of column in shape object with the response data. Default 'response'. +#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL. +#' +#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +#' not exist), this column will contain NAs. 
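As re-emitted above, parallelExtract is now a thin wrapper around a single terra::extract() call: despite its name, and the retained foreach/doParallel imports and cluster-based example, no parallel work remains. The extraction can equally be called directly; a sketch using the toy `cov_rasters` and `spdf` from the example above (after coercing the example's raster/sp objects with terra::rast() and sf::st_as_sf()):

    values <- terra::extract(cov_rasters, terra::vect(spdf), fun = NULL,
                             na.rm = TRUE, cells = TRUE)
    head(values)  # an ID column, one column per layer, and a 'cell' column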
+#' +#' @export +#' @examples { +#' polygons <- list() +#' for(i in 1:100) { +#' row <- ceiling(i/10) +#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row +#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) +#' } +#' +#' polys <- do.call(raster::spPolygons, polygons) +#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +#' +#' getPolygonData(spdf, id_var = 'area_id', response_var = 'response') +#' } +#' +#' + +getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) { + + if(is.null(sample_size_var)) { + polygon_df <- shape[, c(id_var, response_var), drop = TRUE] + polygon_df$N <- rep(NA, nrow(polygon_df)) + } else { + polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE] + } + + names(polygon_df) <- c('area_id', 'response', 'N') + + return(polygon_df) +} + + +#' Get a RasterStack of covariates from a folder containing .tif files +#' +#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter. +#' +#' @param directory Filepath to the directory containing the rasters. +#' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif . +#' @param shape An object with an extent that the rasters will be cropped to. +#' +#' @return A RasterStack of the raster files in the directory +#' +#' @export +#' @examples +#' \dontrun{ +#' getCovariateRasters('/home/rasters', '.tif$', shape) +#' } +#' + +getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { + + stopifnot(dir.exists(directory)) + + covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE) + stopifnot(length(covariate_files) != 0) + + covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x)) + covariate_stack <- raster::stack(covariate_rasters) + + covariate_stack <- raster::crop(covariate_stack, shape) + + return(covariate_stack) +} + +# Extract coordinates from raster to use constructing the INLA mesh +# +# @param cov_rasters RasterStack of the covariate rasters. +# @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters. 
+# +# @return A matrix containing the coordinates used to make the mesh + +extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { + + stopifnot(inherits(cov_rasters, 'SpatRaster')) + if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric')) + + points_raster <- cov_rasters[[1]] + points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 + raster_pts <- terra::as.points(points_raster) + coords <- terra::crds(raster_pts) + + # If specified, only retain certain pixel ids + if(!is.null(selectIds)) { + coords <- coords[selectIds, ] + } + + return(coords) + +} + + + diff --git a/R/fit_model.R b/R/fit_model.R index 28df455..fe99c35 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -1,3 +1,4 @@ +<<<<<<< HEAD #' Fit the disaggregation model #' #' \emph{fit_model} function takes a \emph{disag_data} object created by @@ -358,9 +359,9 @@ make_model_object <- function(data, } # Construct sensible default field hyperpriors - limits <- sp::bbox(data$polygon_shapefile) - hypontenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) - prior_rho <- hypontenuse/3 + limits <- sf::st_bbox(data$x) + hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2) + prior_rho <- hypotenuse/3 prior_sigma <- sd(data$polygon_data$response/mean(data$polygon_data$response)) From be450ede41948c817355be179c668499b062d4f7 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Wed, 13 Sep 2023 14:42:14 +0100 Subject: [PATCH 134/168] migrate prepare_data to terra/sf --- R/extract.R | 301 +++++++++++++++++------------------------------ R/matching.R | 106 ++++++++--------- R/prepare_data.R | 183 ++++++++++++++-------------- 3 files changed, 256 insertions(+), 334 deletions(-) diff --git a/R/extract.R b/R/extract.R index 808f0cb..dcf704f 100644 --- a/R/extract.R +++ b/R/extract.R @@ -1,192 +1,109 @@ -#' Parallel extraction of raster stack by shape file. -#' -#' Parallelisation is performed across rasters, not shapes. -#' So this function is only useful if you are extracting -#' data from many raster layers. -#' As the overhead for parallel computation in windows is high -#' it only makes sense to parallelise in this way. -#' -#' -#' @param raster A RasterBrick or RasterStack object. -#' @param shape A SpatialPolygons object. -#' @param fun The function used to aggregate the pixel data. If NULL, raw pixel data is returned. -#' @param id Name of column in shape object to be used to bind an ID column to output. -#' @param ... Other arguments to raster::extract. 
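The fit_model.R hunk above also moves the default prior geometry onto sf: sp::bbox() returned a 2 x 2 matrix indexed as limits[1, 2] and so on, while sf::st_bbox() returns a named bbox, which is why the diagonal now reads through xmin/xmax/ymin/ymax (and the hypontenuse typo went with it). A small sketch, assuming `spdf` is any sf object:

    limits <- sf::st_bbox(spdf)  # named: xmin, ymin, xmax, ymax
    hypotenuse <- sqrt((limits[['xmax']] - limits[['xmin']])^2 +
                       (limits[['ymax']] - limits[['ymin']])^2)
    prior_rho <- hypotenuse / 3  # default prior range: a third of the domain diagonal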
-#' -#' @return A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack -#' -#' @importFrom foreach %dopar% -#' @importFrom parallel stopCluster -#' @importFrom parallel makeCluster -#' @importFrom doParallel registerDoParallel -#' -#' @export -#' @examples -#' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id') -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() -#' } - -parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ - - shape[, id] <- as.character(shape[, id, drop = TRUE]) - - # Run extract in parallel. - values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...) - - if(!is.null(fun)){ - - # If a summary function was given, just bind everything together and add ID column - df <- values - if(inherits(shape, 'df')){ - df <- cbind(ID = as.data.frame(shape)[, id], df) - } else{ - df <- cbind(ID = names(shape), df) - id <- 'id' - } - names(df) <- c(id, names(raster)) - return(df) - - } else { - df <- values[, 2:(ncol(values) - 1)] - df <- cbind(as.data.frame(shape)[values$ID, id], values$cell, df) - names(df) <- c(id, 'cellid', names(raster)) - - return(df) - } - -} - - -#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame -#' -#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does -#' not exist), this column will contain NAs. -#' -#' @param shape A sf object containing response data. -#' @param id_var Name of column in shape object with the polygon id. Default 'area_id'. -#' @param response_var Name of column in shape object with the response data. Default 'response'. -#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL. -#' -#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does -#' not exist), this column will contain NAs. 
-#' -#' @export -#' @examples { -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' getPolygonData(spdf, id_var = 'area_id', response_var = 'response') -#' } -#' -#' - -getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) { - - if(is.null(sample_size_var)) { - polygon_df <- shape[, c(id_var, response_var), drop = TRUE] - polygon_df$N <- rep(NA, nrow(polygon_df)) - } else { - polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE] - } - - names(polygon_df) <- c('area_id', 'response', 'N') - - return(polygon_df) -} - - -#' Get a RasterStack of covariates from a folder containing .tif files -#' -#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter. -#' -#' @param directory Filepath to the directory containing the rasters. -#' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif . -#' @param shape An object with an extent that the rasters will be cropped to. -#' -#' @return A RasterStack of the raster files in the directory -#' -#' @export -#' @examples -#' \dontrun{ -#' getCovariateRasters('/home/rasters', '.tif$', shape) -#' } -#' - -getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { - - stopifnot(dir.exists(directory)) - - covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE) - stopifnot(length(covariate_files) != 0) - - covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x)) - covariate_stack <- raster::stack(covariate_rasters) - - covariate_stack <- raster::crop(covariate_stack, shape) - - return(covariate_stack) -} - -# Extract coordinates from raster to use constructing the INLA mesh -# -# @param cov_rasters RasterStack of the covariate rasters. -# @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters. -# -# @return A matrix containing the coordinates used to make the mesh - -extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { - - stopifnot(inherits(cov_rasters, 'SpatRaster')) - if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric')) - - points_raster <- cov_rasters[[1]] - points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 - raster_pts <- terra::as.points(points_raster) - coords <- terra::crds(raster_pts) - - # If specified, only retain certain pixel ids - if(!is.null(selectIds)) { - coords <- coords[selectIds, ] - } - - return(coords) - -} - - - +#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame +#' +#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +#' not exist), this column will contain NAs. +#' +#' @param shape A sf object containing response data. 
+#' @param id_var Name of column in shape object with the polygon id. Default 'area_id'. +#' @param response_var Name of column in shape object with the response data. Default 'response'. +#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL. +#' +#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +#' not exist), this column will contain NAs. +#' +#' @export +#' @examples { +#' polygons <- list() +#' for(i in 1:100) { +#' row <- ceiling(i/10) +#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row +#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) +#' } +#' +#' polys <- do.call(raster::spPolygons, polygons) +#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +#' +#' getPolygonData(spdf, id_var = 'area_id', response_var = 'response') +#' } +#' +#' + +getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) { + + if(is.null(sample_size_var)) { + polygon_df <- shape[, c(id_var, response_var), drop = TRUE] + polygon_df$N <- rep(NA, nrow(polygon_df)) + } else { + polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE] + } + + names(polygon_df) <- c('area_id', 'response', 'N') + + return(polygon_df) +} + + +#' Get a RasterStack of covariates from a folder containing .tif files +#' +#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter. +#' +#' @param directory Filepath to the directory containing the rasters. +#' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif . +#' @param shape An object with an extent that the rasters will be cropped to. +#' +#' @return A RasterStack of the raster files in the directory +#' +#' @export +#' @examples +#' \dontrun{ +#' getCovariateRasters('/home/rasters', '.tif$', shape) +#' } +#' + +getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { + + stopifnot(dir.exists(directory)) + + covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE) + stopifnot(length(covariate_files) != 0) + + covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x)) + covariate_stack <- raster::stack(covariate_rasters) + + covariate_stack <- raster::crop(covariate_stack, shape) + + return(covariate_stack) +} + +# Extract coordinates from raster to use constructing the INLA mesh +# +# @param cov_rasters RasterStack of the covariate rasters. +# @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters. 
+# +# @return A matrix containing the coordinates used to make the mesh + +extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { + + stopifnot(inherits(cov_rasters, 'SpatRaster')) + if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric')) + + points_raster <- cov_rasters[[1]] + points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 + raster_pts <- terra::as.points(points_raster) + coords <- terra::crds(raster_pts) + + # If specified, only retain certain pixel ids + if(!is.null(selectIds)) { + coords <- coords[selectIds, ] + } + + return(coords) + +} + + + diff --git a/R/matching.R b/R/matching.R index 18684c3..615b5fc 100644 --- a/R/matching.R +++ b/R/matching.R @@ -1,53 +1,53 @@ -#' Function to match pixels to their corresponding polygon -#' -#' From the covariate data and polygon data, the function matches the polygon id between the two to find -#' which pixels from the covariate data are contained in each of the polygons. -#' -#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned -#' by \code{getPolygonData} function). -#' -#' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in -#' covariate data that corresponds to that polygon, the second column is the index of the last row in -#' covariate data that corresponds to that polygon. -#' -#' @param covariates data.frame with each covariate as a column an and id column. -#' @param polygon_data data.frame with polygon id and response data. -#' @param id_var string with the name of the column in the covariate data.frame containing the polygon id. -#' -#' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in -#' covariate data that corresponds to that polygon, the second column is the index of the last row in -#' covariate data that corresponds to that polygon. -#' -#' @name getStartendindex -#' -#' @examples { -#' covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5)) -#' response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA)) -#' getStartendindex(covs, response, 'area_id') -#' } -#' -#' -#' @export - -getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') { - - stopifnot(ncol(polygon_data) == 3) - stopifnot(ncol(covariates) >= 2) - stopifnot(nrow(covariates) > nrow(polygon_data)) - stopifnot(sum(polygon_data$area_id %in% covariates[, id_var]) == nrow(polygon_data)) - - # Create startendindex matrix - # This defines which pixels in the matrix are associated with which polygon. - startendindex <- lapply(unique(covariates[, id_var]), function(x) range(which(covariates[, id_var] == x))) - - startendindex <- do.call(rbind, startendindex) - - whichindices <- match(polygon_data$area_id, unique(covariates[, id_var])) - - # c++ is zero indexed. - startendindex <- startendindex[whichindices, ] - 1L - - return(startendindex) -} - +#' Function to match pixels to their corresponding polygon +#' +#' From the covariate data and polygon data, the function matches the polygon id between the two to find +#' which pixels from the covariate data are contained in each of the polygons. 
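Why extractCoordsForMesh above overwrites NAs with -9999 before converting: terra::as.points() drops NA cells by default, and the mesh presumably wants a candidate coordinate for every cell so that selectIds line up with the extracted pixels. A toy demonstration:

    r <- terra::rast(ncols = 3, nrows = 2)
    terra::values(r) <- c(1, NA, 3, NA, 5, 6)
    nrow(terra::crds(terra::as.points(r)))  # 4 points: the NA cells are dropped
    r[is.na(terra::values(r, mat = FALSE))] <- -9999
    nrow(terra::crds(terra::as.points(r)))  # 6 points: the full grid is retained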
+#' +#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, +#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned +#' by \code{getPolygonData} function). +#' +#' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in +#' covariate data that corresponds to that polygon, the second column is the index of the last row in +#' covariate data that corresponds to that polygon. +#' +#' @param covariates data.frame with each covariate as a column an and id column. +#' @param polygon_data data.frame with polygon id and response data. +#' @param id_var string with the name of the column in the covariate data.frame containing the polygon id. +#' +#' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in +#' covariate data that corresponds to that polygon, the second column is the index of the last row in +#' covariate data that corresponds to that polygon. +#' +#' @name getStartendindex +#' +#' @examples { +#' covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5)) +#' response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA)) +#' getStartendindex(covs, response, 'area_id') +#' } +#' +#' +#' @export + +getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') { + + stopifnot(ncol(polygon_data) == 3) + stopifnot(ncol(covariates) >= 2) + stopifnot(nrow(covariates) > nrow(polygon_data)) + stopifnot(sum(polygon_data$area_id %in% covariates[, id_var]) == nrow(polygon_data)) + + # Create startendindex matrix + # This defines which pixels in the matrix are associated with which polygon. + startendindex <- lapply(unique(covariates[, id_var]), function(x) range(which(covariates[, id_var] == x))) + + startendindex <- do.call(rbind, startendindex) + + whichindices <- terra::match(polygon_data$area_id, unique(covariates[, id_var])) + + # c++ is zero indexed. + startendindex <- startendindex[whichindices, ] - 1L + + return(startendindex) +} + diff --git a/R/prepare_data.R b/R/prepare_data.R index 94b98ff..3c40d1e 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -1,31 +1,31 @@ #' Prepare data for disaggregation modelling -#' -#' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. +#' +#' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. #' Designed to be used in the \emph{disaggregation::fit_model} function. -#' -#' Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates. -#' -#' Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons -#' (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores +#' +#' Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates. +#' +#' Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons +#' (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores #' to use for covariate extraction. This can be the number of covariates used in the model. -#' -#' The aggregation raster defines how the pixels within each polygon are aggregated. 
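A worked example of getStartendindex above, using the data from its roxygen example (the expected output follows directly from the definition; the indices are zero-based because they are consumed by the TMB C++ template):

    covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3),
                       response = c(3, 9, 5, 2, 3, 6, 7, 3, 5))
    response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA))
    getStartendindex(covs, response, 'area_id')
    #      [,1] [,2]
    # [1,]    0    2    # polygon 1 owns rows 1..3 of covs
    # [2,]    3    4    # polygon 2 owns rows 4..5
    # [3,]    5    8    # polygon 3 owns rows 6..9

One small query on the new body: terra::match() is the SpatRaster method, and for the plain vectors used here it presumably falls through to base behaviour, so plain match() would state the intent more directly.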
+#' +#' The aggregation raster defines how the pixels within each polygon are aggregated. #' The disaggregation model performs a weighted sum of the pixel prediction, weighted by the pixel values in the aggregation raster. -#' For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases -#' (rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions +#' For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases +#' (rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions #' are aggregated to polygon level by summing the pixel values. -#' -#' Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field +#' +#' Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field #' (\emph{getStartendindex} function). -#' -#' The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field +#' +#' The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field #' (\emph{build_mesh} function). -#' -#' The \emph{na.action} flag is automatically off. If there are any NAs in the response or covariate data within the polygons the -#' \emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE -#' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero +#' +#' The \emph{na.action} flag is automatically off. If there are any NAs in the response or covariate data within the polygons the +#' \emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE +#' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero #' and sets covariate NAs pixels to the median value for the that covariate. -#' +#' #' @param x sf object containing at least three columns: one with the geometried, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}). #' @param covariate_rasters RasterStack of covariate rasters to be used in the model. #' @param aggregation_raster Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used. @@ -37,11 +37,11 @@ #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE. #' @param ncores Number of cores used to perform covariate extraction. #' -#' @return A list is returned of class \code{disag_data}. -#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. +#' @return A list is returned of class \code{disag_data}. +#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. 
#' The list of class \code{disag_data} contains: -#' \item{x }{The sf object used as an input.} -#' \item{covariate_rasters }{The SpatRaster used as an input.} +#' \item{x }{The sf object used as an input.} +#' \item{covariate_rasters }{The SpatRaster used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} @@ -53,43 +53,46 @@ #' @import utils #' @name prepare_data #' -#' @examples +#' @examples #' \donttest{ -#' polygons <- list() -#' for(i in 1:100) { +#' polygons <- list() +#' for(i in 1:100) { #' row <- ceiling(i/10) #' col <- ifelse(i %% 10 != 0, i %% 10, 10) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' test_data <- prepare_data(x = spdf, -#' covariate_rasters = cov_rasters) -#' } -#' +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +#' spdf <- sf::st_sf(response_df,geometry=polys) +#' +#' plot(spdf) +#' +#' r <- terra::rast(nrow=20,ncol=20) +#' terra::ext(r) <- terra::ext(spdf) +#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) +#' +#' r2 <- terra::rast(nrow=20,ncol=20) +#' terra::ext(r2) <- terra::ext(spdf) +#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) +#' cov_rasters <- c(r, r2) +#' test_data <- prepare_data(x = spdf, +#' covariate_rasters = cov_rasters) +#' } +#' #' @export -#' -#' +#' +#' -prepare_data <- function(x, +prepare_data <- function(x, covariate_rasters, aggregation_raster = NULL, - id_var = 'area_id', - response_var = 'response', + id_var = 'area_id', + response_var = 'response', sample_size_var = NULL, - mesh.args = NULL, + mesh.args = NULL, na.action = FALSE, makeMesh = TRUE, ncores = 2) { @@ -100,7 +103,7 @@ prepare_data <- function(x, stopifnot(inherits(id_var, 'character')) stopifnot(inherits(response_var, 'character')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) - + # Check for NAs in response data na_rows <- is.na(x[, response_var, drop = TRUE]) if(sum(na_rows) != 0) { @@ -110,9 +113,9 @@ prepare_data <- function(x, stop('There are NAs in the response data. 
Please deal with these, or set na.action = TRUE') } } - + polygon_data <- getPolygonData(x, id_var, response_var, sample_size_var) - + # Save raster layer names so we can reassign it to make sure names don't change. cov_names <- names(covariate_rasters) @@ -124,18 +127,20 @@ prepare_data <- function(x, } names(aggregation_raster) <- 'aggregation_raster' - + covariate_rasters <- c(covariate_rasters, aggregation_raster) - covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) + #covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) + covariate_data <- terra::extract(covariate_rasters, x, cells=TRUE, na.rm=TRUE, ID=TRUE) + names(covariate_data)[1] <- id_var # Remove the aggregation raster - covariate_rasters <- covariate_rasters[[seq(nlyr(covariate_rasters) - 1)]] - + covariate_rasters <- covariate_rasters[[seq(terra::nlyr(covariate_rasters) - 1)]] + names(covariate_rasters) <- cov_names - - aggregation_pixels <- as.numeric(covariate_data[ , ncol(covariate_data)]) - covariate_data <- covariate_data[, -ncol(covariate_data)] - + + aggregation_pixels <- as.numeric(covariate_data[ , terra::ncol(covariate_data)]) + covariate_data <- covariate_data[, -terra::ncol(covariate_data)] + # Check for NAs in population data if(sum(is.na(aggregation_pixels)) != 0) { if(na.action) { @@ -144,7 +149,7 @@ prepare_data <- function(x, stop('There are NAs in the aggregation rasters within polygons. Please deal with these, or set na.action = TRUE') } } - + # Check for NAs in covariate data if(sum(is.na(covariate_data)) != 0) { if(na.action) { @@ -153,13 +158,13 @@ prepare_data <- function(x, stop('There are NAs in the covariate rasters within polygons. Please deal with these, or set na.action = TRUE') } } - + coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cellid) - + coordsForPrediction <- extractCoordsForMesh(covariate_rasters) - + startendindex <- getStartendindex(covariate_data, polygon_data, id_var = id_var) - + if(makeMesh) { if(!requireNamespace('INLA', quietly = TRUE)) { mesh <- NULL @@ -171,7 +176,7 @@ prepare_data <- function(x, mesh <- NULL message("A mesh is not being built. You will not be able to run a spatial model without a mesh.") } - + disag_data <- list(x = x, shapefile_names = list(id_var = id_var, response_var = response_var), covariate_rasters = covariate_rasters, @@ -182,16 +187,16 @@ prepare_data <- function(x, coordsForPrediction = coordsForPrediction, startendindex = startendindex, mesh = mesh) - + class(disag_data) <- c('disag_data', 'list') - + return(disag_data) - + } #' Function to fit the disaggregation model #' -#' @param x SpatialPolygonDataFrame containing the response data +#' @param x SpatialPolygonDataFrame containing the response data #' @param shapefile_names List of 2: polygon id variable name and response variable name from x #' @param covariate_rasters RasterStack of covariates #' @param polygon_data data.frame with two columns: polygon id and response @@ -201,12 +206,12 @@ prepare_data <- function(x, #' @param coordsForPrediction coordinates of the covariate data points in the whole raster extent #' @param startendindex matrix containing the start and end index for each polygon #' @param mesh inla.mesh object to use in the fit -#' -#' @return A list is returned of class \code{disag_data}. -#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. +#' +#' @return A list is returned of class \code{disag_data}. 
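Two details worth checking in the extraction above, both hedged since they depend on terra::extract()'s column order: with cells = TRUE the returned data.frame ends (..., aggregation_raster, cell), so taking the last column for aggregation_pixels would pick up cell indices rather than aggregation values; and the cell column is named `cell`, while extractCoordsForMesh above is passed covariate_data$cellid, a name left over from parallelExtract that would now yield NULL. Also, covariate_data is a plain data.frame at this point, so base ncol() states the intent better than terra::ncol(). A sketch selecting by name instead of position:

    aggregation_pixels <- as.numeric(covariate_data[, 'aggregation_raster'])
    covariate_data <- covariate_data[, names(covariate_data) != 'aggregation_raster']
    coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cell)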
+#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. #' The list of class \code{disag_data} contains: -#' \item{x }{The SpatialPolygonDataFrame used as an input.} -#' \item{covariate_rasters }{The RasterStack used as an input.} +#' \item{x }{The SpatialPolygonDataFrame used as an input.} +#' \item{covariate_rasters }{The RasterStack used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} @@ -216,21 +221,21 @@ prepare_data <- function(x, #' \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.} #' #' @name as.disag_data -#' +#' #' @export -as.disag_data <- function(x, +as.disag_data <- function(x, shapefile_names, - covariate_rasters, - polygon_data, - covariate_data, + covariate_rasters, + polygon_data, + covariate_data, aggregation_pixels, - coordsForFit, + coordsForFit, coordsForPrediction, - startendindex, + startendindex, mesh = NULL) { - + stopifnot(inherits(x, 'SpatialPolygonsDataFrame')) stopifnot(inherits(shapefile_names, 'list')) stopifnot(inherits(covariate_rasters, c('RasterBrick', 'RasterStack'))) @@ -243,7 +248,7 @@ as.disag_data <- function(x, if(!is.null(mesh)) { stopifnot(inherits(mesh, 'inla.mesh')) } - + disag_data <- list(x = x, shapefile_names = shapefile_names, covariate_rasters = covariate_rasters, @@ -254,8 +259,8 @@ as.disag_data <- function(x, coordsForPrediction = coordsForPrediction, startendindex = startendindex, mesh = mesh) - + class(disag_data) <- c('disag_data', 'list') - + return(disag_data) } From b2bff700001a895865222110498a7cae0e48b29b Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Wed, 13 Sep 2023 17:34:58 +0100 Subject: [PATCH 135/168] more terra migration --- R/extract.R | 32 +++++++++++++++++--------------- R/plotting.R | 10 ++++------ R/predict.R | 12 +++++++----- R/prepare_data.R | 4 +--- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/R/extract.R b/R/extract.R index dcf704f..c704f2c 100644 --- a/R/extract.R +++ b/R/extract.R @@ -15,17 +15,18 @@ #' #' @export #' @examples { -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } +#' polygons <- list() +#' for(i in 1:100) { +#' row <- ceiling(i/10) +#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } #' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +#' polys <- lapply(polygons,sf::st_polygon) +#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +#' spdf <- sf::st_sf(response_df,geometry=polys) #' #' getPolygonData(spdf, id_var = 'area_id', response_var = 
'response') #' } @@ -49,13 +50,13 @@ getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', #' Get a RasterStack of covariates from a folder containing .tif files #' -#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter. +#' Looks in a specified folder for raster files. Returns a multi-layered SpatRaster of the rasters cropped to the extent specified by the shape parameter. #' #' @param directory Filepath to the directory containing the rasters. #' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif . #' @param shape An object with an extent that the rasters will be cropped to. #' -#' @return A RasterStack of the raster files in the directory +#' @return A multi-layered SpatRaster of the raster files in the directory #' #' @export #' @examples @@ -71,10 +72,11 @@ getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE) stopifnot(length(covariate_files) != 0) - covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x)) - covariate_stack <- raster::stack(covariate_rasters) + covariate_rasters <- lapply(covariate_files, function(x) terra::rast(x)) + covariate_stack <- terra::rast(covariate_rasters) - covariate_stack <- raster::crop(covariate_stack, shape) + covariate_stack <- terra::crop(covariate_stack, shape) + covariate_stack <- terra::mask(covariate_stack, shape) return(covariate_stack) } diff --git a/R/plotting.R b/R/plotting.R index 02f36e2..e87f645 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -2,7 +2,6 @@ #' #' Plotting function for class \emph{disag_data} (the input data for disaggregation). #' ->>>>>>> Fix plot.disag_data #' Produces three plots: polygon response data, covariate rasters and INLA mesh. #' #' @param x Object of class \emph{disag_data} to be plotted. @@ -28,7 +27,7 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) { if(2 %in% which) { stopifnot(inherits(x$covariate_rasters, c('SpatRaster'))) - plots$covariates <- sp::spplot(x$covariate_rasters) + plots$covariates <- plot(x$covariate_rasters) titles <- c(titles, 'Covariate rasters') } @@ -70,7 +69,7 @@ plot.disag_model <- function(x, ...){ posteriors$type <- ifelse(posteriors$fixed, 'Slope', 'Other') # Check name lengths match before substituting. - lengths_match <- raster::nlayers(x$data$covariate_rasters) == sum(posteriors$fixed) + lengths_match <- terra::nlyr(x$data$covariate_rasters) == sum(posteriors$fixed) if(lengths_match){ posteriors$parameter[grepl('slope', posteriors$parameter)] <- names(x$data$covariate_rasters) } @@ -133,10 +132,10 @@ plot.disag_model <- function(x, ...){ plot.disag_prediction <- function(x, ...) 
{ - rasters_to_plot <- raster::stack(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci) + rasters_to_plot <- terra::rast(list(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci)) names(rasters_to_plot) <- c('mean prediction', 'lower CI', 'upper CI') - plots <- sp::spplot(rasters_to_plot) + plots <- plot(rasters_to_plot) print(plots) @@ -163,7 +162,6 @@ plot_polygon_data <- function(x, names) { area_id <- long <- lat <- group <- response <- NULL stopifnot(inherits(shp, 'sf')) - shp <- dplyr::mutate(shp, area_id = as.character(area_id)) p <- ggplot(shp, aes(fill = response)) + diff --git a/R/predict.R b/R/predict.R index 5033bfd..0aeeedf 100644 --- a/R/predict.R +++ b/R/predict.R @@ -185,10 +185,12 @@ predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALS predictions[[r]] <- prediction_result$prediction } - predictions <- raster::stack(predictions) + predictions <- terra::rast(predictions) probs <- c((1 - CI) / 2, 1 - (1 - CI) / 2) - predictions_ci <- raster::calc(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE)) + predictions_ci <- terra::app(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE)) + + names(predictions_ci) <- c('lower CI', 'upper CI') uncertainty <- list(realisations = predictions, @@ -208,9 +210,9 @@ predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALS getCoords <- function(data) { points_raster <- data$covariate_rasters[[1]] - points_raster[is.na(points_raster)] <- -9999 - raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE) - coords <- raster_pts@coords + points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 + raster_pts <- terra::as.points(points_raster) + coords <- terra::crds(raster_pts) return(coords) } diff --git a/R/prepare_data.R b/R/prepare_data.R index 3c40d1e..eeb4a65 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -68,8 +68,6 @@ #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) #' spdf <- sf::st_sf(response_df,geometry=polys) #' -#' plot(spdf) -#' #' r <- terra::rast(nrow=20,ncol=20) #' terra::ext(r) <- terra::ext(spdf) #' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) @@ -78,6 +76,7 @@ #' terra::ext(r2) <- terra::ext(spdf) #' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) #' cov_rasters <- c(r, r2) +#' #' test_data <- prepare_data(x = spdf, #' covariate_rasters = cov_rasters) #' } @@ -129,7 +128,6 @@ prepare_data <- function(x, covariate_rasters <- c(covariate_rasters, aggregation_raster) - #covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) covariate_data <- terra::extract(covariate_rasters, x, cells=TRUE, na.rm=TRUE, ID=TRUE) names(covariate_data)[1] <- id_var From 91d7cad0f6a9eb742df6cb4ca1414d26e3c62b4f Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 15 Sep 2023 12:27:04 +0100 Subject: [PATCH 136/168] migrate test-extract and minor git fixes --- DESCRIPTION | 1 - NAMESPACE | 1 - R/extract.R | 4 +- R/fit_model.R | 1 - tests/testthat/test-extract.R | 112 +++++++++++++--------------------- 5 files changed, 44 insertions(+), 75 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3001c52..873a8bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,4 +45,3 @@ LinkingTo: RcppEigen SystemRequirements: GNU make VignetteBuilder: knitr ->>>>>>> Change imports in description to match new backends. 
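The NAMESPACE hunk below drops the exported parallelExtract() helper: covariate extraction now calls terra directly, mirroring the terra::extract() call already introduced in prepare_data() and used in the migrated tests. A minimal sketch of the replacement, assuming a multi-layer SpatRaster cov_stack and an sf polygon data frame spdf like those built in the package tests:

    # Sketch of the terra-based extraction that replaces parallelExtract()
    cov_data <- terra::extract(cov_stack, spdf, cells = TRUE, na.rm = TRUE, ID = TRUE)
    names(cov_data)[1] <- 'area_id'  # terra labels the polygon index 'ID'; rename to the id variable

A single call returns the raw pixel values, cell numbers and polygon id in one data frame, so the parallel cluster setup that parallelExtract() needed is no longer required.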
diff --git a/NAMESPACE b/NAMESPACE index b298c2b..bd927c7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,7 +18,6 @@ export(getCovariateRasters) export(getPolygonData) export(getStartendindex) export(make_model_object) -export(parallelExtract) export(predict_model) export(predict_uncertainty) export(prepare_data) diff --git a/R/extract.R b/R/extract.R index c704f2c..6bf26b4 100644 --- a/R/extract.R +++ b/R/extract.R @@ -26,7 +26,7 @@ #' #' polys <- lapply(polygons,sf::st_polygon) #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sf::st_sf(response_df,geometry=polys) +#' spdf <- sf::st_sf(response_df, geometry = polys) #' #' getPolygonData(spdf, id_var = 'area_id', response_var = 'response') #' } @@ -76,7 +76,7 @@ getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { covariate_stack <- terra::rast(covariate_rasters) covariate_stack <- terra::crop(covariate_stack, shape) - covariate_stack <- terra::mask(covariate_stack, shape) + #covariate_stack <- terra::mask(covariate_stack, shape) return(covariate_stack) } diff --git a/R/fit_model.R b/R/fit_model.R index fe99c35..b1fff72 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -1,4 +1,3 @@ -<<<<<<< HEAD #' Fit the disaggregation model #' #' \emph{fit_model} function takes a \emph{disag_data} object created by diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 4a50ef5..8a0e253 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -10,115 +10,87 @@ for(i in 1:n_polygons) { row <- ceiling(i/n_polygon_per_side) col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) } -polys <- do.call(raster::spPolygons, polygons) +polys <- lapply(polygons,sf::st_polygon) N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) +spdf <- sf::st_sf(response_df, geometry = polys) +spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) # Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -test_that("parallelExtract gives errors when it should", { - - skip_on_cran() - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - - expect_error(parallelExtract(spdf, cov_stack, fun = NULL, id = 'area_id')) - expect_error(parallelExtract(cov_stack, spdf, fun = NULL, id = 'id')) - - parallel::stopCluster(cl) - foreach::registerDoSEQ() -}) - -test_that("parallelExtract give the right form of output", { - - skip_on_cran() - - cl <- 
parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - expect_is(cov_data, 'data.frame') - expect_equal(sort(as.numeric(unique(cov_data$area_id))), spdf$area_id)# - expect_equal(ncol(cov_data), raster::nlayers(cov_stack) + 2)# - expect_equal(names(cov_stack), names(cov_data)[-c(1,2)])# - expect_equal(length(unique(cov_data$area_id)), length(spdf)) - -}) +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) test_that("getPolygonData function", { - + skip_on_cran() - + expect_error(getPolygonData(spdf, id_var = 'id', response_var = 'response')) expect_error(getPolygonData(spdf, id_var = 'area_id', response_var = 'data')) - + result <- getPolygonData(spdf, id_var = 'area_id', response_var = 'response') result_binom <- getPolygonData(spdf_binom, id_var = 'area_id', response_var = 'response', sample_size_var = 'sample_size') - + expect_is(result, 'data.frame') expect_equal(ncol(result), 3) expect_equal(nrow(result), nrow(spdf)) expect_equal(result$area_id, spdf$area_id) expect_equal(result$response, spdf$response) expect_equal(result$N, rep(NA, nrow(result))) - + expect_is(result_binom, 'data.frame') expect_equal(ncol(result_binom), 3) expect_equal(nrow(result_binom), nrow(spdf_binom)) expect_equal(result_binom$area_id, spdf_binom$area_id) expect_equal(result_binom$response, spdf_binom$response) expect_equal(result_binom$N, spdf_binom$sample_size) - + }) test_that("getCovariateData function gives errors when it should", { - + skip_on_cran() - + expect_error(getCovariateRasters('/home/rasters', '.tif$', spdf)) - + # Save .tif files in tempdir() - r <- raster::raster(ncol=20, nrow=20) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_stack <- raster::stack(r, r2) - raster::writeRaster(r, paste0(tempdir(), '/cov1.tif'), overwrite = TRUE) - raster::writeRaster(r2, paste0(tempdir(), '/cov2.tif'), overwrite = TRUE) - - expect_is(getCovariateRasters(tempdir(), '.tif$', spdf), 'RasterBrick') - + r <- terra::rast(ncol=20, nrow=20) + r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) + r2 <- terra::rast(ncol=20, nrow=20) + r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) + cov_stack <- c(r, r2) + terra::writeRaster(r, paste0(tempdir(), '/cov1.tif'), overwrite = TRUE) + terra::writeRaster(r2, paste0(tempdir(), '/cov2.tif'), overwrite = TRUE) + + expect_is(getCovariateRasters(tempdir(), '.tif$', spdf), 'SpatRaster') + }) test_that("extractCoordsForMesh function behaves as it should", { skip_on_cran() - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - + + # cl <- parallel::makeCluster(2) + # doParallel::registerDoParallel(cl) + # cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = ) 
+ # parallel::stopCluster(cl) + # foreach::registerDoSEQ() + + cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) + names(cov_data)[1] <- 'area_id' + result <- extractCoordsForMesh(cov_stack, cov_data$cellid) - + result2 <- extractCoordsForMesh(cov_stack) expect_error(extractCoordsForMesh(cov_data$cellid, cov_stack)) From ba3a522ff433fc3f582a833a7e3fe2a3789bbbd0 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 15 Sep 2023 16:15:05 +0100 Subject: [PATCH 137/168] migrate test-prepare-data and further migration in prepare_data --- R/build_mesh.R | 2 +- R/prepare_data.R | 34 +++++---- tests/testthat/test-prepare-data.R | 118 ++++++++++++++--------------- 3 files changed, 77 insertions(+), 77 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 0485ff2..7ad5017 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -64,7 +64,7 @@ build_mesh <- function(shapes, mesh.args = NULL) { pars[names(mesh.args)] <- mesh.args #outline <- maptools::unionSpatialPolygons(shapes_old, IDs = rep(1, length(shapes_old))) - outline <- st_sf(sf::st_union(sf::st_convex_hull(shapes))) + outline <- sf::st_sf(sf::st_union(sf::st_convex_hull(shapes))) coords <- sf::st_coordinates(outline)[, c('X', 'Y')] diff --git a/R/prepare_data.R b/R/prepare_data.R index eeb4a65..96e132c 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -85,7 +85,7 @@ #' #' -prepare_data <- function(x, +prepare_data <- function(polygon_shapefile, covariate_rasters, aggregation_raster = NULL, id_var = 'area_id', @@ -96,7 +96,7 @@ prepare_data <- function(x, makeMesh = TRUE, ncores = 2) { - stopifnot(inherits(x, 'sf')) + stopifnot(inherits(polygon_shapefile, 'sf')) stopifnot(inherits(covariate_rasters, 'SpatRaster')) if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'SpatRaster')) stopifnot(inherits(id_var, 'character')) @@ -104,16 +104,16 @@ prepare_data <- function(x, if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) # Check for NAs in response data - na_rows <- is.na(x[, response_var, drop = TRUE]) + na_rows <- is.na(polygon_shapefile[, response_var, drop = TRUE]) if(sum(na_rows) != 0) { if(na.action) { - x <- x[!na_rows, ] + polygon_shapefile <- polygon_shapefile[!na_rows, ] } else { stop('There are NAs in the response data. Please deal with these, or set na.action = TRUE') } } - polygon_data <- getPolygonData(x, id_var, response_var, sample_size_var) + polygon_data <- getPolygonData(polygon_shapefile, id_var, response_var, sample_size_var) # Save raster layer names so we can reassign it to make sure names don't change. 
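The hunk above renames prepare_data()'s first argument from x to polygon_shapefile and keeps the sf-style NA check on the response column. A minimal sketch of that filtering step, assuming an sf object spdf with a 'response' column as in the tests:

    # Sketch of the na.action = TRUE path: drop polygons with a missing response
    na_rows <- is.na(spdf[, 'response', drop = TRUE])  # drop = TRUE returns the bare column vector
    spdf <- spdf[!na_rows, ]                           # sf row subsetting keeps geometries aligned

When na.action is left at its default, the same condition instead stops with the error quoted in the diff.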
@@ -128,7 +128,7 @@ prepare_data <- function(x, covariate_rasters <- c(covariate_rasters, aggregation_raster) - covariate_data <- terra::extract(covariate_rasters, x, cells=TRUE, na.rm=TRUE, ID=TRUE) + covariate_data <- terra::extract(covariate_rasters, polygon_shapefile, cells=TRUE, na.rm=TRUE, ID=TRUE) names(covariate_data)[1] <- id_var # Remove the aggregation raster @@ -136,8 +136,9 @@ prepare_data <- function(x, names(covariate_rasters) <- cov_names - aggregation_pixels <- as.numeric(covariate_data[ , terra::ncol(covariate_data)]) - covariate_data <- covariate_data[, -terra::ncol(covariate_data)] + agg_filter <- names(covariate_data) %in% c('aggregation_raster') + aggregation_pixels <- as.numeric(covariate_data[ , agg_filter]) + covariate_data <- covariate_data[, !agg_filter] # Check for NAs in population data if(sum(is.na(aggregation_pixels)) != 0) { @@ -151,13 +152,14 @@ prepare_data <- function(x, # Check for NAs in covariate data if(sum(is.na(covariate_data)) != 0) { if(na.action) { - covariate_data[-c(1:2)] <- sapply(covariate_data[-c(1:2)], function(x) { x[is.na(x)] <- stats::median(x, na.rm = T); return(x) }) + cov_filter <- !(names(covariate_data) %in% c(id_var,'cell')) + covariate_data[ , cov_filter] <- sapply(covariate_data[ , cov_filter], function(x) { x[is.na(x)] <- stats::median(x, na.rm = T); return(x) }) } else { stop('There are NAs in the covariate rasters within polygons. Please deal with these, or set na.action = TRUE') } } - coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cellid) + coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cell) coordsForPrediction <- extractCoordsForMesh(covariate_rasters) @@ -168,14 +170,14 @@ prepare_data <- function(x, mesh <- NULL message("Cannot build mesh as INLA is not installed. If you need a spatial field in your model, you must install INLA.") } else { - mesh <- build_mesh(x, mesh.args) + mesh <- build_mesh(polygon_shapefile, mesh.args) } } else { mesh <- NULL message("A mesh is not being built. 
You will not be able to run a spatial model without a mesh.") } - disag_data <- list(x = x, + disag_data <- list(polygon_shapefile = polygon_shapefile, shapefile_names = list(id_var = id_var, response_var = response_var), covariate_rasters = covariate_rasters, polygon_data = polygon_data, @@ -223,7 +225,7 @@ prepare_data <- function(x, #' @export -as.disag_data <- function(x, +as.disag_data <- function(polygon_shapefile, shapefile_names, covariate_rasters, polygon_data, @@ -234,9 +236,9 @@ as.disag_data <- function(x, startendindex, mesh = NULL) { - stopifnot(inherits(x, 'SpatialPolygonsDataFrame')) + stopifnot(inherits(polygon_shapefile, 'sf')) stopifnot(inherits(shapefile_names, 'list')) - stopifnot(inherits(covariate_rasters, c('RasterBrick', 'RasterStack'))) + stopifnot(inherits(covariate_rasters, 'SpatRaster')) stopifnot(inherits(polygon_data, 'data.frame')) stopifnot(inherits(covariate_data, 'data.frame')) stopifnot(inherits(aggregation_pixels, 'numeric')) @@ -247,7 +249,7 @@ as.disag_data <- function(x, stopifnot(inherits(mesh, 'inla.mesh')) } - disag_data <- list(x = x, + disag_data <- list(polygon_shapefile = polygon_shapefile, shapefile_names = shapefile_names, covariate_rasters = covariate_rasters, polygon_data = polygon_data, diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index 7cdcee8..3702171 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -10,44 +10,45 @@ for(i in 1:n_polygons) { row <- ceiling(i/n_polygon_per_side) col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) } -polys <- do.call(raster::spPolygons, polygons) +polys <- lapply(polygons,sf::st_polygon) N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_na <- sp::SpatialPolygonsDataFrame(polys, response_na_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) +spdf <- sf::st_sf(response_df, geometry = polys) +spdf_na <- sf::st_sf(response_na_df, geometry = polys) +spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) # Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) 
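# raster -> terra idiom used throughout these migrated tests: terra::rast(ncol, nrow)
# followed by terra::ext(r) <- terra::ext(spdf) replaces raster::raster() plus
# raster::setExtent(), and c(r, r2) below replaces raster::stack(r, r2),
# yielding a multi-layer SpatRaster.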
+terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) test_that("Check prepare_data function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - result <- prepare_data(polygon_shapefile = spdf, + + result <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack) - + expect_is(result, 'disag_data') expect_equal(length(result), 10) - expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', + expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) - expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame') + expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') - expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack')) + expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') @@ -58,25 +59,25 @@ test_that("Check prepare_data function works as expected", { expect_equal(sum(is.na(result$polygon_data$N)), length(result$polygon_data$N)) expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) - + }) test_that("Check prepare_data function with sample size works as expected", { - + skip_on_cran() - - result <- prepare_data(polygon_shapefile = spdf_binom, + + result <- prepare_data(polygon_shapefile = spdf_binom, covariate_rasters = cov_stack, sample_size_var = 'sample_size', makeMesh = FALSE) - + expect_is(result, 'disag_data') expect_equal(length(result), 10) - expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', + expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) - expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame') + expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') - expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack')) + expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') @@ -87,36 +88,36 @@ test_that("Check prepare_data function with sample size works as expected", { expect_equal(sum(is.na(result$polygon_data$N)), 0) expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) - + }) test_that("Check prepare_data function deals with NAs as expected", { - + skip_on_cran() - + cov_stack_na <- cov_stack cov_stack_na[[1]][c(1:10)] <- NA - + aggregation_raster_na <- r aggregation_raster_na[c(1:10)] <- NA - + expect_error(prepare_data(polygon_shapefile = spdf_na, covariate_rasters = cov_stack, makeMesh = FALSE)) expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack_na, makeMesh = FALSE)) expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack, aggregation_raster = aggregation_raster_na, makeMesh = FALSE)) - - result <- 
prepare_data(polygon_shapefile = spdf_na, + + result <- prepare_data(polygon_shapefile = spdf_na, covariate_rasters = cov_stack_na, aggregation_raster = aggregation_raster_na, na.action = TRUE, makeMesh = FALSE) - + expect_is(result, 'disag_data') expect_equal(length(result), 10) - expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', + expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) - expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame') + expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') - expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack')) + expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') @@ -133,43 +134,40 @@ test_that("Check prepare_data function deals with NAs as expected", { test_that("Check as.disag_data function works as expected", { - + skip_on_cran() - + polygon_data <- getPolygonData(spdf, id_var = 'area_id', response_var = 'response') - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - + + cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) + names(cov_data)[1] <- 'area_id' + aggregation_data <- rep(1, nrow(cov_data)) - + coordsForFit <- extractCoordsForMesh(cov_stack, cov_data$cellid) - + coordsForPrediction <- extractCoordsForMesh(cov_stack) - + startendindex <- getStartendindex(cov_data, polygon_data, 'area_id') - - result <- as.disag_data(spdf, + + result <- as.disag_data(spdf, list('area_id', 'response'), cov_stack, - polygon_data, - cov_data, + polygon_data, + cov_data, aggregation_data, - coordsForFit, + coordsForFit, coordsForPrediction, startendindex, mesh = NULL) - + expect_is(result, 'disag_data') expect_equal(length(result), 10) - expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', + expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) - expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame') + expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') - expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack')) + expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') @@ -179,6 +177,6 @@ test_that("Check as.disag_data function works as expected", { expect_true(is.null(result$mesh)) expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) - + }) From 0b83bd7c936406d3ca537a706752d23be751b88c Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 15 Sep 2023 17:00:28 +0100 Subject: [PATCH 138/168] start to migrate test-fit-model --- R/fit_model.R | 2 +- tests/testthat/test-fit-model.R | 340 +++++++++++++++-------------- tests/testthat/test-prepare-data.R | 2 - 3 files changed, 173 
insertions(+), 171 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index b1fff72..dba8bb0 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -358,7 +358,7 @@ make_model_object <- function(data, } # Construct sensible default field hyperpriors - limits <- sf::st_bbox(data$x) + limits <- sf::st_bbox(data$polygon_shapefile) hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2) prior_rho <- hypotenuse/3 diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 37a9b73..f1b76b4 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -1,168 +1,172 @@ - -context("Fitting model") - -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - -test_that("disag_model produces errors when expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - expect_error(disag_model(list())) - expect_error(disag_model(test_data, iterations = 'iterations')) - expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) - expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) - expect_error(disag_model(test_data, family = 'banana')) - expect_error(disag_model(test_data, link = 'apple')) - -}) - -test_that("disag_model behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iterations = 2, iid = FALSE) - - expect_is(result, 'disag_model') - expect_equal(length(result), 5) - expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - - - -}) - - - - -test_that("disag_model with 1 covariate behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - test_data2 <- test_data - test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] - test_data2$covariate_data <- 
test_data2$covariate_data[, 1:3] - - result <- disag_model(test_data2, iterations = 2, iid = FALSE) - - expect_is(result, 'disag_model') - expect_equal(length(result), 5) - - # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. - expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - - # Confirm only two covariates were fitted. - expect_equal(sum(names(result$opt$par) == 'slope'), 1) - -}) -test_that("user defined model setup is working as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - binom_data <- prepare_data(polygon_shapefile = spdf_binom, - covariate_rasters = cov_stack, - sample_size_var = 'sample_size') - - result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') - result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') - result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') - - expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) - - expect_is(result2, 'disag_model') - expect_equal(length(result2), 5) - expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) - expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) - expect_false(result2$model_setup$field) - expect_true(result2$model_setup$iid) - expect_equal(result2$model_setup$family, 'poisson') - expect_equal(result2$model_setup$link, 'log') - - expect_is(result3, 'disag_model') - expect_equal(length(result3), 5) - expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) - expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) - expect_true(result3$model_setup$field) - expect_false(result3$model_setup$iid) - expect_equal(result3$model_setup$family, 'binomial') - expect_equal(result3$model_setup$link, 'logit') - - expect_is(result4, 'disag_model') - expect_equal(length(result4), 5) - expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) - expect_equal(unique(names(result4$sd_out$par.random)), NULL) - expect_false(result4$model_setup$field) - expect_false(result4$model_setup$iid) - expect_equal(result4$model_setup$family, 'gaussian') - expect_equal(result4$model_setup$link, 'identity') -}) - -test_that("make_model_object behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - result <- make_model_object(test_data) - - expect_is(result, 'list') - expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) - -}) - -test_that("setup_hess_control behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - obj <- make_model_object(test_data) - - opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) - - hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) - - expect_is(hess_control, 'list') - expect_equal(length(hess_control$parscale), length(opt$par)) - expect_equal(length(hess_control$ndeps), length(opt$par)) - -}) - + +context("Fitting model") + +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- 
ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} + +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) +response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) + +spdf <- sf::st_sf(response_df, geometry = polys) +spdf_na <- sf::st_sf(response_na_df, geometry = polys) +spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) + +# Create raster stack +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) + + +if(identical(Sys.getenv("NOT_CRAN"), "true")) { + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) +} else { + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack, + makeMesh = FALSE) +} + +test_that("disag_model produces errors when expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + expect_error(disag_model(list())) + expect_error(disag_model(test_data, iterations = 'iterations')) + expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) + expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) + expect_error(disag_model(test_data, family = 'banana')) + expect_error(disag_model(test_data, link = 'apple')) + +}) + +test_that("disag_model behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + result <- disag_model(test_data, iterations = 2, iid = FALSE) + + expect_is(result, 'disag_model') + expect_equal(length(result), 5) + expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4) + expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) + + + +}) + + + + +test_that("disag_model with 1 covariate behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + test_data2 <- test_data + test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] + test_data2$covariate_data <- test_data2$covariate_data[, 1:3] + + result <- disag_model(test_data2, iterations = 2, iid = FALSE) + + expect_is(result, 'disag_model') + expect_equal(length(result), 5) + + # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. + expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) + expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) + + # Confirm only two covariates were fitted. 
+ expect_equal(sum(names(result$opt$par) == 'slope'), 1) + +}) +test_that("user defined model setup is working as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + binom_data <- prepare_data(polygon_shapefile = spdf_binom, + covariate_rasters = cov_stack, + sample_size_var = 'sample_size') + + result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') + result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') + result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') + + expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) + + expect_is(result2, 'disag_model') + expect_equal(length(result2), 5) + expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) + expect_false(result2$model_setup$field) + expect_true(result2$model_setup$iid) + expect_equal(result2$model_setup$family, 'poisson') + expect_equal(result2$model_setup$link, 'log') + + expect_is(result3, 'disag_model') + expect_equal(length(result3), 5) + expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) + expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) + expect_true(result3$model_setup$field) + expect_false(result3$model_setup$iid) + expect_equal(result3$model_setup$family, 'binomial') + expect_equal(result3$model_setup$link, 'logit') + + expect_is(result4, 'disag_model') + expect_equal(length(result4), 5) + expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(unique(names(result4$sd_out$par.random)), NULL) + expect_false(result4$model_setup$field) + expect_false(result4$model_setup$iid) + expect_equal(result4$model_setup$family, 'gaussian') + expect_equal(result4$model_setup$link, 'identity') +}) + +test_that("make_model_object behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + result <- make_model_object(test_data) + + expect_is(result, 'list') + expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) + +}) + +test_that("setup_hess_control behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + obj <- make_model_object(test_data) + + opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) + + hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) + + expect_is(hess_control, 'list') + expect_equal(length(hess_control$parscale), length(opt$par)) + expect_equal(length(hess_control$ndeps), length(opt$par)) + +}) + diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index 3702171..6af1b94 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -17,11 +17,9 @@ for(i in 1:n_polygons) { polys <- lapply(polygons,sf::st_polygon) N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) spdf <- sf::st_sf(response_df, geometry = polys) -spdf_na 
<- sf::st_sf(response_na_df, geometry = polys) spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) # Create raster stack From 0a095c23960cbfca927c01ded343b674a43fef75 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 10 Oct 2023 14:24:35 +0100 Subject: [PATCH 139/168] more test migration, migrate to fmesher, update docs --- DESCRIPTION | 1 + NAMESPACE | 5 -- R/build_mesh.R | 5 +- R/predict.R | 32 +++++---- man/as.disag_data.Rd | 16 ++--- man/fit_model.Rd | 42 +++++------ man/getCovariateRasters.Rd | 4 +- man/getPolygonData.Rd | 27 +++---- man/getStartendindex.Rd | 6 +- man/make_model_object.Rd | 26 +++---- man/parallelExtract.Rd | 58 --------------- man/predict.disag_model.Rd | 16 ++--- man/predict_model.Rd | 8 +-- man/predict_uncertainty.Rd | 10 +-- man/prepare_data.Rd | 80 +++++++++++---------- tests/testthat/test-fit-model.R | 22 +++--- tests/testthat/test-predict-model.R | 106 ++++++++++++++-------------- tests/testthat/test-prepare-data.R | 2 + 18 files changed, 207 insertions(+), 259 deletions(-) delete mode 100644 man/parallelExtract.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 873a8bd..faddfb0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,6 +30,7 @@ Imports: ggplot2, cowplot, sparseMVN, + fmesher, terra, sf, utils diff --git a/NAMESPACE b/NAMESPACE index bd927c7..47705eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,12 +24,7 @@ export(prepare_data) import(ggplot2) import(splancs) import(utils) -importFrom(doParallel,registerDoParallel) -importFrom(foreach,"%dopar%") -importFrom(parallel,makeCluster) -importFrom(parallel,stopCluster) importFrom(stats,cor) importFrom(stats,quantile) importFrom(stats,sd) useDynLib(disaggregation) - diff --git a/R/build_mesh.R b/R/build_mesh.R index 7ad5017..97a2bcc 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -63,17 +63,16 @@ build_mesh <- function(shapes, mesh.args = NULL) { pars[names(mesh.args)] <- mesh.args - #outline <- maptools::unionSpatialPolygons(shapes_old, IDs = rep(1, length(shapes_old))) outline <- sf::st_sf(sf::st_union(sf::st_convex_hull(shapes))) coords <- sf::st_coordinates(outline)[, c('X', 'Y')] - outline.hull <- INLA::inla.nonconvex.hull(coords, + outline.hull <- fmesher::fm_nonconvex_hull_inla(coords, convex = pars$convex, concave = pars$concave, resolution = pars$resolution) - mesh <- INLA::inla.mesh.2d( + mesh <- fmesher::fm_mesh_2d( boundary = outline.hull, max.edge = pars$max.edge, cut = pars$cut, diff --git a/R/predict.R b/R/predict.R index 0aeeedf..f27bf2b 100644 --- a/R/predict.R +++ b/R/predict.R @@ -236,7 +236,6 @@ getAmatrix <- function(mesh, coords) { return(Amatrix) } - # Helper to check and sort out new raster data. 
check_newdata <- function(newdata, model_output){ if(is.null(newdata)) return(NULL) @@ -288,10 +287,13 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { } if(predict_iid) { - tmp_shp <- model_output$data$x - tmp_shp <- dplyr::bind_cols(tmp_shp, - area_id = - factor(model_output$data$polygon_data$area_id)) + tmp_shp <- model_output$data$polygon_shapefile + #needed to avoid errors in testing + if (!("area_id" %in% names(model_output$data$polygon_shapefile))){ + tmp_shp <- dplyr::bind_cols(tmp_shp, + area_id = + factor(model_output$data$polygon_data$area_id)) + } shapefile_raster <- terra::rasterize(tmp_shp, model_output$data$covariate_rasters, field = 'area_id') @@ -330,13 +332,13 @@ predict_single_raster <- function(model_parameters, objects, link_function) { field <- (objects$field_objects$Amatrix %*% model_parameters$nodemean)[, 1] field_ras <- terra::rast(cbind(objects$field_objects$coords, field), type = 'xyz', - crs = crs(linear_pred)) + crs = terra::crs(linear_pred)) linear_pred <- linear_pred + field_ras } else { field_ras <- NULL } - if(!is.null(objects$iid_objects)) { + if(!is.null(objects$iid_objects)) { iid_ras <- objects$iid_objects$shapefile_raster iideffect_sd <- 1/sqrt(exp(model_parameters$iideffect_log_tau)) # todo @@ -344,22 +346,24 @@ predict_single_raster <- function(model_parameters, objects, link_function) { targetvals <- terra::values(objects$iid_objects$shapefile_raster, dataframe = FALSE, mat = FALSE) whichvals <- which(targetvals == objects$iid_objects$shapefile_ids[1, i]) - values(iid_ras)[whichvals] <- + terra::values(iid_ras)[whichvals] <- model_parameters$iideffect[i] - na_pixels <- which(is.na(values(iid_ras, dataframe = FALSE, mat = FALSE))) + na_pixels <- which(is.na(terra::values(iid_ras, dataframe = FALSE, mat = FALSE))) na_iid_values <- stats::rnorm(length(na_pixels), 0, iideffect_sd) - values(iid_ras)[na_pixels] <- na_iid_values + terra::values(iid_ras)[na_pixels] <- na_iid_values } if(terra::ext(iid_ras) != terra::ext(linear_pred)) { # Extent of prediction space is different to the original model. 
Keep any overlapping iid values but predict to the new extent raster_new_extent <- linear_pred - values(raster_new_extent) <- NA - # iid_ras <- terra::merge(iid_ras, raster_new_extent, ext = terra::ext(raster_new_extent)) + terra::values(raster_new_extent) <- NA + #iid_ras <- terra::merge(iid_ras, raster_new_extent, ext = terra::ext(raster_new_extent)) # NOt sure why we no longer need the ext argument + # SS - added a crop which I think does the same thing iid_ras <- terra::merge(iid_ras, raster_new_extent) - missing_pixels <- which(is.na(values(iid_ras, dataframe = FALSE, mat = FALSE))) + iid_ras <- terra::crop(iid_ras, raster_new_extent) + missing_pixels <- which(is.na(terra::values(iid_ras, dataframe = FALSE, mat = FALSE))) missing_iid_values <- stats::rnorm(length(missing_pixels), 0, iideffect_sd) - values(iid_ras)[missing_pixels] <- missing_iid_values + terra::values(iid_ras)[missing_pixels] <- missing_iid_values } linear_pred <- linear_pred + iid_ras } else { diff --git a/man/as.disag_data.Rd b/man/as.disag_data.Rd index 01903e3..c8e92f9 100644 --- a/man/as.disag_data.Rd +++ b/man/as.disag_data.Rd @@ -18,9 +18,7 @@ as.disag_data( ) } \arguments{ -\item{polygon_shapefile}{SpatialPolygonDataFrame containing the response data} - -\item{shapefile_names}{List of 2: polygon id variable name and response variable name from polygon_shapefile} +\item{shapefile_names}{List of 2: polygon id variable name and response variable name from x} \item{covariate_rasters}{RasterStack of covariates} @@ -30,20 +28,22 @@ as.disag_data( \item{aggregation_pixels}{vector with value of aggregation raster at each pixel} -\item{coordsForFit}{coordinates of the covariate data points within the polygons in polygon_shapefile} +\item{coordsForFit}{coordinates of the covariate data points within the polygons in x} \item{coordsForPrediction}{coordinates of the covariate data points in the whole raster extent} \item{startendindex}{matrix containing the start and end index for each polygon} \item{mesh}{inla.mesh object to use in the fit} + +\item{x}{SpatialPolygonDataFrame containing the response data} } \value{ -A list is returned of class \code{disag_data}. -The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. +A list is returned of class \code{disag_data}. +The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. The list of class \code{disag_data} contains: - \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.} - \item{covariate_rasters }{The RasterStack used as an input.} + \item{x }{The SpatialPolygonDataFrame used as an input.} + \item{covariate_rasters }{The RasterStack used as an input.} \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/man/fit_model.Rd b/man/fit_model.Rd index c1dec35..6a6b2d3 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -46,28 +46,28 @@ disag_model( \item{iid}{logical. Flag the iid effect on or off} -\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. 
+\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.} -\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. -Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. -Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. +\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. +Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. +Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. See \code{\link[stats]{optimHess}} for details.} \item{silent}{logical. Suppress verbose output.} } \value{ -A list is returned of class \code{disag_model}. -The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. +A list is returned of class \code{disag_model}. +The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. The list of class \code{disag_model} contains: - \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} - \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} + \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} + \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} \item{data }{The \emph{disag_data} object used as an input to the model.} \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} } \description{ -\emph{fit_model} function takes a \emph{disag_data} object created by +\emph{fit_model} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} and performs a Bayesian disaggregation fit. } \details{ @@ -82,24 +82,24 @@ And then aggregates these predictions to the polygon level using the weighted su The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): \itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where + \item Gaussian: + If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: + \item Binomial: For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: + \item Poisson: \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. } -Specify priors for the regression parameters, field and iid effect as a single list. 
Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field +are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. These are specified as strings. @@ -133,16 +133,16 @@ The \emph{silent} argument can be used to publish/suppress verbose output. Defau cl <- parallel::makeCluster(2) doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, + test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_rasters) parallel::stopCluster(cl) foreach::registerDoSEQ() - + result <- fit_model(test_data, iterations = 2) } } \references{ -Nanda et al. (2023) disaggregation: An R Package for Bayesian +Nanda et al. (2023) disaggregation: An R Package for Bayesian Spatial Disaggregation Modeling. } diff --git a/man/getCovariateRasters.Rd b/man/getCovariateRasters.Rd index eeb6cc0..fcac9b0 100644 --- a/man/getCovariateRasters.Rd +++ b/man/getCovariateRasters.Rd @@ -14,10 +14,10 @@ getCovariateRasters(directory, file_pattern = ".tif$", shape) \item{shape}{An object with an extent that the rasters will be cropped to.} } \value{ -A RasterStack of the raster files in the directory +A multi-layered SpatRaster of the raster files in the directory } \description{ -Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter. +Looks in a specified folder for raster files. Returns a multi-layered SpatRaster of the rasters cropped to the extent specified by the shape parameter. } \examples{ \dontrun{ diff --git a/man/getPolygonData.Rd b/man/getPolygonData.Rd index 3b57903..e6ec032 100644 --- a/man/getPolygonData.Rd +++ b/man/getPolygonData.Rd @@ -12,7 +12,7 @@ getPolygonData( ) } \arguments{ -\item{shape}{A SpatialPolygons object containing response data.} +\item{shape}{A sf object containing response data.} \item{id_var}{Name of column in shape object with the polygon id. Default 'area_id'.} @@ -22,27 +22,28 @@ getPolygonData( } \value{ A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does not exist), this column will contain NAs. 
} \description{ Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does not exist), this column will contain NAs. } \examples{ { - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } +polygons <- list() +for(i in 1:100) { + row <- ceiling(i/10) + col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +polys <- lapply(polygons,sf::st_polygon) +response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +spdf <- sf::st_sf(response_df, geometry = polys) getPolygonData(spdf, id_var = 'area_id', response_var = 'response') } diff --git a/man/getStartendindex.Rd b/man/getStartendindex.Rd index 6848ada..e30350e 100644 --- a/man/getStartendindex.Rd +++ b/man/getStartendindex.Rd @@ -19,12 +19,12 @@ covariate data that corresponds to that polygon, the second column is the index covariate data that corresponds to that polygon. } \description{ -From the covariate data and polygon data, the function matches the polygon id between the two to find +From the covariate data and polygon data, the function matches the polygon id between the two to find which pixels from the covariate data are contained in each of the polygons. } \details{ -Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -and another data.frame containing polygon data with a polygon id, response and sample size column (as returned +Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, +and another data.frame containing polygon data with a polygon id, response and sample size column (as returned by \code{getPolygonData} function). Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd index b040041..d855661 100644 --- a/man/make_model_object.Rd +++ b/man/make_model_object.Rd @@ -33,7 +33,7 @@ make_model_object( The TMB model object returned by \code{\link[TMB]{MakeADFun}}. } \description{ -\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} +\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} and creates a TMB model object to be used in fitting. 
}
\details{
@@ -48,20 +48,20 @@ And then aggregates these predictions to the polygon level using the weighted su
The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data):
\itemize{
-  \item Gaussian:
-    If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where
+  \item Gaussian:
+    If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where
    \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi}
    \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate.
-  \item Binomial:
+  \item Binomial:
    For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested.
    \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate.
-  \item Poisson:
+  \item Poisson:
    \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
}

-Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field
-are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
-where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
+Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field
+are given as penalised complexity priors: you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
+where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
 where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.

The precise names and default values for these priors are:
@@ -78,8 +78,8 @@ The precise names and default values for these priors are:
 \item prior_iideffect_sd_prob: 0.01
}

-The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
-The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.
+The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
+The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.
 The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. These are specified as strings.

@@ -113,12 +113,12 @@ The \emph{silent} argument can be used to publish/suppress verbose output. Defaul
 cl <- parallel::makeCluster(2)
 doParallel::registerDoParallel(cl)
-  test_data <- prepare_data(polygon_shapefile = spdf,
+  test_data <- prepare_data(polygon_shapefile = spdf,
                             covariate_rasters = cov_rasters)
 parallel::stopCluster(cl)
 foreach::registerDoSEQ()
-
+
 result <- make_model_object(test_data)
}
-
+
}
diff --git a/man/parallelExtract.Rd b/man/parallelExtract.Rd
deleted file mode 100644
index 52be889..0000000
--- a/man/parallelExtract.Rd
+++ /dev/null
@@ -1,58 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/extract.R
-\name{parallelExtract}
-\alias{parallelExtract}
-\title{Parallel extraction of raster stack by shape file.}
-\usage{
-parallelExtract(raster, shape, fun = mean, id = "OBJECTID", ...)
-} -\arguments{ -\item{raster}{A RasterBrick or RasterStack object.} - -\item{shape}{A SpatialPolygons object.} - -\item{fun}{The function used to aggregate the pixel data. If NULL, raw pixel data is returned.} - -\item{id}{Name of column in shape object to be used to bind an ID column to output.} - -\item{...}{Other arguments to raster::extract.} -} -\value{ -A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack -} -\description{ -Parallelisation is performed across rasters, not shapes. -So this function is only useful if you are extracting -data from many raster layers. -As the overhead for parallel computation in windows is high -it only makes sense to parallelise in this way. -} -\examples{ - \dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - } -} diff --git a/man/predict.disag_model.Rd b/man/predict.disag_model.Rd index a94b1ce..edea4ac 100644 --- a/man/predict.disag_model.Rd +++ b/man/predict.disag_model.Rd @@ -9,7 +9,7 @@ \arguments{ \item{object}{disag_model object returned by disag_model function.} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{newdata}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data.} \item{predict_iid}{logical. If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} @@ -21,14 +21,14 @@ If this is a raster stack or brick, predictions will be made over this data.} \item{...}{Further arguments passed to or from other methods.} } \value{ -An object of class \emph{disag_prediction} which consists of a list of two objects: +An object of class \emph{disag_prediction} which consists of a list of two objects: \item{mean_prediction }{List of: \itemize{ \item \emph{prediction} Raster of mean predictions based. \item \emph{field} Raster of the field component of the linear predictor. \item \emph{iid} Raster of the iid component of the linear predictor. \item \emph{covariates} Raster of the covariate component of the linear predictor. - }} + }} \item{uncertainty_prediction: }{List of: \itemize{ \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. 
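The prediction object documented above is exercised by the tests later in this series; as a hedged sketch of the typical access pattern (fit_result stands for any fitted disag_model object, and the classes follow the terra-based types this series migrates to):

# Sketch only, assuming 'fit_result' is a fitted disag_model object.
pred <- predict(fit_result, predict_iid = TRUE, N = 100)

# Pieces of the linear predictor, each a raster layer:
pred$mean_prediction$prediction
pred$mean_prediction$field
pred$mean_prediction$iid
pred$mean_prediction$covariates

# Uncertainty: N realisations plus lower/upper credible interval layers.
terra::nlyr(pred$uncertainty_prediction$realisations)    # 100, i.e. N
terra::nlyr(pred$uncertainty_prediction$predictions_ci)  # 2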
@@ -36,17 +36,17 @@ An object of class \emph{disag_prediction} which consists of a list of two objec }} } \description{ -\emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and +\emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and predicts mean and uncertainty maps. } \details{ -To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +To predict over a different spatial extent to that used in the model, +a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. -The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated +For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated are given by the arguments \emph{N} and \emph{CI} respectively. } \examples{ diff --git a/man/predict_model.Rd b/man/predict_model.Rd index 93a9647..3fbbc26 100644 --- a/man/predict_model.Rd +++ b/man/predict_model.Rd @@ -9,7 +9,7 @@ predict_model(model_output, newdata = NULL, predict_iid = FALSE) \arguments{ \item{model_output}{disag_model object returned by disag_model function} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{newdata}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data. Default NULL.} \item{predict_iid}{If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} @@ -24,15 +24,15 @@ The mean prediction, which is a list of: } } \description{ -\emph{predict_model} function takes a \emph{disag_model} object created by +\emph{predict_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and predicts mean maps. } \details{ Function returns rasters of the mean predictions as well as the covariate and field contributions to the linear predictor. -To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +To predict over a different spatial extent to that used in the model, +a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/predict_uncertainty.Rd b/man/predict_uncertainty.Rd index b51c09e..5297230 100644 --- a/man/predict_uncertainty.Rd +++ b/man/predict_uncertainty.Rd @@ -15,7 +15,7 @@ predict_uncertainty( \arguments{ \item{model_output}{disag_model object returned by disag_model function.} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{newdata}{If NULL, predictions are made using the data in model_output. 
If this is a raster stack or brick, predictions will be made over this data. Default NULL.}

\item{predict_iid}{If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.}
@@ -32,17 +32,17 @@ The uncertainty prediction, which is a list of:
}
}
\description{
-\emph{predict_uncertainty} function takes a \emph{disag_model} object created by
+\emph{predict_uncertainty} function takes a \emph{disag_model} object created by
\emph{disaggregation::disag_model} and predicts upper and lower credible interval maps.
}
\details{
Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters.

-To predict over a different spatial extent to that used in the model,
-a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}.
+To predict over a different spatial extent to that used in the model,
+a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}.
If this is not given predictions are made over the data used in the fit.

-The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction.
+The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction.

The number of realisations and the size of the confidence interval to be calculated
are given by the arguments \emph{N} and \emph{CI} respectively.
diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd
index e66a503..83792d2 100644
--- a/man/prepare_data.Rd
+++ b/man/prepare_data.Rd
@@ -18,8 +18,6 @@ prepare_data(
)
}
\arguments{
-\item{polygon_shapefile}{SpatialPolygonDataFrame containing at least two columns: one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).}
-
\item{covariate_rasters}{RasterStack of covariate rasters to be used in the model.}

\item{aggregation_raster}{Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.}
@@ -37,13 +35,15 @@ prepare_data(
\item{makeMesh}{logical. If TRUE, build INLA mesh, takes some time. Default TRUE.}

\item{ncores}{Number of cores used to perform covariate extraction.}
+
+\item{x}{An sf object containing at least three columns: one with the geometries, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).}
}
\value{
-A list is returned of class \code{disag_data}.
-The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
+A list is returned of class \code{disag_data}.
+The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
The list of class \code{disag_data} contains:
-  \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.}
-  \item{covariate_rasters }{The RasterStack used as an input.}
+  \item{x }{The sf object used as an input.}
+  \item{covariate_rasters }{The SpatRaster used as an input.}
  \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data).
Each row represents a polygon.}
  \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.}
  \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.}
@@ -53,57 +53,59 @@ The list of class \code{disag_data} contains:
  \item{mesh }{An INLA mesh to be used for the spatial field of the disaggregation model.}
}
\description{
-\emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model.
+\emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model.
Designed to be used in the \emph{disaggregation::fit_model} function.
}
\details{
-Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates.
+Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates.

-Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons
-(\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores
+Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons
+(\emph{parallelExtract} function). This is done in parallel and the \emph{n.cores} argument is used to set the number of cores
to use for covariate extraction. This can be the number of covariates used in the model.

-The aggregation raster defines how the pixels within each polygon are aggregated.
+The aggregation raster defines how the pixels within each polygon are aggregated.
The disaggregation model performs a weighted sum of the pixel predictions, weighted by the pixel values in the aggregation raster.
-For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases
-(rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions
+For disease incidence rate, you use the population raster to aggregate pixel incidence rate by summing the number of cases
+(rate weighted by population). If no aggregation raster is provided, a uniform distribution is assumed, i.e. the pixel predictions
are aggregated to polygon level by summing the pixel values.

-Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field
+Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field
(\emph{getStartendindex} function).

-The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field
+The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field
(\emph{build_mesh} function).

-The \emph{na.action} flag is automatically off.
If there are any NAs in the response or covariate data within the polygons the
+\emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE
+will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero
and sets covariate NA pixels to the median value for that covariate.
}
\examples{
\donttest{
-  polygons <- list()
-  for(i in 1:100) {
+polygons <- list()
+for(i in 1:100) {
   row <- ceiling(i/10)
   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
- }
+  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                              c(ymax, ymax, ymin, ymin, ymax)))
+}

-  polys <- do.call(raster::spPolygons, polygons)
-  response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
-  spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-
-  r <- raster::raster(ncol=20, nrow=20)
-  r <- raster::setExtent(r, raster::extent(spdf))
-  r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
-  r2 <- raster::raster(ncol=20, nrow=20)
-  r2 <- raster::setExtent(r2, raster::extent(spdf))
-  r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
-  cov_rasters <- raster::stack(r, r2)
-
-  test_data <- prepare_data(polygon_shapefile = spdf,
-                            covariate_rasters = cov_rasters)
-}
-
+polys <- lapply(polygons, sf::st_polygon)
+response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
+spdf <- sf::st_sf(response_df, geometry = polys)
+
+r <- terra::rast(nrow=20, ncol=20)
+terra::ext(r) <- terra::ext(spdf)
+r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
+
+r2 <- terra::rast(nrow=20, ncol=20)
+terra::ext(r2) <- terra::ext(spdf)
+r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
+cov_rasters <- c(r, r2)
+
+test_data <- prepare_data(x = spdf,
+                          covariate_rasters = cov_rasters)
+}
+
}
diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R
index f1b76b4..9a46eed 100644
--- a/tests/testthat/test-fit-model.R
+++ b/tests/testthat/test-fit-model.R
@@ -62,11 +62,11 @@ test_that("disag_model behaves as expected", {
  skip_if_not_installed('INLA')
  skip_on_cran()

-  result <- disag_model(test_data, iterations = 2, iid = FALSE)
+  result <- disag_model(test_data, iterations = 100, iid = FALSE)

  expect_is(result, 'disag_model')
  expect_equal(length(result), 5)
-  expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4)
+  expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 4)
  expect_equal(unique(names(result$sd_out$par.random)), c("nodemean"))

@@ -85,13 +85,13 @@ test_that("disag_model with 1 covariate behaves as expected", {
  test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]]
  test_data2$covariate_data <- test_data2$covariate_data[, 1:3]

-  result <- disag_model(test_data2, iterations = 2, iid = FALSE)
+  result <- disag_model(test_data2, iterations = 100, iid = FALSE)

  expect_is(result, 'disag_model')
  expect_equal(length(result), 5)

  # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore.
- expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) + expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 3) expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) # Confirm only two covariates were fitted. @@ -107,15 +107,15 @@ test_that("user defined model setup is working as expected", { covariate_rasters = cov_stack, sample_size_var = 'sample_size') - result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') - result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') - result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') + result2 <- disag_model(test_data, iterations = 100, field = FALSE, family = 'poisson', link = 'log') + result3 <- disag_model(binom_data, iterations = 100, iid = FALSE, family = 'binomial', link = 'logit') + result4 <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE, link = 'identity') - expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) + expect_error(disag_model(test_data, iterations = 100, iid = FALSE, family = 'binomial', link = 'logit')) expect_is(result2, 'disag_model') expect_equal(length(result2), 5) - expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(length(result2$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 2) expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) expect_false(result2$model_setup$field) expect_true(result2$model_setup$iid) @@ -124,7 +124,7 @@ test_that("user defined model setup is working as expected", { expect_is(result3, 'disag_model') expect_equal(length(result3), 5) - expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) + expect_equal(length(result3$sd_out$par.fixed), terra::nlyr(binom_data$covariate_rasters) + 3) expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) expect_true(result3$model_setup$field) expect_false(result3$model_setup$iid) @@ -133,7 +133,7 @@ test_that("user defined model setup is working as expected", { expect_is(result4, 'disag_model') expect_equal(length(result4), 5) - expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(length(result4$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 2) expect_equal(unique(names(result4$sd_out$par.random)), NULL) expect_false(result4$model_setup$field) expect_false(result4$model_setup$iid) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index fab8f87..3bd32f5 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -9,21 +9,23 @@ for(i in 1:n_polygons) { row <- ceiling(i/n_polygon_per_side) col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) } -polys <- do.call(raster::spPolygons, polygons) +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -spdf <- 
sp::SpatialPolygonsDataFrame(polys, response_df) +spdf <- sf::st_sf(response_df, geometry = polys) # Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) if(identical(Sys.getenv("NOT_CRAN"), "true")) { test_data <- prepare_data(polygon_shapefile = spdf, @@ -63,18 +65,18 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) - expect_is(pred2$mean_prediction$prediction, 'Raster') - expect_is(pred2$mean_prediction$field, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') + expect_is(pred2$mean_prediction$field, 'SpatRaster') expect_true(is.null(pred2$mean_prediction$iid)) - expect_is(pred2$mean_prediction$covariates, 'Raster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 100) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 100) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) pred2 <- predict(result, predict_iid = TRUE, N = 10) @@ -85,18 +87,18 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$mean_prediction$prediction, 'Raster') - expect_is(pred2$mean_prediction$field, 'Raster') - expect_is(pred2$mean_prediction$iid, 'Raster') - expect_is(pred2$mean_prediction$covariates, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') + expect_is(pred2$mean_prediction$field, 'SpatRaster') + expect_is(pred2$mean_prediction$iid, 'SpatRaster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - 
expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 10) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 10) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) # For a model with no field or iid @@ -111,18 +113,18 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) - expect_is(pred2$mean_prediction$prediction, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') expect_true(is.null(pred2$mean_prediction$field)) expect_true(is.null(pred2$mean_prediction$iid)) - expect_is(pred2$mean_prediction$covariates, 'Raster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 100) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 100) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) }) @@ -145,7 +147,7 @@ test_that("Check predict.disag_model function works with newdata", { prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) - newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) + newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) pred1 <- predict(result) pred2 <- predict(result, newdata, predict_iid = TRUE, N = 5) @@ -156,21 +158,21 @@ test_that("Check predict.disag_model function works with newdata", { expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$mean_prediction$prediction, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') expect_true(is.null(pred2$mean_prediction$field)) - expect_is(pred2$mean_prediction$iid, 'Raster') - expect_is(pred2$mean_prediction$covariates, 'Raster') + expect_is(pred2$mean_prediction$iid, 'SpatRaster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 5) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + 
expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 5) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) - expect_false(identical(raster::extent(pred1$mean_prediction$prediction), raster::extent(pred2$mean_prediction$prediction))) - expect_false(identical(raster::extent(pred1$uncertainty_prediction$realisations), raster::extent(pred2$uncertainty_prediction$realisations))) + expect_false(identical(terra::ext(pred1$mean_prediction$prediction), terra::ext(pred2$mean_prediction$prediction))) + expect_false(identical(terra::ext(pred1$uncertainty_prediction$realisations), terra::ext(pred2$uncertainty_prediction$realisations))) }) @@ -181,13 +183,13 @@ test_that('Check that check_newdata works', { result <- disag_model(test_data, field = FALSE, iterations = 100) - newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) + newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) nd1 <- check_newdata(newdata, result) - expect_is(nd1, 'RasterBrick') + expect_is(nd1, 'SpatRaster') nn <- newdata[[1]] names(nn) <- 'extra_uneeded' - newdata2 <- raster::stack(newdata, nn) + newdata2 <- c(newdata, nn) expect_error(check_newdata(newdata2, result), NA) newdata3 <- newdata[[1]] @@ -226,7 +228,7 @@ test_that('Check that setup_objects works', { expect_is(objects$field_objects, 'list') expect_true(is.null(objects$iid_objects)) - newdata <- raster::crop(raster::stack(r, r2), c(0, 180, -90, 90)) + newdata <- terra::crop(c(r, r2), c(0, 180, -90, 90)) objects2 <- setup_objects(result, newdata) expect_is(objects2, 'list') @@ -276,10 +278,10 @@ test_that('Check that predict_single_raster works', { expect_is(pred2, 'list') expect_equal(length(pred2), 4) expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$prediction, 'Raster') - expect_is(pred2$field, 'Raster') + expect_is(pred2$prediction, 'SpatRaster') + expect_is(pred2$field, 'SpatRaster') expect_true(is.null(pred2$iid)) - expect_is(pred2$covariates, 'Raster') + expect_is(pred2$covariates, 'SpatRaster') objects2 <- setup_objects(result, predict_iid = TRUE) @@ -290,10 +292,10 @@ test_that('Check that predict_single_raster works', { expect_is(pred2, 'list') expect_equal(length(pred2), 4) expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$prediction, 'Raster') - expect_is(pred2$field, 'Raster') - expect_is(pred2$iid, 'Raster') - expect_is(pred2$covariates, 'Raster') + expect_is(pred2$prediction, 'SpatRaster') + expect_is(pred2$field, 'SpatRaster') + expect_is(pred2$iid, 'SpatRaster') + expect_is(pred2$covariates, 'SpatRaster') }) diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index 6af1b94..3702171 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -17,9 +17,11 @@ for(i in 1:n_polygons) { polys <- lapply(polygons,sf::st_polygon) N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) spdf <- sf::st_sf(response_df, geometry = polys) +spdf_na <- sf::st_sf(response_na_df, geometry = polys) spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) # Create raster stack From 4130016a543a06f811ab583bf25d8e6486c3da7d Mon Sep 17 00:00:00 2001 From: 
simon-smart88 Date: Tue, 10 Oct 2023 15:18:50 +0100 Subject: [PATCH 140/168] fix predict tests and remove some dependencies --- DESCRIPTION | 3 --- tests/testthat/test-predict-model.R | 13 +++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index faddfb0..5f757ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,8 +20,6 @@ RoxygenNote: 7.2.3 Imports: foreach, sp, - parallel, - doParallel, splancs, Matrix, stats, @@ -37,7 +35,6 @@ Imports: Additional_repositories: https://inla.r-inla-download.org/R/stable Suggests: testthat, - INLA, knitr, rmarkdown, SpatialEpi diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 3bd32f5..79b2960 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -27,6 +27,8 @@ terra::ext(r2) <- terra::ext(spdf) r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) cov_stack <- c(r, r2) +names(cov_stack) <- c('layer1', 'layer2') + if(identical(Sys.getenv("NOT_CRAN"), "true")) { test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack) @@ -38,7 +40,6 @@ if(identical(Sys.getenv("NOT_CRAN"), "true")) { test_that("Check predict.disag_model function works as expected", { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 1000, @@ -132,7 +133,6 @@ test_that("Check predict.disag_model function works as expected", { test_that("Check predict.disag_model function works with newdata", { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, @@ -148,6 +148,7 @@ test_that("Check predict.disag_model function works with newdata", { prior_iideffect_sd_prob = 0.01)) newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) + names(newdata) <- c('layer1', 'layer2') pred1 <- predict(result) pred2 <- predict(result, newdata, predict_iid = TRUE, N = 5) @@ -178,17 +179,18 @@ test_that("Check predict.disag_model function works with newdata", { test_that('Check that check_newdata works', { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, field = FALSE, iterations = 100) newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) + names(newdata) <- c('layer1', 'layer2') + nd1 <- check_newdata(newdata, result) expect_is(nd1, 'SpatRaster') nn <- newdata[[1]] - names(nn) <- 'extra_uneeded' + names(nn) <- 'extra_unneeded' newdata2 <- c(newdata, nn) expect_error(check_newdata(newdata2, result), NA) @@ -203,7 +205,6 @@ test_that('Check that check_newdata works', { test_that('Check that setup_objects works', { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, @@ -229,6 +230,7 @@ test_that('Check that setup_objects works', { expect_true(is.null(objects$iid_objects)) newdata <- terra::crop(c(r, r2), c(0, 180, -90, 90)) + names(newdata) <- c('layer1', 'layer2') objects2 <- setup_objects(result, newdata) expect_is(objects2, 'list') @@ -249,7 +251,6 @@ test_that('Check that setup_objects works', { test_that('Check that predict_single_raster works', { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, From aca877b332cab4b048c481e18a2a70f48516fee9 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 10 Oct 2023 15:37:56 +0100 Subject: [PATCH 141/168] move test data to helper_data --- tests/testthat/helper_data.R | 35 +++++++++++ tests/testthat/test-build-mesh.R | 24 +------- 
tests/testthat/test-extract.R | 41 ------------- tests/testthat/test-fit-model.R | 45 -------------- tests/testthat/test-plotting.R | 93 +++++++++------------------- tests/testthat/test-predict-model.R | 38 ------------ tests/testthat/test-prepare-data.R | 34 ----------- tests/testthat/test-summary.R | 94 +++++++++-------------------- 8 files changed, 96 insertions(+), 308 deletions(-) create mode 100644 tests/testthat/helper_data.R diff --git a/tests/testthat/helper_data.R b/tests/testthat/helper_data.R new file mode 100644 index 0000000..01f1f0c --- /dev/null +++ b/tests/testthat/helper_data.R @@ -0,0 +1,35 @@ +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} + +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) +response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) + +spdf <- sf::st_sf(response_df, geometry = polys) +spdf_na <- sf::st_sf(response_na_df, geometry = polys) +spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) + +# Create raster stack +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) +names(cov_stack) <- c('layer1', 'layer2') + +test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) diff --git a/tests/testthat/test-build-mesh.R b/tests/testthat/test-build-mesh.R index 97e1d98..8bd19a2 100644 --- a/tests/testthat/test-build-mesh.R +++ b/tests/testthat/test-build-mesh.R @@ -2,26 +2,8 @@ context("Build mesh") test_that("build_mesh behaves as expected", { - - skip_if_not_installed('INLA') + skip_on_cran() - - polygons <- list() - n_polygon_per_side <- 10 - n_polygons <- n_polygon_per_side * n_polygon_per_side - n_pixels_per_side <- n_polygon_per_side * 2 - - for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - - response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) my_mesh <- build_mesh(spdf) @@ -29,5 +11,5 @@ test_that("build_mesh behaves as expected", { expect_error(build_mesh(spdf, mesh.args = c(4, 8))) expect_is(my_mesh, 'inla.mesh') expect_is(build_mesh(spdf, mesh.args = list(max.edge = 
c(50, 100))), 'inla.mesh') - -}) \ No newline at end of file + +}) diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 8a0e253..74e16f5 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -1,36 +1,6 @@ context("Extract covariates and polygon data") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sf::st_sf(response_df, geometry = polys) -spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) - test_that("getPolygonData function", { skip_on_cran() @@ -64,11 +34,6 @@ test_that("getCovariateData function gives errors when it should", { expect_error(getCovariateRasters('/home/rasters', '.tif$', spdf)) # Save .tif files in tempdir() - r <- terra::rast(ncol=20, nrow=20) - r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) - r2 <- terra::rast(ncol=20, nrow=20) - r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_stack <- c(r, r2) terra::writeRaster(r, paste0(tempdir(), '/cov1.tif'), overwrite = TRUE) terra::writeRaster(r2, paste0(tempdir(), '/cov2.tif'), overwrite = TRUE) @@ -80,12 +45,6 @@ test_that("extractCoordsForMesh function behaves as it should", { skip_on_cran() - # cl <- parallel::makeCluster(2) - # doParallel::registerDoParallel(cl) - # cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = ) - # parallel::stopCluster(cl) - # foreach::registerDoSEQ() - cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) names(cov_data)[1] <- 'area_id' diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 9a46eed..4c7a384 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -1,51 +1,8 @@ context("Fitting model") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, 
max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sf::st_sf(response_df, geometry = polys) -spdf_na <- sf::st_sf(response_na_df, geometry = polys) -spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) - - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("disag_model produces errors when expected", { - skip_if_not_installed('INLA') skip_on_cran() expect_error(disag_model(list())) @@ -59,7 +16,6 @@ test_that("disag_model produces errors when expected", { test_that("disag_model behaves as expected", { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, iid = FALSE) @@ -70,7 +26,6 @@ test_that("disag_model behaves as expected", { expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - }) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 7e4e567..e4f32f6 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -1,111 +1,74 @@ context("Plotting data") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_df2 <- data.frame(area_id = 1:n_polygons, n_positive = runif(n_polygons, min = 0, max = 1), sample_size = floor(runif(n_polygons, min = 1, max = 100))) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf2 <- sp::SpatialPolygonsDataFrame(polys, response_df2) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = 
cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("Check plot_polygon_data function works as expected", { - + skip_on_cran() - + p <- plot_polygon_data(spdf, list(id_var = 'area_id', response_var = 'response')) expect_error(plot_polygon_data(polys, list(id_var = 'area_id', response_var = 'response'))) expect_is(p, 'ggplot') - + p2 <- plot_polygon_data(spdf2, list(id_var = 'area_id', response_var = 'n_positive')) expect_is(p2, 'ggplot') - + }) test_that("Check plot.disag.data function works as expected", { - - skip_if_not_installed('INLA') + skip_on_cran() - - test_data2 <- prepare_data(polygon_shapefile = spdf2, + + test_data2 <- prepare_data(polygon_shapefile = spdf2, covariate_rasters = cov_stack, response_var = 'n_positive') - + p <- plot(test_data) - + expect_is(p, 'list') expect_equal(length(p), 3) expect_equal(names(p), c('polygon', 'covariates', 'mesh')) - + p2 <- plot(test_data2) - + expect_is(p2, 'list') expect_equal(length(p2), 3) expect_equal(names(p2), c('polygon', 'covariates', 'mesh')) - + p3 <- plot(test_data, which = c(1,3)) - + expect_is(p3, 'list') expect_equal(length(p3), 2) expect_equal(names(p3), c('polygon', 'mesh')) - + }) test_that("Check plot.disag_model function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + fit_result <- disag_model(test_data, iterations = 10) - + fit_result_nofield <- disag_model(test_data, iterations = 10, field = FALSE) - + p1 <- plot(fit_result) - + p2 <- plot(fit_result_nofield) - + expect_is(p1, 'list') expect_equal(length(p1), 2) - + expect_is(p2, 'list') expect_equal(length(p2), 2) - - + + }) test_that("Check plot.disag_prediction function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - + + fit_result <- disag_model(test_data, iterations = 1000, iid = TRUE, field = TRUE, @@ -123,8 +86,8 @@ test_that("Check plot.disag_prediction function works as expected", { prior_iideffect_sd_prob = 0.01)) pred <- predict(fit_result) p <- plot(pred) - + expect_is(p, 'trellis') - + }) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 79b2960..ae95e13 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -1,43 +1,5 @@ context("Predict model") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -spdf <- sf::st_sf(response_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) - 
-names(cov_stack) <- c('layer1', 'layer2') - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("Check predict.disag_model function works as expected", { skip_on_cran() diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index 3702171..bef721e 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -1,39 +1,5 @@ - context("Preparing data") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sf::st_sf(response_df, geometry = polys) -spdf_na <- sf::st_sf(response_na_df, geometry = polys) -spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) - - test_that("Check prepare_data function works as expected", { skip_if_not_installed('INLA') diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R index c913ecf..56961be 100644 --- a/tests/testthat/test-summary.R +++ b/tests/testthat/test-summary.R @@ -1,45 +1,11 @@ context("Summary functions") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- 
raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("Check summary.disag_data function works as expected", { - + skip_on_cran() - + data_summary <- summary(test_data) - + expect_is(data_summary, 'list') expect_equal(length(data_summary), 3) expect_equal(names(data_summary), c('number_polygons', 'number_covariates', 'covariate_summary')) @@ -47,29 +13,29 @@ test_that("Check summary.disag_data function works as expected", { expect_is(data_summary$number_covariates, 'integer') expect_is(data_summary$covariate_summary, 'table') expect_equal(ncol(data_summary$covariate_summary), data_summary$number_covariates) - + }) test_that("Check print.disag_data function works as expected", { - + skip_on_cran() - + print_output <- print(test_data) - + expect_is(print_output, 'disag_data') expect_equal(print_output, test_data) - + }) test_that("Check summary.disag_model function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, field = FALSE, iterations = 2) - + model_summary <- summary(result) - + expect_is(model_summary, 'list') expect_equal(length(model_summary), 3) expect_equal(names(model_summary), c('model_params', 'nll', 'metrics')) @@ -77,28 +43,28 @@ test_that("Check summary.disag_model function works as expected", { expect_is(model_summary$nll, 'numeric') expect_is(model_summary$metrics, 'data.frame') expect_equal(dim(model_summary$metrics), c(1, 5)) - + }) test_that("Check print.disag_model function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, field = FALSE, iterations = 2) - + print_output <- print(result) - + expect_is(print_output, 'disag_model') expect_equal(print_output, result) - + }) test_that("Check summary.disag_predictions function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iid = FALSE, iterations = 100, list(priormean_intercept = 0, priorsd_intercept = 0.1, @@ -110,25 +76,25 @@ test_that("Check summary.disag_predictions function works as expected", { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) - + pred <- predict(result) - + model_summary <- summary(pred) - + expect_is(model_summary, 'list') expect_equal(length(model_summary), 3) expect_equal(names(model_summary), c('number_realisations', 'range_mean_values', 'range_iqr_values')) expect_is(model_summary$number_realisations, 'integer') expect_is(model_summary$range_mean_values, 'numeric') expect_is(model_summary$range_iqr_values, 'numeric') - + }) test_that("Check print.disag_predictions function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iid = FALSE, iterations = 100, list(priormean_intercept = 0, priorsd_intercept = 0.1, @@ -140,12 +106,12 @@ test_that("Check print.disag_predictions function works as expected", { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) - + pred <- predict(result) - + print_output <- print(pred) - + expect_is(print_output, 'disag_prediction') expect_equal(print_output, pred) - -}) \ No newline at end 
of file + +}) From b41253766c49ac68f7ff82004b16b24ddd854dbf Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 10 Oct 2023 15:57:19 +0100 Subject: [PATCH 142/168] update examples and docs. remove sp from imports --- DESCRIPTION | 1 - R/build_mesh.R | 6 +- R/fit_model.R | 106 +++++++++++++++++++---------------- man/build_mesh.Rd | 28 ++++----- man/fit_model.Rd | 53 ++++++++++-------- man/make_model_object.Rd | 53 ++++++++++-------- tests/testthat/helper_data.R | 2 +- 7 files changed, 134 insertions(+), 115 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5f757ad..21731a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,6 @@ LazyData: true RoxygenNote: 7.2.3 Imports: foreach, - sp, splancs, Matrix, stats, diff --git a/R/build_mesh.R b/R/build_mesh.R index 97a2bcc..1a14713 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -29,11 +29,11 @@ #' row <- ceiling(i/10) #' col <- ifelse(i %% 10 != 0, i %% 10, 10) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), -#' c(xmin,ymin), c(xmin, ymax)) +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) #' } #' -#' polys <- sf::st_sfc(sf::st_polygon(polygons)) +#' polys <- lapply(polygons, sf::st_polygon) #' response_df <- data.frame(area_id = 1:100, #' response = runif(100, min = 0, max = 10)) #' spdf <- sf::st_sf(polys, response_df) diff --git a/R/fit_model.R b/R/fit_model.R index dba8bb0..5fc7966 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -72,32 +72,37 @@ #' #' @examples #' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' polygons <- list() +#' n_polygon_per_side <- 10 +#' n_polygons <- n_polygon_per_side * n_polygon_per_side +#' n_pixels_per_side <- n_polygon_per_side * 2 +#' +#' for(i in 1:n_polygons) { +#' row <- ceiling(i/n_polygon_per_side) +#' col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' N <- floor(runif(n_polygons, min = 1, max = 100)) +#' response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +#' +#' spdf <- sf::st_sf(response_df, geometry = polys) +#' +#' # Create raster stack +#' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r) <- terra::ext(spdf) +#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +#' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r2) <- terra::ext(spdf) +#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +#' cov_stack <- c(r, r2) +#' names(cov_stack) <- c('layer1', 'layer2') #' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- 
raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' test_data <- prepare_data(polygon_shapefile = spdf, -#' covariate_rasters = cov_rasters) -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() +#' test_data <- prepare_data(polygon_shapefile = spdf, +#' covariate_rasters = cov_stack) #' #' result <- fit_model(test_data, iterations = 2) #' } @@ -264,32 +269,37 @@ disag_model <- function(data, #' #' @examples #' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' polygons <- list() +#' n_polygon_per_side <- 10 +#' n_polygons <- n_polygon_per_side * n_polygon_per_side +#' n_pixels_per_side <- n_polygon_per_side * 2 +#' +#' for(i in 1:n_polygons) { +#' row <- ceiling(i/n_polygon_per_side) +#' col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' N <- floor(runif(n_polygons, min = 1, max = 100)) +#' response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +#' +#' spdf <- sf::st_sf(response_df, geometry = polys) +#' +#' # Create raster stack +#' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r) <- terra::ext(spdf) +#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +#' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r2) <- terra::ext(spdf) +#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +#' cov_stack <- c(r, r2) +#' names(cov_stack) <- c('layer1', 'layer2') #' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' test_data <- prepare_data(polygon_shapefile = spdf, -#' covariate_rasters = cov_rasters) -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() +#' test_data <- prepare_data(polygon_shapefile = spdf, +#' covariate_rasters = cov_stack) #' #' result <- make_model_object(test_data) #' } diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index 5e17944..fb0df25 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -1,4 +1,3 @@ -<<<<<<< HEAD % Generated by roxygen2: do not edit by hand % Please edit documentation in R/build_mesh.R \name{build_mesh} @@ -8,7 +7,7 @@ build_mesh(shapes, mesh.args = NULL) } \arguments{ -\item{shapes}{shapefile covering the 
region under investigation.} +\item{shapes}{sf covering the region under investigation.} \item{mesh.args}{list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution}, to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, @@ -34,20 +33,21 @@ pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, } \examples{ \dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } +polygons <- list() +for(i in 1:14) { + row <- ceiling(i/10) + col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row +polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +polys <- lapply(polygons, sf::st_polygon) +response_df <- data.frame(area_id = 1:100, + response = runif(100, min = 0, max = 10)) +spdf <- sf::st_sf(polys, response_df) - my_mesh <- build_mesh(spdf) +my_mesh <- build_mesh(spdf) } } - diff --git a/man/fit_model.Rd b/man/fit_model.Rd index 6a6b2d3..f06c963 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -111,32 +111,37 @@ The \emph{silent} argument can be used to publish/suppress verbose output. Defau } \examples{ \dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- ifelse(i \%\% n_polygon_per_side != 0, i \%\% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} + +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) + +spdf <- sf::st_sf(response_df, geometry = polys) + +# Create raster stack +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) +names(cov_stack) <- c('layer1', 'layer2') - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, 
nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_rasters) - parallel::stopCluster(cl) - foreach::registerDoSEQ() +test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) result <- fit_model(test_data, iterations = 2) } diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd index d855661..1f12a97 100644 --- a/man/make_model_object.Rd +++ b/man/make_model_object.Rd @@ -91,32 +91,37 @@ The \emph{silent} argument can be used to publish/supress verbose output. Defaul } \examples{ \dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- ifelse(i \%\% n_polygon_per_side != 0, i \%\% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} + +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) + +spdf <- sf::st_sf(response_df, geometry = polys) + +# Create raster stack +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) +names(cov_stack) <- c('layer1', 'layer2') - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_rasters) - parallel::stopCluster(cl) - foreach::registerDoSEQ() +test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) result <- make_model_object(test_data) } diff --git a/tests/testthat/helper_data.R b/tests/testthat/helper_data.R index 01f1f0c..b3aa456 100644 --- a/tests/testthat/helper_data.R +++ b/tests/testthat/helper_data.R @@ -11,7 +11,7 @@ for(i in 1:n_polygons) { c(ymax, ymax, ymin, ymin, ymax))) } -polys <- lapply(polygons,sf::st_polygon) +polys <- lapply(polygons, sf::st_polygon) N <- 
floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) From 6ad196c926cc10eb467fcfe5e080dd526e4a1632 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Wed, 11 Oct 2023 10:42:10 +0100 Subject: [PATCH 143/168] partially migrate plotting (not functional yet) --- R/plotting.R | 4 ++-- tests/testthat/helper_data.R | 2 ++ tests/testthat/test-plotting.R | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index e87f645..2a0a568 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -21,7 +21,7 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) { titles <- c() if(1 %in% which) { - plots$polygon <- plot_polygon_data(x$x, x$shapefile_names) + plots$polygon <- plot_polygon_data(x$polygon_shapefile, x$shapefile_names) titles <- c(titles, 'Polygon response data') } @@ -155,7 +155,7 @@ plot.disag_prediction <- function(x, ...) { plot_polygon_data <- function(x, names) { # Rename the response variable for plotting - shp <- sf::st_as_sf(x) + shp <- x shp <- dplyr::rename(shp, 'response' = names$response_var) shp <- dplyr::rename(shp, 'area_id' = names$id_var) diff --git a/tests/testthat/helper_data.R b/tests/testthat/helper_data.R index b3aa456..89deef8 100644 --- a/tests/testthat/helper_data.R +++ b/tests/testthat/helper_data.R @@ -16,10 +16,12 @@ N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) +response_df2 <- data.frame(area_id = 1:n_polygons, n_positive = runif(n_polygons, min = 0, max = 1), sample_size = floor(runif(n_polygons, min = 1, max = 100))) spdf <- sf::st_sf(response_df, geometry = polys) spdf_na <- sf::st_sf(response_na_df, geometry = polys) spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) +spdf2 <- sf::st_sf(response_df2, geometry = polys) # Create raster stack r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index e4f32f6..cb55e51 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -43,7 +43,6 @@ test_that("Check plot.disag.data function works as expected", { test_that("Check plot.disag_model function works as expected", { - skip_if_not_installed('INLA') skip_on_cran() fit_result <- disag_model(test_data, iterations = 10) @@ -65,7 +64,6 @@ test_that("Check plot.disag_model function works as expected", { test_that("Check plot.disag_prediction function works as expected", { - skip_if_not_installed('INLA') skip_on_cran() From 793077b42641acd53de4805775f53110b7ad82e9 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Wed, 11 Oct 2023 12:17:10 +0100 Subject: [PATCH 144/168] migrate summary --- R/summary.R | 152 ++++++++++++++++++++++++++-------------------------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/R/summary.R b/R/summary.R index 87ce78c..2c34c07 100644 --- a/R/summary.R +++ b/R/summary.R @@ -1,28 +1,28 @@ #' Summary function for disaggregation fit result -#' +#' #' Function that summarises the result of the fit from the disaggregation model. 
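The summary method being reworked here keeps the established S3 interface. An illustrative usage sketch (not part of this patch), assuming `fit` is an object returned by `disag_model()`:

```r
# summary() prints diagnostics and invisibly returns a list.
fit_summary <- summary(fit)

fit_summary$model_params  # fixed-effect estimates from the sdreport
fit_summary$nll           # negative log likelihood from the TMB report
fit_summary$metrics       # in-sample RMSE, MAE, pearson, spearman, log_pearson
```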
-#' +#' #' Prints the negative log likelihood, model parameters and calculates metrics from in-sample performance. #' #' @param object Object returned from disag_model. #' @param ... Further arguments to \emph{summary} function. -#' +#' #' @return A list of the model parameters, negative log likelihood and metrics from in-sample performance. -#' +#' #' @method summary disag_model -#' +#' #' @export #' @importFrom stats cor quantile sd summary.disag_model <- function(object, ...) { - + pred <- obs <- NULL - + model_params <- summary(object$sd_out, select = 'fixed') - + report <- object$obj$report() nll <- report$nll - + # Form of the observed and predicted results depends on the likelihood function used if(object$model_setup$family == 'gaussian') { observed_data = report$polygon_response_data/report$reportnormalisation @@ -34,165 +34,165 @@ summary.disag_model <- function(object, ...) { observed_data = report$polygon_response_data predicted_data = report$reportprediction_cases } - + in_sample <- data.frame(obs = observed_data, pred = predicted_data) in_sample_reduced <- in_sample[!is.na(in_sample$pred), ] - metrics <- dplyr::summarise(in_sample_reduced, + metrics <- dplyr::summarise(in_sample_reduced, RMSE = sqrt(mean((pred - obs) ^ 2)), MAE = mean(abs(pred - obs)), pearson = cor(pred, obs, method = 'pearson'), spearman = cor(pred, obs, method = 'spearman'), log_pearson = cor(log1p(pred), log1p(obs), method = 'pearson')) - + cat(paste('Likelihood function:', object$model_setup$family, '\n')) cat(paste('Link function:', object$model_setup$link, '\n')) - + cat('Model parameters:\n') print(model_params) - + cat(paste0('\nModel convergence: ', object$opt$convergence, ' (', object$opt$message, ')')) - + cat(paste('\nNegative log likelihood: ', nll, '\n')) - + cat('\nIn sample performance:\n') print(metrics) - + summary <- list(model_params = model_params, nll = nll, metrics = metrics) - + return(invisible(summary)) - + } #' Print function for disaggregation fit result. -#' +#' #' Function that prints the result of the fit from the disaggregation model. -#' +#' #' Prints the negative log likelihood, model parameters and calculates metrics from in-sample performance. #' #' @param x Object returned from disag_model. #' @param ... Further arguments to \emph{print} function. -#' +#' #' @return NULL -#' +#' #' @method print disag_model -#' +#' #' @export #' @importFrom stats cor quantile sd print.disag_model <- function(x, ...){ - + model_params <- summary(x$sd_out, select = 'fixed') - + cat('Bayesian disaggregation model result\n') cat('\n') cat(paste('Likelihood function:', x$model_setup$family, '\n')) cat(paste('Link function:', x$model_setup$link, '\n')) - + cat('\nParameter values:\n') print(model_params[ , 1]) - + return(invisible(x)) } #' Summary function for disaggregation input data -#' +#' #' Function that summarizes the input data from the disaggregation model. -#' +#' #' Prints the number of polyons and pixels, the number of pixels in the largest and smallest polygons and summaries of the covariates. #' #' @param object Object returned from prepare_data. #' @param ... Further arguments to \emph{summary} function. -#' +#' #' @return A list of the number of polyons, the number of covariates and summaries of the covariates. -#' +#' #' @method summary disag_data -#' +#' #' @export summary.disag_data <- function(object, ...) 
{ n_polygons <- nrow(object$polygon_shapefile) - n_covariates <- terra::nlyr(object$covariate_rasters) - + n_covariates <- as.integer(terra::nlyr(object$covariate_rasters)) + cat(paste("They data contains", n_polygons, "polygons and", nrow(object$covariate_data), "pixels\n")) - - cat(paste("The largest polygon contains", max(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels", + + cat(paste("The largest polygon contains", max(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels", "and the smallest polygon contains", min(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels\n")) - + cat(paste("There are", n_covariates, "covariates\n")) - + covariate_summary <- summary(object$covariate_data[ , names(object$covariate_rasters)]) - + cat("\nCovariate summary:\n") print(covariate_summary) - + summary <- list(number_polygons = n_polygons, number_covariates = n_covariates, covariate_summary = covariate_summary) - + return(invisible(summary)) - + } #' Print function for disaggregation input data -#' +#' #' Function that prints the input data from the disaggregation model. -#' +#' #' Prints the number of polyons and pixels, the number of pixels in the largest and smallest polygons and summaries of the covariates. #' #' @param x Object returned from prepare_data. #' @param ... Further arguments to \emph{print} function. -#' +#' #' @return NULL -#' +#' #' @method print disag_data -#' +#' #' @export print.disag_data <- function(x, ...){ - + n_polygons <- nrow(x$polygon_shapefile) n_covariates <- terra::nlyr(x$covariate_rasters) - + cat(paste("They data contains", n_polygons, "polygons and", nrow(x$covariate_data), "pixels\n")) - - cat(paste("The largest polygon contains", max(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels", + + cat(paste("The largest polygon contains", max(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels", "and the smallest polygon contains", min(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels\n")) - + cat(paste("There are", n_covariates, "covariates\n")) - + return(invisible(x)) } #' Summary function for disaggregation prediction -#' +#' #' Function that summarizes the prediction from the disaggregation model. -#' +#' #' Prints the number of polyons and pixels, the number of pixels in the largest and smallest polygons and summaries of the covariates. #' #' @param object Object returned from predict.disag_model #' @param ... Further arguments to \emph{summary} function. -#' +#' #' @return A list of the number of polyons, the number of covariates and summaries of the covariates. -#' +#' #' @method summary disag_prediction -#' +#' #' @export summary.disag_prediction <- function(object, ...) 
{ - - number_realisations <- terra::nlyr(object$uncertainty_prediction$realisations) - max_mean <- max(object$mean_prediction$prediction@data@values) - min_mean <- min(object$mean_prediction$prediction@data@values) - max_iqr <- max((object$uncertainty_prediction$predictions_ci[[2]] - object$uncertainty_prediction$predictions_ci[[1]])@data@values) - min_iqr <- min((object$uncertainty_prediction$predictions_ci[[2]] - object$uncertainty_prediction$predictions_ci[[1]])@data@values) - + + number_realisations <- as.integer(terra::nlyr(object$uncertainty_prediction$realisations)) + max_mean <- max(terra::values(object$mean_prediction$prediction)) + min_mean <- min(terra::values(object$mean_prediction$prediction)) + max_iqr <- max((terra::values(object$uncertainty_prediction$predictions_ci[[2]]) - terra::values(object$uncertainty_prediction$predictions_ci[[1]]))) + min_iqr <- min((terra::values(object$uncertainty_prediction$predictions_ci[[2]]) - terra::values(object$uncertainty_prediction$predictions_ci[[1]]))) + cat('Predction from disaggregation model\n') cat('\n') cat('Components of the model: ') @@ -204,33 +204,33 @@ summary.disag_prediction <- function(object, ...) { cat('\n') cat(paste('The mean predicted values range from', signif(min_mean, 3), 'to', signif(max_mean, 3), '\n')) cat(paste('The predicted IQR takes values from', signif(min_iqr, 3), 'to', signif(max_iqr, 3), '\n')) - + summary <- list(number_realisations = number_realisations, range_mean_values = c(min_mean, max_mean), range_iqr_values = c(min_iqr, max_iqr)) - + return(invisible(summary)) - + } #' Print function for disaggregation prediction -#' +#' #' Function that prints the prediction from the disaggregation model. -#' +#' #' Prints the number of polyons and pixels, the number of pixels in the largest and smallest polygons and summaries of the covariates. #' #' @param x Object returned from predict.disag_model. #' @param ... Further arguments to \emph{print} function. -#' +#' #' @return NULL -#' +#' #' @method print disag_prediction -#' +#' #' @export print.disag_prediction <- function(x, ...){ - + cat('Predction from disaggregation model\n') cat('\n') cat('Components of the model: ') @@ -241,4 +241,4 @@ print.disag_prediction <- function(x, ...){ cat(paste0('There are ', terra::nlyr(x$uncertainty_prediction$realisations), ' uncertainty realisations')) return(invisible(x)) -} \ No newline at end of file +} From 268e7e0a287ebed2c9b8d2cdc7bb04abd52fd25b Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 13 Oct 2023 14:40:55 +0100 Subject: [PATCH 145/168] update prepare docs --- R/prepare_data.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index 96e132c..f849754 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -27,11 +27,11 @@ #' and sets covariate NAs pixels to the median value for the that covariate. #' #' @param x sf object containing at least three columns: one with the geometried, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}). -#' @param covariate_rasters RasterStack of covariate rasters to be used in the model. -#' @param aggregation_raster Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used. 
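The aggregation raster described in the parameter documentation above can be sketched with terra. Illustrative only (not part of this patch); `spdf` and the 20x20 grid are assumed from the package examples:

```r
# A population-style aggregation raster on the same grid as the covariates.
# When aggregation_raster is NULL, prepare_data() falls back to uniform
# weights, i.e. behaves as if a raster of ones had been supplied.
agg <- terra::rast(ncol = 20, nrow = 20)
terra::ext(agg) <- terra::ext(spdf)
agg[] <- runif(terra::ncell(agg), min = 1, max = 100)  # synthetic population
```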
-#' @param id_var Name of column in SpatialPolygonDataFrame object with the polygon id. -#' @param response_var Name of column in SpatialPolygonDataFrame object with the response data. -#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. +#' @param covariate_rasters SpatRaster of covariate rasters to be used in the model. +#' @param aggregation_raster SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used. +#' @param id_var Name of column in sf object with the polygon id. +#' @param response_var Name of column in sf object with the response data. +#' @param sample_size_var For survey data, name of column in sf object (if it exists) with the sample size data. #' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA. #' @param na.action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors). #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE. @@ -77,7 +77,7 @@ #' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) #' cov_rasters <- c(r, r2) #' -#' test_data <- prepare_data(x = spdf, +#' test_data <- prepare_data(polygon_shapefile = spdf, #' covariate_rasters = cov_rasters) #' } #' From 3d2cc960f656e1f05bc9dcae86667121a5310b83 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 13 Oct 2023 16:17:16 +0100 Subject: [PATCH 146/168] fix id_var handling in prepare_data --- R/prepare_data.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index f849754..cf5afcd 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -129,7 +129,12 @@ prepare_data <- function(polygon_shapefile, covariate_rasters <- c(covariate_rasters, aggregation_raster) covariate_data <- terra::extract(covariate_rasters, polygon_shapefile, cells=TRUE, na.rm=TRUE, ID=TRUE) - names(covariate_data)[1] <- id_var + #merge to transfer area_id and then tidy up + polygon_data$area_n <- 1:nrow(polygon_data) + covariate_data <- merge(covariate_data, polygon_data, by.x = "ID", by.y = "area_n") + covariate_data <- covariate_data[ , !(names(covariate_data) %in% c("ID", "cell", "response", "N"))] + colnames(covariate_data )[colnames(covariate_data ) == "area_id"] <- id_var + polygon_data <- polygon_data[ , !(names(polygon_data) %in% c("area_n"))] # Remove the aggregation raster covariate_rasters <- covariate_rasters[[seq(terra::nlyr(covariate_rasters) - 1)]] From e6a057640088e6633d43f5bbeca34c84cb951dc8 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 16 Oct 2023 17:06:04 +0100 Subject: [PATCH 147/168] switch plotting to use tidyterra and update some docs --- DESCRIPTION | 1 + R/plotting.R | 4 ++-- R/predict.R | 2 +- man/predict_model.Rd | 2 +- man/prepare_data.Rd | 12 ++++++------ tests/testthat/test-matching.R | 2 +- tests/testthat/test-plotting.R | 3 +-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 21731a5..c24d931 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Imports: cowplot, sparseMVN, fmesher, + tidyterra, terra, sf, utils diff --git a/R/plotting.R b/R/plotting.R index 2a0a568..8603e75 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -27,7 +27,7 @@ 
plot.disag_data <- function(x, which = c(1,2,3), ...) { if(2 %in% which) { stopifnot(inherits(x$covariate_rasters, c('SpatRaster'))) - plots$covariates <- plot(x$covariate_rasters) + plots$covariates <- ggplot2::ggplot() + tidyterra::geom_spatraster(data=x$covariate_rasters) + ggplot2::facet_wrap(~lyr) + tidyterra::scale_fill_terrain_c() titles <- c(titles, 'Covariate rasters') } @@ -135,7 +135,7 @@ plot.disag_prediction <- function(x, ...) { rasters_to_plot <- terra::rast(list(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci)) names(rasters_to_plot) <- c('mean prediction', 'lower CI', 'upper CI') - plots <- plot(rasters_to_plot) + plots <- ggplot2::ggplot() + tidyterra::geom_spatraster(data=rasters_to_plot) + ggplot2::facet_wrap(~lyr) + tidyterra::scale_fill_terrain_c() print(plots) diff --git a/R/predict.R b/R/predict.R index f27bf2b..250cb7a 100644 --- a/R/predict.R +++ b/R/predict.R @@ -68,7 +68,7 @@ predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = #' to the linear predictor. #' #' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. #' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/predict_model.Rd b/man/predict_model.Rd index 3fbbc26..3e0387a 100644 --- a/man/predict_model.Rd +++ b/man/predict_model.Rd @@ -32,7 +32,7 @@ Function returns rasters of the mean predictions as well as the covariate and f to the linear predictor. To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 83792d2..0a8110b 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -18,15 +18,15 @@ prepare_data( ) } \arguments{ -\item{covariate_rasters}{RasterStack of covariate rasters to be used in the model.} +\item{covariate_rasters}{SpatRaster of covariate rasters to be used in the model.} -\item{aggregation_raster}{Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.} +\item{aggregation_raster}{SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. 
If this is not supplied a uniform raster will be used.} -\item{id_var}{Name of column in SpatialPolygonDataFrame object with the polygon id.} +\item{id_var}{Name of column in sf object with the polygon id.} -\item{response_var}{Name of column in SpatialPolygonDataFrame object with the response data.} +\item{response_var}{Name of column in sf object with the response data.} -\item{sample_size_var}{For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data.} +\item{sample_size_var}{For survey data, name of column in sf object (if it exists) with the sample size data.} \item{mesh.args}{list of parameters that control the mesh structure with the same names as used by INLA.} @@ -104,7 +104,7 @@ terra::ext(r2) <- terra::ext(spdf) r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) cov_rasters <- c(r, r2) -test_data <- prepare_data(x = spdf, +test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_rasters) } diff --git a/tests/testthat/test-matching.R b/tests/testthat/test-matching.R index e22d433..42b152a 100644 --- a/tests/testthat/test-matching.R +++ b/tests/testthat/test-matching.R @@ -17,7 +17,7 @@ test_that("Getting start and end index returns the right object", { result <- getStartendindex(covs, response, 'id') save(result, file = paste0(tempdir(), '/test_startendindex.RData')) - + expect_is(result, "matrix") expect_equal(nrow(result), nrow(response)) expect_equal(ncol(result), 2) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index cb55e51..306317c 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -66,7 +66,6 @@ test_that("Check plot.disag_prediction function works as expected", { skip_on_cran() - fit_result <- disag_model(test_data, iterations = 1000, iid = TRUE, field = TRUE, @@ -85,7 +84,7 @@ test_that("Check plot.disag_prediction function works as expected", { pred <- predict(fit_result) p <- plot(pred) - expect_is(p, 'trellis') + expect_is(p, 'gg') }) From b1874a554518433dfdcccae4c229d34b8099377b Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 16 Oct 2023 17:19:07 +0100 Subject: [PATCH 148/168] update docs after migration --- R/build_mesh.R | 2 +- R/extract.R | 15 ++++++--------- R/plotting.R | 6 +++--- R/predict.R | 14 +++++++------- R/prepare_data.R | 10 +++++----- man/as.disag_data.Rd | 10 +++++----- man/build_mesh.Rd | 2 +- man/getCovariateRasters.Rd | 2 +- man/getPolygonData.Rd | 8 ++++---- man/predict.disag_model.Rd | 6 +++--- man/predict_uncertainty.Rd | 8 ++++---- man/prepare_data.Rd | 2 +- 12 files changed, 41 insertions(+), 44 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 1a14713..b005a55 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -1,6 +1,6 @@ #' Build mesh for disaggregaton model #' -#' \emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field. +#' \emph{build_mesh} function takes a sf object and mesh arguments to build an appropriate mesh for the spatial field. #' #' The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary #' and a coarser mesh outside. 
This speeds up computation time by only having a very fine mesh within the area of interest diff --git a/R/extract.R b/R/extract.R index 6bf26b4..dd7fd75 100644 --- a/R/extract.R +++ b/R/extract.R @@ -1,15 +1,15 @@ -#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame +#' Extract polygon id and response data into a data.frame from a sf object #' -#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +#' Returns a data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the #' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does #' not exist), this column will contain NAs. #' #' @param shape A sf object containing response data. #' @param id_var Name of column in shape object with the polygon id. Default 'area_id'. #' @param response_var Name of column in shape object with the response data. Default 'response'. -#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL. +#' @param sample_size_var For survey data, name of column in sf object (if it exists) with the sample size data. Default NULL. #' -#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +#' @return A data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the #' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does #' not exist), this column will contain NAs. #' @@ -48,7 +48,7 @@ getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', } -#' Get a RasterStack of covariates from a folder containing .tif files +#' Get a SpatRaster of covariates from a folder containing .tif files #' #' Looks in a specified folder for raster files. Returns a multi-layered SpatRaster of the rasters cropped to the extent specified by the shape parameter. #' @@ -83,7 +83,7 @@ getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { # Extract coordinates from raster to use constructing the INLA mesh # -# @param cov_rasters RasterStack of the covariate rasters. +# @param cov_rasters SpatRaster of the covariate rasters. # @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters. # # @return A matrix containing the coordinates used to make the mesh @@ -106,6 +106,3 @@ extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { return(coords) } - - - diff --git a/R/plotting.R b/R/plotting.R index 8603e75..f9b9a2b 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -143,7 +143,7 @@ plot.disag_prediction <- function(x, ...) { } -# Plot polygon data from SpatialPolygonDataFrame +# Plot polygon data from sf object # # @param x Object to be plotted # @param names list of 2 names: polygon id variable and response variable names @@ -152,10 +152,10 @@ plot.disag_prediction <- function(x, ...) 
{ # # @name plot_polygon_data -plot_polygon_data <- function(x, names) { +plot_polygon_data <- function(polygon_shapefile, names) { # Rename the response variable for plotting - shp <- x + shp <- polygon_shapefile shp <- dplyr::rename(shp, 'response' = names$response_var) shp <- dplyr::rename(shp, 'area_id' = names$id_var) diff --git a/R/predict.R b/R/predict.R index 250cb7a..9f6713c 100644 --- a/R/predict.R +++ b/R/predict.R @@ -4,7 +4,7 @@ #' predicts mean and uncertainty maps. #' #' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. #' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. @@ -30,8 +30,8 @@ #' }} #' \item{uncertainty_prediction: }{List of: #' \itemize{ -#' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. -#' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. +#' \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. +#' \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. #' }} #' #' @@ -115,10 +115,10 @@ predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { #' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by #' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. #' -#' Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters. +#' Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters. #' #' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. #' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. @@ -135,8 +135,8 @@ predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { #' #' @return The uncertainty prediction, which is a list of: #' \itemize{ -#' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. -#' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. +#' \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. +#' \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. #' } #' #' @name predict_uncertainty diff --git a/R/prepare_data.R b/R/prepare_data.R index cf5afcd..c653260 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -3,7 +3,7 @@ #' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. 
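A usage sketch of the migrated interface (not part of this patch): sf polygons and SpatRaster covariates go in, with `spdf` and `cov_stack` assumed to be built as in the package examples:

```r
test_data <- prepare_data(polygon_shapefile = spdf,
                          covariate_rasters = cov_stack,
                          id_var = 'area_id',
                          response_var = 'response',
                          na.action = TRUE,  # drop NA responses, impute covariate NAs
                          makeMesh = FALSE)  # skip the INLA mesh for a quick check
```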
#' Designed to be used in the \emph{disaggregation::fit_model} function. #' -#' Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates. +#' Takes a sf object with the response data and a SpatRaster of covariates. #' #' Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons #' (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores @@ -201,9 +201,9 @@ prepare_data <- function(polygon_shapefile, #' Function to fit the disaggregation model #' -#' @param x SpatialPolygonDataFrame containing the response data +#' @param polygon_shapefile sf object containing the response data #' @param shapefile_names List of 2: polygon id variable name and response variable name from x -#' @param covariate_rasters RasterStack of covariates +#' @param covariate_rasters SpatRaster of covariates #' @param polygon_data data.frame with two columns: polygon id and response #' @param covariate_data data.frame with cell id, polygon id and covariate columns #' @param aggregation_pixels vector with value of aggregation raster at each pixel @@ -215,8 +215,8 @@ prepare_data <- function(polygon_shapefile, #' @return A list is returned of class \code{disag_data}. #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. #' The list of class \code{disag_data} contains: -#' \item{x }{The SpatialPolygonDataFrame used as an input.} -#' \item{covariate_rasters }{The RasterStack used as an input.} +#' \item{x }{The sf object used as an input.} +#' \item{covariate_rasters }{The SpatRaster used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/man/as.disag_data.Rd b/man/as.disag_data.Rd index c8e92f9..803d050 100644 --- a/man/as.disag_data.Rd +++ b/man/as.disag_data.Rd @@ -18,9 +18,11 @@ as.disag_data( ) } \arguments{ +\item{polygon_shapefile}{sf object containing the response data} + \item{shapefile_names}{List of 2: polygon id variable name and response variable name from x} -\item{covariate_rasters}{RasterStack of covariates} +\item{covariate_rasters}{SpatRaster of covariates} \item{polygon_data}{data.frame with two columns: polygon id and response} @@ -35,15 +37,13 @@ as.disag_data( \item{startendindex}{matrix containing the start and end index for each polygon} \item{mesh}{inla.mesh object to use in the fit} - -\item{x}{SpatialPolygonDataFrame containing the response data} } \value{ A list is returned of class \code{disag_data}. The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. The list of class \code{disag_data} contains: - \item{x }{The SpatialPolygonDataFrame used as an input.} - \item{covariate_rasters }{The RasterStack used as an input.} + \item{x }{The sf object used as an input.} + \item{covariate_rasters }{The SpatRaster used as an input.} \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). 
Each row represents a polygon.} \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index fb0df25..77561ca 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -17,7 +17,7 @@ with the parameters having the same meaning as in the INLA functions \emph{inla. An inla.mesh object } \description{ -\emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field. +\emph{build_mesh} function takes a sf object and mesh arguments to build an appropriate mesh for the spatial field. } \details{ The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary diff --git a/man/getCovariateRasters.Rd b/man/getCovariateRasters.Rd index fcac9b0..c39619b 100644 --- a/man/getCovariateRasters.Rd +++ b/man/getCovariateRasters.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/extract.R \name{getCovariateRasters} \alias{getCovariateRasters} -\title{Get a RasterStack of covariates from a folder containing .tif files} +\title{Get a SpatRaster of covariates from a folder containing .tif files} \usage{ getCovariateRasters(directory, file_pattern = ".tif$", shape) } diff --git a/man/getPolygonData.Rd b/man/getPolygonData.Rd index e6ec032..27759e6 100644 --- a/man/getPolygonData.Rd +++ b/man/getPolygonData.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/extract.R \name{getPolygonData} \alias{getPolygonData} -\title{Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame} +\title{Extract polygon id and response data into a data.frame from a sf object} \usage{ getPolygonData( shape, @@ -18,15 +18,15 @@ getPolygonData( \item{response_var}{Name of column in shape object with the response data. Default 'response'.} -\item{sample_size_var}{For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL.} +\item{sample_size_var}{For survey data, name of column in sf object (if it exists) with the sample size data. Default NULL.} } \value{ -A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +A data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does not exist), this column will contain NAs. } \description{ -Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +Returns a data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does not exist), this column will contain NAs. 
} diff --git a/man/predict.disag_model.Rd b/man/predict.disag_model.Rd index edea4ac..f85ed7a 100644 --- a/man/predict.disag_model.Rd +++ b/man/predict.disag_model.Rd @@ -31,8 +31,8 @@ An object of class \emph{disag_prediction} which consists of a list of two objec }} \item{uncertainty_prediction: }{List of: \itemize{ - \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. - \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. + \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. + \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. }} } \description{ @@ -41,7 +41,7 @@ predicts mean and uncertainty maps. } \details{ To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/predict_uncertainty.Rd b/man/predict_uncertainty.Rd index 5297230..ccf361d 100644 --- a/man/predict_uncertainty.Rd +++ b/man/predict_uncertainty.Rd @@ -27,8 +27,8 @@ If this is a raster stack or brick, predictions will be made over this data. Def \value{ The uncertainty prediction, which is a list of: \itemize{ - \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. - \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. + \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. + \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. } } \description{ @@ -36,10 +36,10 @@ The uncertainty prediction, which is a list of: \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. } \details{ -Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters. +Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters. To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 0a8110b..c310421 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -57,7 +57,7 @@ The list of class \code{disag_data} contains: Designed to be used in the \emph{disaggregation::fit_model} function. } \details{ -Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates. +Takes a sf object with the response data and a SpatRaster of covariates. 
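Pulling together the prediction documentation above (`predict.disag_model.Rd` and `predict_uncertainty.Rd`), an illustrative sketch that is not part of this patch; `fit` is again assumed to be a fitted `disag_model`, and `N`/`CI` mirror the documented arguments:

```r
pred <- predict(fit, N = 100, CI = 0.95)

pred$mean_prediction$prediction                        # mean surface (SpatRaster)
terra::nlyr(pred$uncertainty_prediction$realisations)  # N realisation layers
pred$uncertainty_prediction$predictions_ci             # lower and upper CI layers
```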
Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores From 225c86681a33476a35ac4f4cc842da8bfbaa3bb4 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 23 Oct 2023 16:40:55 +0100 Subject: [PATCH 149/168] Update prepare_data.R more robust removal of aggregation data --- R/prepare_data.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index c653260..2466398 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -128,7 +128,7 @@ prepare_data <- function(polygon_shapefile, covariate_rasters <- c(covariate_rasters, aggregation_raster) - covariate_data <- terra::extract(covariate_rasters, polygon_shapefile, cells=TRUE, na.rm=TRUE, ID=TRUE) + covariate_data <- terra::extract(covariate_rasters, terra::vect(polygon_shapefile), cells=TRUE, na.rm=TRUE, ID=TRUE) #merge to transfer area_id and then tidy up polygon_data$area_n <- 1:nrow(polygon_data) covariate_data <- merge(covariate_data, polygon_data, by.x = "ID", by.y = "area_n") @@ -137,8 +137,8 @@ prepare_data <- function(polygon_shapefile, polygon_data <- polygon_data[ , !(names(polygon_data) %in% c("area_n"))] # Remove the aggregation raster - covariate_rasters <- covariate_rasters[[seq(terra::nlyr(covariate_rasters) - 1)]] - + cov_filter <- !(names(covariate_data) %in% c('aggregation_raster')) + covariate_rasters <- covariate_rasters[[cov_filter]] names(covariate_rasters) <- cov_names agg_filter <- names(covariate_data) %in% c('aggregation_raster') From 5ff89dc38823e9fab055d58464e3f9e3aa23c053 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 23 Oct 2023 16:52:15 +0100 Subject: [PATCH 150/168] Update fit_model.R fix cov_matrix column selection bug --- R/fit_model.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fit_model.R b/R/fit_model.R index 5fc7966..b6fefd6 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -358,7 +358,7 @@ make_model_object <- function(data, Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A n_s <- nrow(spde$M0) - cov_matrix <- as.matrix(data$covariate_data[, -c(1:2)]) + cov_matrix <- as.matrix(data$covariate_data[, -c("area_id")]) # If we have exactly one column we don't have to transpose. Sure this # this could be cleaner but I don't know how. 
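A note on the `cov_matrix` line above: dropping columns by position (`-c(1:2)`) silently depends on column order, and negative indexing with a character vector (`-c("area_id")`) is not valid R for a data.frame, which is why a later commit in this series switches to selection by name. A sketch of the pitfall, using a hypothetical toy data frame:

```r
df <- data.frame(area_id = 1, cell = 2, layer1 = 0.5, layer2 = 0.9)

df[, -c(1:2)]          # works, but breaks if the column layout changes
# df[, -c("area_id")]  # error: invalid argument to unary operator

# Name-based selection, as adopted later in the series:
as.matrix(df[, names(df) %in% c("layer1", "layer2")])
```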
if(ncol(cov_matrix) == 1){ From 1db8d22b00e4914b61e6000d3c626afc4e9cc012 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 23 Oct 2023 16:58:32 +0100 Subject: [PATCH 151/168] migrate INLA::inla.mesh.project to fmesher::fm_evaluate --- R/fit_model.R | 3 ++- R/predict.R | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index b6fefd6..7e957b7 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -355,7 +355,8 @@ make_model_object <- function(data, nu = 1 # Sort out mesh bits spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] - Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A + # Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A + Apix <- fmesher::fm_evaluate(data$mesh, loc = data$coordsForFit)$A n_s <- nrow(spde$M0) cov_matrix <- as.matrix(data$covariate_data[, -c("area_id")]) diff --git a/R/predict.R b/R/predict.R index 9f6713c..6880874 100644 --- a/R/predict.R +++ b/R/predict.R @@ -231,8 +231,8 @@ getAmatrix <- function(mesh, coords) { spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] n_s <- nrow(spde$M0) - Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A - + # Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A + Amatrix <- fmesher::fm_evaluate(mesh, loc = as.matrix(coords))$A return(Amatrix) } From 9d66d11aeac0e7cdf242d66106b6a06796e6d1ac Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 23 Oct 2023 17:11:11 +0100 Subject: [PATCH 152/168] remove getamatrix function --- R/predict.R | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/R/predict.R b/R/predict.R index 6880874..0c2eaa8 100644 --- a/R/predict.R +++ b/R/predict.R @@ -217,25 +217,6 @@ getCoords <- function(data) { return(coords) } -# Get Amatrix for field -# -# @param mesh mesh used in the model fitting -# @param coords coordinates extracted from raster -# -# @return An Amatrix object for the field -# -# @name getAmatrix - -getAmatrix <- function(mesh, coords) { - - spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] - n_s <- nrow(spde$M0) - - # Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A - Amatrix <- fmesher::fm_evaluate(mesh, loc = as.matrix(coords))$A - return(Amatrix) -} - # Helper to check and sort out new raster data. 
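The INLA-to-fmesher swap happens in two steps across these commits, so the end state is worth pinning down. An equivalence sketch (not part of this patch), assuming `mesh` and `coords` exist as in the package internals:

```r
# The same sparse projector matrix, before and after the migration.
# (This patch first uses fmesher::fm_evaluate(...)$A; a follow-up commit
# settles on the evaluator form.)
A_old <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A
A_new <- fmesher::fm_evaluator(mesh, loc = as.matrix(coords))$proj$A
all.equal(A_old, A_new)
```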
 check_newdata <- function(newdata, model_output){
 
   if(is.null(newdata)) return(NULL)
@@ -280,7 +261,7 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) {
     } else {
       coords <- getCoords(data)
     }
-    Amatrix <- getAmatrix(data$mesh, coords)
+    Amatrix <- fmesher::fm_evaluate(data$mesh, loc = as.matrix(coords))$A
     field_objects <- list(coords = coords, Amatrix = Amatrix)
   } else {
     field_objects <- NULL

From ad953faab683fd56bdc34df7420c07fe804c279f Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 24 Oct 2023 10:17:18 +0100
Subject: [PATCH 153/168] tweaks to yesterday's changes
---
 R/fit_model.R | 5 ++---
 R/predict.R | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/R/fit_model.R b/R/fit_model.R
index 7e957b7..47dacab 100644
--- a/R/fit_model.R
+++ b/R/fit_model.R
@@ -355,11 +355,10 @@ make_model_object <- function(data,
   nu = 1
   # Sort out mesh bits
   spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")]
-  # Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A
-  Apix <- fmesher::fm_evaluate(data$mesh, loc = data$coordsForFit)$A
+  Apix <- fmesher::fm_evaluator(data$mesh, loc = data$coordsForFit)$proj$A
   n_s <- nrow(spde$M0)
 
-  cov_matrix <- as.matrix(data$covariate_data[, -c("area_id")])
+  cov_matrix <- as.matrix(data$covariate_data[, (names(data$covariate_data) %in% names(data$covariate_rasters))])
   # If we have exactly one column we don't have to transpose. Sure this
   # this could be cleaner but I don't know how.
   if(ncol(cov_matrix) == 1){
diff --git a/R/predict.R b/R/predict.R
index 0c2eaa8..6cbfc67 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -261,7 +261,7 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) {
     } else {
       coords <- getCoords(data)
     }
-    Amatrix <- fmesher::fm_evaluate(data$mesh, loc = as.matrix(coords))$A
+    Amatrix <- fmesher::fm_evaluator(data$mesh, loc = as.matrix(coords))$proj$A
     field_objects <- list(coords = coords, Amatrix = Amatrix)
   } else {
     field_objects <- NULL

From bedcb09902836a0151441ae8b02c6e37bc4095cf Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 24 Oct 2023 15:16:32 +0100
Subject: [PATCH 154/168] deprecate ncores
---
 R/prepare_data.R | 7 +++++--
 man/prepare_data.Rd | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 2466398..716e294 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -35,7 +35,7 @@
 #' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA.
 #' @param na.action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors).
 #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE.
-#' @param ncores Number of cores used to perform covariate extraction.
+#' @param ncores Deprecated.
 #'
 #' @return A list is returned of class \code{disag_data}.
 #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
@@ -94,7 +94,10 @@ prepare_data <- function(polygon_shapefile,
                          mesh.args = NULL,
                          na.action = FALSE,
                          makeMesh = TRUE,
-                         ncores = 2) {
+                         ncores = NULL) {
+
+  if (!missing("ncores"))
+    warning("The ncores argument has been deprecated")
 
   stopifnot(inherits(polygon_shapefile, 'sf'))
   stopifnot(inherits(covariate_rasters, 'SpatRaster'))
diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd
index c310421..150ca58 100644
--- a/man/prepare_data.Rd
+++ b/man/prepare_data.Rd
@@ -14,7 +14,7 @@ prepare_data(
   mesh.args = NULL,
   na.action = FALSE,
   makeMesh = TRUE,
-  ncores = 2
+  ncores = NULL
 )
 }
 \arguments{
@@ -34,7 +34,7 @@ prepare_data(
 
 \item{makeMesh}{logical. If TRUE, build INLA mesh, takes some time. Default TRUE.}
 
-\item{ncores}{Number of cores used to perform covariate extraction.}
+\item{ncores}{Deprecated.}
 
 \item{x}{sf object containing at least three columns: one with the geometry, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).}
 }

From dc94f2876c1c6f05279c00790fb589e9eae76ce1 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 31 Oct 2023 09:39:27 +0000
Subject: [PATCH 155/168] fix prepare bug
---
 R/prepare_data.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 716e294..2d970ff 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -135,7 +135,7 @@ prepare_data <- function(polygon_shapefile,
   #merge to transfer area_id and then tidy up
   polygon_data$area_n <- 1:nrow(polygon_data)
   covariate_data <- merge(covariate_data, polygon_data, by.x = "ID", by.y = "area_n")
-  covariate_data <- covariate_data[ , !(names(covariate_data) %in% c("ID", "cell", "response", "N"))]
+  covariate_data <- covariate_data[ , !(names(covariate_data) %in% c("ID", "response", "N"))]
   colnames(covariate_data )[colnames(covariate_data ) == "area_id"] <- id_var
   polygon_data <- polygon_data[ , !(names(polygon_data) %in% c("area_n"))]

From 4df4c4df7140d35b890f0bc1a77320418fe1e50b Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 31 Oct 2023 10:22:37 +0000
Subject: [PATCH 156/168] Update vignette
---
 vignettes/disaggregation.Rmd | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index 1e74593..ace52cf 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -23,7 +23,7 @@ knitr::opts_chunk$set(
 isINLA <- requireNamespace('INLA', quietly = TRUE)
 ```
 
-The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterogenous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation).
+The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterogeneous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation).
 
 Install **disaggregation** using:
 
```r
devtools::install_github("aknandi/disaggregation")
```
 
 The key functions are `prepare_data`, `fit_model` and `predict`.
The `prepare_data` function takes the aggregated data and covariate data to be used in the model and produces an object to be used by `fit_model`. This function runs the disaggregation model, and the output can be passed to `predict` to produce fine-scale predicted maps of the response variable.
 
-To use the disaggregation `prepare_data` function, you must have the aggregated data as a `SpatialPolygonDataFrame` object and a `RasterStack` of the covariate data to be used in the model.
+To use the disaggregation `prepare_data` function, you must have the aggregated data as an `sf` object and a `SpatRaster` of the covariate data to be used in the model.
 
 ## Example
 
@@ -62,7 +62,7 @@
 
 ggplot() + geom_sf(data = df, aes(fill = cases / population))
 
 ```
 
-Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at the resolution of approximately 1km2.
+Now we simulate two covariate rasters for the area of interest and make a two-layered `SpatRaster`. They are simulated at the resolution of approximately 1km2.
 
 ```{r, fig.show='hold'}
 
@@ -103,22 +103,17 @@ pop_raster <- terra::rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell
 
 ```
 
 To correct small inconsistencies in the polygon geometry, we run the code below.
-We are a bit inbetween frameworks at the moment.
-Most of the package is built on sp. But as rgeos has been depreciated we have to
-switch the polygons to simple features and back again.
 
 ```{r, fig.show='hold'}
-polygon_data <- sf:::as_Spatial(st_buffer(st_as_sf(polygon_data), dist = 0))
-
 df <- sf::st_buffer(df, dist = 0)
 ```
 
 Now that we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `sf` object should be specified.
 
 The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects.
The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [fm_nonconvex_hull_inla function](https://rdrr.io/cran/fmesher/man/fm_nonconvex_hull_inla.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [fm_mesh_2d function](https://rdrr.io/cran/fmesher/man/fm_mesh_2d.html).
 
 ```{r, fig.show='hold', eval= isINLA}
-data_for_model <- prepare_data(x = df,
+data_for_model <- prepare_data(polygon_shapefile = df,
                                covariate_rasters = cov_stack,
                                aggregation_raster = pop_raster,
                                response_var = 'cases',
                                id_var = 'ID',
                                mesh.args = list(cutoff = 0.01,
                                                 offset = c(0.1, 0.5),
                                                 max.edge = c(0.1, 0.2),
                                                 resolution = 250),
-                               na.action = TRUE,
-                               ncores = 1)
+                               na.action = TRUE)
 ```
 
 ```{r, fig.show='hold', eval= isINLA}
 plot(data_for_model)
 ```
 
 Now that we have our data object, we are ready to run the model. Here we can specify
 the likelihood function as Gaussian, binomial or Poisson, and we can specify
 the link function as logit, log or identity.
 
 The disaggregation model makes predictions at the pixel level:
 
 $link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$
 
-where $X$ are the covariates, $GP$ is the gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$):
+where $X$ are the covariates, $GP$ is the Gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$):
 
 $cases_j = \sum_{i \in j} pred_i \times agg_i$

From ee0a1c07857538344744faa866ca5eedfd12b1f7 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 31 Oct 2023 11:16:20 +0000
Subject: [PATCH 157/168] update readme
---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index cc08ed4..c0dd5ac 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Overview
 
 ## Data preparation
 
-Function prepare_data takes in SpatialPolygonDataFrame (response) and RasterStack (covariates) to produce a data structure required for the disaggregation modelling. Calls functions to extract covariate data, polygon data, aggregation (population data), match points to polygons and build an INLA mesh for the spatial field (build_mesh)
+Function prepare_data takes in sf (response) and SpatRaster (covariates) to produce a data structure required for the disaggregation modelling. Calls functions to extract covariate data, polygon data, aggregation (population data), match points to polygons and build an INLA mesh for the spatial field (build_mesh)
 
```r
data_for_model <- prepare_data(polygon_shapefile = shps,
                               covariate_rasters = covariate_stack)
```
 
 ### Input data
 
-* A RasterStack of covariate rasters to be used in the model (covariate_rasters)
-* A SpatialPolygonsDataFrame (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e survey data, it can also contain a sample size column (sample_size_var).
-* (Optional) Raster used to aggregate the pixel level predictions (aggregation_raster) to polygon level (usually population).
If this is not supplied a uniform raster will be used
+* A SpatRaster of covariate rasters to be used in the model (covariate_rasters)
+* An sf object (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e. survey data, it can also contain a sample size column (sample_size_var).
+* (Optional) SpatRaster used to aggregate the pixel level predictions (aggregation_raster) to polygon level (usually population). If this is not supplied a uniform raster will be used
 
 ### Controlling the mesh

From ca2a9d1207f3515a74529757be0b54192993cf48 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 31 Oct 2023 11:21:35 +0000
Subject: [PATCH 158/168] update description
---
 DESCRIPTION | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c24d931..8906a47 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,12 +1,13 @@
 Package: disaggregation
 Type: Package
 Title: Disaggregation Modelling
-Version: 0.2.1
+Version: 0.3.0
 Authors@R: c(
     person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5087-2494")),
     person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4694-8107")),
     person("Rohan", "Arambepola", email = "rarambepola@gmail.com", role = "aut"),
-    person("Andre", "Python", email = "python.andre@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-8094-7226"))
+    person("Andre", "Python", email = "python.andre@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-8094-7226")),
+    person("Simon", "Smart", email = "simon.smart@cantab.net", role = "ctb")
     )
 Description: Fits disaggregation regression models using 'TMB' ('Template Model Builder').
 When the response data are aggregated to polygon level but
When the response data are aggregated to polygon level but From ffd17111dfbe3989c492b47cc9fa63b00e9ec388 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 31 Oct 2023 12:24:11 +0000 Subject: [PATCH 159/168] adjust test-extract --- tests/testthat/test-extract.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 74e16f5..84ba7b4 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -45,10 +45,10 @@ test_that("extractCoordsForMesh function behaves as it should", { skip_on_cran() - cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) + cov_data <- terra::extract(cov_stack, spdf, cells = TRUE, na.rm = TRUE, ID = TRUE) names(cov_data)[1] <- 'area_id' - result <- extractCoordsForMesh(cov_stack, cov_data$cellid) + result <- extractCoordsForMesh(cov_stack, cov_data$cell) result2 <- extractCoordsForMesh(cov_stack) From 7ef6593a4b6af7d5f0dcbae06ce6628ab91663df Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 31 Oct 2023 12:29:17 +0000 Subject: [PATCH 160/168] replace skip_if_not_installed('INLA') --- tests/testthat/test-fit-model.R | 1 + tests/testthat/test-plotting.R | 3 +++ tests/testthat/test-predict-model.R | 4 ++++ tests/testthat/test-prepare-data.R | 1 - 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 4c7a384..df4146d 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -3,6 +3,7 @@ context("Fitting model") test_that("disag_model produces errors when expected", { + skip_if_not_installed('INLA') skip_on_cran() expect_error(disag_model(list())) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 306317c..ab0e0e6 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -15,6 +15,7 @@ test_that("Check plot_polygon_data function works as expected", { test_that("Check plot.disag.data function works as expected", { + skip_if_not_installed('INLA') skip_on_cran() test_data2 <- prepare_data(polygon_shapefile = spdf2, @@ -43,6 +44,7 @@ test_that("Check plot.disag.data function works as expected", { test_that("Check plot.disag_model function works as expected", { + skip_if_not_installed('INLA') skip_on_cran() fit_result <- disag_model(test_data, iterations = 10) @@ -64,6 +66,7 @@ test_that("Check plot.disag_model function works as expected", { test_that("Check plot.disag_prediction function works as expected", { + skip_if_not_installed('INLA') skip_on_cran() fit_result <- disag_model(test_data, iterations = 1000, diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index ae95e13..33ac3ef 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -2,6 +2,7 @@ context("Predict model") test_that("Check predict.disag_model function works as expected", { + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 1000, @@ -95,6 +96,7 @@ test_that("Check predict.disag_model function works as expected", { test_that("Check predict.disag_model function works with newdata", { + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, @@ -141,6 +143,7 @@ test_that("Check predict.disag_model function works with newdata", { test_that('Check that check_newdata works', { + skip_if_not_installed('INLA') skip_on_cran() result <- 
disag_model(test_data, field = FALSE, iterations = 100)
@@ -213,6 +216,7 @@ test_that('Check that setup_objects works', {
 
 test_that('Check that predict_single_raster works', {
 
+  skip_if_not_installed('INLA')
   skip_on_cran()
 
   result <- disag_model(test_data, iterations = 100,
diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R
index bef721e..f71c4dd 100644
--- a/tests/testthat/test-prepare-data.R
+++ b/tests/testthat/test-prepare-data.R
@@ -2,7 +2,6 @@ context("Preparing data")
 
 test_that("Check prepare_data function works as expected", {
 
-  skip_if_not_installed('INLA')
   skip_on_cran()
 
   result <- prepare_data(polygon_shapefile = spdf,

From 17aa2ca57a53b71505d301c4b7ffdc59be700ad5 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Wed, 1 Nov 2023 09:43:11 +0000
Subject: [PATCH 161/168] add missing skip inla and update prepare docs
---
 R/prepare_data.R | 2 +-
 man/prepare_data.Rd | 4 ++--
 tests/testthat/test-fit-model.R | 5 +----
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 2d970ff..dc5029b 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -26,7 +26,7 @@
 #' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero
 #' and sets covariate NA pixels to the median value for that covariate.
 #'
-#' @param x sf object containing at least three columns: one with the geometried, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).
+#' @param polygon_shapefile sf object containing at least three columns: one with the geometry, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).
 #' @param covariate_rasters SpatRaster of covariate rasters to be used in the model.
 #' @param aggregation_raster SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.
 #' @param id_var Name of column in sf object with the polygon id.
diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd
index 150ca58..5b5d5ab 100644
--- a/man/prepare_data.Rd
+++ b/man/prepare_data.Rd
@@ -18,6 +18,8 @@ prepare_data(
 )
 }
 \arguments{
+\item{polygon_shapefile}{sf object containing at least three columns: one with the geometry, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).}
+
 \item{covariate_rasters}{SpatRaster of covariate rasters to be used in the model.}
 
 \item{aggregation_raster}{SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.}
@@ -35,7 +37,6 @@
 
 \item{makeMesh}{logical. If TRUE, build INLA mesh, takes some time.
Default TRUE.} \item{ncores}{Deprecated.} - -\item{x}{sf object containing at least three columns: one with the geometried, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).} } \value{ A list is returned of class \code{disag_data}. diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index df4146d..a3a7774 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -17,6 +17,7 @@ test_that("disag_model produces errors when expected", { test_that("disag_model behaves as expected", { + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, iid = FALSE) @@ -26,12 +27,8 @@ test_that("disag_model behaves as expected", { expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 4) expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - }) - - - test_that("disag_model with 1 covariate behaves as expected", { skip_if_not_installed('INLA') From fa9f1d552522bf99ae9c7170de276772a077d81a Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Wed, 1 Nov 2023 16:04:16 +0000 Subject: [PATCH 162/168] another missing INLA skip --- tests/testthat/test-predict-model.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 33ac3ef..d2cd848 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -170,6 +170,7 @@ test_that('Check that check_newdata works', { test_that('Check that setup_objects works', { + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, From e867911c322e3735a4c2f6542f50979831505437 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 7 Nov 2023 08:30:49 +0000 Subject: [PATCH 163/168] remove INLA requirement from prepare data --- R/prepare_data.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index dc5029b..f95cd31 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -174,13 +174,8 @@ prepare_data <- function(polygon_shapefile, startendindex <- getStartendindex(covariate_data, polygon_data, id_var = id_var) if(makeMesh) { - if(!requireNamespace('INLA', quietly = TRUE)) { - mesh <- NULL - message("Cannot build mesh as INLA is not installed. If you need a spatial field in your model, you must install INLA.") - } else { mesh <- build_mesh(polygon_shapefile, mesh.args) - } - } else { + } else { mesh <- NULL message("A mesh is not being built. 
You will not be able to run a spatial model without a mesh.") } From 0e2031c73a1a60dd2de715d00b63c80a3e826926 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 7 Nov 2023 09:07:38 +0000 Subject: [PATCH 164/168] remove foreach dependency --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8906a47..85351f8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,6 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 Imports: - foreach, splancs, Matrix, stats, From 8e0e63a244daeca635b541a095ab9d147a563692 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 7 Nov 2023 09:15:32 +0000 Subject: [PATCH 165/168] fix long lines in docs --- R/fit_model.R | 6 ++++-- man/fit_model.Rd | 3 ++- man/make_model_object.Rd | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index 47dacab..e448890 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -94,7 +94,8 @@ #' # Create raster stack #' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) #' terra::ext(r) <- terra::ext(spdf) -#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +#' r[] <- sapply(1:terra::ncell(r), function(x){ +#' rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))} #' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) #' terra::ext(r2) <- terra::ext(spdf) #' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) @@ -291,7 +292,8 @@ disag_model <- function(data, #' # Create raster stack #' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) #' terra::ext(r) <- terra::ext(spdf) -#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +#' r[] <- sapply(1:terra::ncell(r), function(x){ +#' rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))} #' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) #' terra::ext(r2) <- terra::ext(spdf) #' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) diff --git a/man/fit_model.Rd b/man/fit_model.Rd index f06c963..771fac7 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -133,7 +133,8 @@ spdf <- sf::st_sf(response_df, geometry = polys) # Create raster stack r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)) +r[] <- sapply(1:terra::ncell(r), function(x){ +rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3))} r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) terra::ext(r2) <- terra::ext(spdf) r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd index 1f12a97..25022c7 100644 --- a/man/make_model_object.Rd +++ b/man/make_model_object.Rd @@ -113,7 +113,8 @@ spdf <- sf::st_sf(response_df, geometry = polys) # Create raster stack r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)) +r[] <- 
sapply(1:terra::ncell(r), function(x){ +rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3))} r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) terra::ext(r2) <- terra::ext(spdf) r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) From 166b08509e3724718aed564777afde47ddd19202 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 7 Nov 2023 09:39:22 +0000 Subject: [PATCH 166/168] improve vignette plot --- vignettes/disaggregation.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index ace52cf..9892b98 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -57,8 +57,8 @@ polygons <- sf::st_as_sf(NYleukemia$spatial.polygon) df <- cbind(polygons, NYleukemia$data) -ggplot() + geom_sf(data = df, aes(fill = cases / population)) +ggplot() + geom_sf(data = df, aes(fill = cases / population)) + scale_fill_viridis_c(lim = c(0, 0.003)) ``` From 3b80068ec213e373c1c73f911af597b09cf13b24 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 7 Nov 2023 12:00:13 +0000 Subject: [PATCH 167/168] add missing suggest --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 85351f8..c49bce4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,6 +35,7 @@ Imports: Additional_repositories: https://inla.r-inla-download.org/R/stable Suggests: testthat, + INLA, knitr, rmarkdown, SpatialEpi From d8a5e1ba7b38ccbb4df3628c5bcf2eefb6596bc8 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 7 Nov 2023 13:42:47 +0000 Subject: [PATCH 168/168] tweak priors --- tests/testthat/test-summary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R index 56961be..0538441 100644 --- a/tests/testthat/test-summary.R +++ b/tests/testthat/test-summary.R @@ -74,7 +74,7 @@ test_that("Check summary.disag_predictions function works as expected", { prior_rho_prob = 0.01, prior_sigma_max = 0.1, prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.0001, + prior_iideffect_sd_max = 0.00001, prior_iideffect_sd_prob = 0.01)) pred <- predict(result)
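Taken together, patches 149-168 move the package onto sf, terra and fmesher and retire the parallel-extraction machinery. The sketch below is illustrative only, assuming the post-0.3.0 interface shown in the diffs above; the input objects `df`, `cov_stack` and `pop_raster` are stand-ins for the vignette's simulated sf polygons and SpatRaster layers, and the argument values are copied from the vignette and test files rather than newly recommended settings.

```r
# Hedged end-to-end sketch assembled from the vignette and tests patched above.
# Assumes df is an sf object with 'cases' and 'ID' columns, and that cov_stack
# and pop_raster are terra SpatRaster objects covering the same extent.
library(disaggregation)

data_for_model <- prepare_data(polygon_shapefile = df,         # sf, not SpatialPolygonsDataFrame
                               covariate_rasters = cov_stack,  # SpatRaster, not RasterStack
                               aggregation_raster = pop_raster,
                               response_var = 'cases',
                               id_var = 'ID',
                               na.action = TRUE)               # ncores is deprecated, so omitted

fit <- disag_model(data_for_model,     # fits the model via TMB, with the
                   iterations = 1000,  # spatial field and iid effect switched
                   field = TRUE,       # on, as exercised in the test files
                   iid = TRUE)

preds <- predict(fit)                  # pixel-level predictions, as in test-summary.R
plot(data_for_model)                   # plot method exercised in test-plotting.R
```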