From 76e252f3c618fbb2e3fd62e0f4f112815393752c Mon Sep 17 00:00:00 2001 From: Rohan Sasne Date: Tue, 11 Jul 2023 07:42:32 -0500 Subject: [PATCH 01/68] ADd --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c27cf84d80..13cec351f3b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,7 @@ on: tags: - '*' + pull_request: merge_group: From 3a654067c61f932bc3fe45b165ffaedf0d478f58 Mon Sep 17 00:00:00 2001 From: Rohan Sasne Date: Tue, 11 Jul 2023 07:45:08 -0500 Subject: [PATCH 02/68] ADd --- .github/workflows/ci.yml | 124 ++------------------------------------- 1 file changed, 6 insertions(+), 118 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 13cec351f3b..0d877dae00d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,12 +5,13 @@ on: branches: - master - develop + - optimiseCI tags: - '*' - - pull_request: + + pull_request: merge_group: @@ -83,70 +84,14 @@ jobs: # ---------------------------------------------------------------------- # R CHECK # ---------------------------------------------------------------------- - check_base: - if: github.event_name != 'issue_comment' || startsWith(github.event.comment.body, '/build') - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - R: - - "4.0" - - "4.1" - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - _R_CHECK_LENGTH_1_CONDITION_: true - _R_CHECK_LENGTH_1_LOGIC2_: true - # Avoid compilation check warnings that come from the system Makevars - # See https://stat.ethz.ch/pipermail/r-package-devel/2019q2/003898.html - _R_CHECK_COMPILATION_FLAGS_KNOWN_: -Wformat -Werror=format-security -Wdate-time - # Keep R checks from trying to consult the very flaky worldclockapi.com - _R_CHECK_SYSTEM_CLOCK_: 0 - - container: - image: pecan/depends:R${{ matrix.R }} - - steps: - # checkout source code - - name: work around https://github.com/actions/checkout/issues/766 - run: git config --global --add safe.directory "$GITHUB_WORKSPACE" - - uses: actions/checkout@v3 - with: - set-safe-directory: false - - # Forbid spaces in names. Yes, *we* know it's not 1980 anymore, but Make doesn't. - - name: check for filenames that would confuse Make - run: | - SPACENAMES=`find . -name '* *'` - if [ -n "$SPACENAMES" ]; then - echo "::error file=${SPACENAMES}::Spaces in filename(s): ${SPACENAMES}. Please rename these files by converting spaces to underscores." - exit 1 - fi - - # install additional tools needed - - name: install utils - run: apt-get update && apt-get install -y postgresql-client qpdf - - name: install new dependencies - run: Rscript scripts/generate_dependencies.R && Rscript docker/depends/pecan.depends.R - - # run PEcAn checks - - name: check - run: make -j1 check_base - env: - REBUILD_DOCS: "FALSE" - RUN_TESTS: "FALSE" - - - name: check for out-of-date files - uses: infotroph/tree-is-clean@v1 - - check_modules: + check: if: github.event_name != 'issue_comment' || startsWith(github.event.comment.body, '/build') runs-on: ubuntu-latest strategy: fail-fast: false matrix: + package: [check_base, check_modules, check_models] R: - "4.0" - "4.1" @@ -189,7 +134,7 @@ jobs: # run PEcAn checks - name: check - run: make -j1 check_modules + run: make -j1 ${{ matrix.package }} env: REBUILD_DOCS: "FALSE" RUN_TESTS: "FALSE" @@ -197,63 +142,6 @@ jobs: - name: check for out-of-date files uses: infotroph/tree-is-clean@v1 - check_models: - if: github.event_name != 'issue_comment' || startsWith(github.event.comment.body, '/build') - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - R: - - "4.0" - - "4.1" - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - _R_CHECK_LENGTH_1_CONDITION_: true - _R_CHECK_LENGTH_1_LOGIC2_: true - # Avoid compilation check warnings that come from the system Makevars - # See https://stat.ethz.ch/pipermail/r-package-devel/2019q2/003898.html - _R_CHECK_COMPILATION_FLAGS_KNOWN_: -Wformat -Werror=format-security -Wdate-time - # Keep R checks from trying to consult the very flaky worldclockapi.com - _R_CHECK_SYSTEM_CLOCK_: 0 - - container: - image: pecan/depends:R${{ matrix.R }} - - steps: - # checkout source code - - name: work around https://github.com/actions/checkout/issues/766 - run: git config --global --add safe.directory "$GITHUB_WORKSPACE" - - uses: actions/checkout@v3 - with: - set-safe-directory: false - - # Forbid spaces in names. Yes, *we* know it's not 1980 anymore, but Make doesn't. - - name: check for filenames that would confuse Make - run: | - SPACENAMES=`find . -name '* *'` - if [ -n "$SPACENAMES" ]; then - echo "::error file=${SPACENAMES}::Spaces in filename(s): ${SPACENAMES}. Please rename these files by converting spaces to underscores." - exit 1 - fi - - # install additional tools needed - - name: install utils - run: apt-get update && apt-get install -y postgresql-client qpdf - - name: install new dependencies - run: Rscript scripts/generate_dependencies.R && Rscript docker/depends/pecan.depends.R - - # run PEcAn checks - - name: check - run: make -j1 check_models - env: - REBUILD_DOCS: "FALSE" - RUN_TESTS: "FALSE" - - name: check for out-of-date files - uses: infotroph/tree-is-clean@v1 - - # ---------------------------------------------------------------------- # SIPNET TESTS # ---------------------------------------------------------------------- From a76d0b08c97bcbeb8afbf16ab009f44288a4020a Mon Sep 17 00:00:00 2001 From: Rohan Sasne Date: Tue, 11 Jul 2023 08:05:45 -0500 Subject: [PATCH 03/68] Optimize CI pipeline --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0d877dae00d..b41779ce615 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,12 +5,10 @@ on: branches: - master - develop - - optimiseCI tags: - '*' - pull_request: merge_group: @@ -133,6 +131,7 @@ jobs: run: Rscript scripts/generate_dependencies.R && Rscript docker/depends/pecan.depends.R # run PEcAn checks + # The package names of base, modules, and models are passed as matrix variables to avoid repeatability of code - name: check run: make -j1 ${{ matrix.package }} env: From b82af96a93e448f4eecc80ee3494d4c032558754 Mon Sep 17 00:00:00 2001 From: Rohan Sasne Date: Tue, 11 Jul 2023 08:06:33 -0500 Subject: [PATCH 04/68] Optimize CI pipeline --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b41779ce615..7b9c0f6a6e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ on: tags: - '*' - pull_request: + pull_request: merge_group: From 363b42c19f62915a964de675aa1ef2415d95e94a Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:23:13 -0400 Subject: [PATCH 05/68] Add the matrix_network and the GrabFillMatrix functions into the utils package. --- base/utils/NAMESPACE | 2 + base/utils/R/matrix_operation.R | 77 ++++++++++++++++++++++++++++++++ base/utils/man/GrabFillMatrix.Rd | 24 ++++++++++ base/utils/man/matrix_network.Rd | 20 +++++++++ 4 files changed, 123 insertions(+) create mode 100644 base/utils/R/matrix_operation.R create mode 100644 base/utils/man/GrabFillMatrix.Rd create mode 100644 base/utils/man/matrix_network.Rd diff --git a/base/utils/NAMESPACE b/base/utils/NAMESPACE index 300bd155fbb..fc0b6306428 100644 --- a/base/utils/NAMESPACE +++ b/base/utils/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(GrabFillMatrix) export(arrhenius.scaling) export(as.sequence) export(bugs.rdist) @@ -27,6 +28,7 @@ export(listToArgString) export(load.modelpkg) export(load_local) export(match_file) +export(matrix_network) export(mcmc.list2init) export(misc.are.convertible) export(misc.convert) diff --git a/base/utils/R/matrix_operation.R b/base/utils/R/matrix_operation.R new file mode 100644 index 00000000000..5567e153aac --- /dev/null +++ b/base/utils/R/matrix_operation.R @@ -0,0 +1,77 @@ +##' @title GrabFillMatrix +##' @name GrabFillMatrix +##' @author Dongchen Zhang +##' +##' @param M source matrix that will be either subtracted or filled in. +##' @param ind vector of index that of where to be subtracted or filled in. +##' @param M1 additional matrix used to fill in the source matrix, the default it NULL. +##' @details This function helps subtract or fill in a matrix given the index. +##' +##' @export +GrabFillMatrix <- function (M, ind, M1 = NULL) { + if (is.null(M1)) { + #grab a sub-matrix + m <- matrix(NA, length(ind), length(ind)) + for (i in seq_along(ind)) { + for (j in seq_along(ind)) { + m[i, j] <- M[ind[i], ind[j]] + } + } + } else { + #fill into a larger matrix + m <- M + for (i in seq_along(ind)) { + for (j in seq_along(ind)) { + m[ind[i], ind[j]] <- M1[i, j] + } + } + } + m +} + +##' @title matrix_network +##' @name matrix_network +##' @author Dongchen Zhang +##' +##' @param mat a boolean matrix representing the interactions between any sites. +##' +##' @return It returns lists of index representing each network. +##' +##' @export +matrix_network <- function (mat) { + #initialize the final returned list. + vec_group <- vector("list", ncol(mat)) + #initialize the vector for sites that are completed. + sites.complete <- c() + for (i in 1:ncol(mat)) { + #if we already completed the ith site, go next. + if (i %in% sites.complete) { + next + } + #initialize the arguments for the while loop. + vec <- c() + stop <- FALSE + inits <- i + #while loop + while (!stop) { + Inits <- c() + for (init in inits) { + Inits <- c(Inits, which(mat[init,])) + } + Inits <- Inits[which(!Inits %in% vec)] + vec <- c(vec, Inits) + #if we don't have any new site that belongs to this network. + if (length(Inits) == 0) { + #then stop. + stop <- !stop + } else { + #else we initialize a new round of searching by new sites. + inits <- Inits + } + } + sites.complete <- c(sites.complete, vec) + vec_group[[i]] <- sort(vec) + } + vec_group[sapply(vec_group, is.null)] <- NULL + return(vec_group) +} \ No newline at end of file diff --git a/base/utils/man/GrabFillMatrix.Rd b/base/utils/man/GrabFillMatrix.Rd new file mode 100644 index 00000000000..1f1ee4f6c58 --- /dev/null +++ b/base/utils/man/GrabFillMatrix.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix_operation.R +\name{GrabFillMatrix} +\alias{GrabFillMatrix} +\title{GrabFillMatrix} +\usage{ +GrabFillMatrix(M, ind, M1 = NULL) +} +\arguments{ +\item{M}{source matrix that will be either subtracted or filled in.} + +\item{ind}{vector of index that of where to be subtracted or filled in.} + +\item{M1}{additional matrix used to fill in the source matrix, the default it NULL.} +} +\description{ +GrabFillMatrix +} +\details{ +This function helps subtract or fill in a matrix given the index. +} +\author{ +Dongchen Zhang +} diff --git a/base/utils/man/matrix_network.Rd b/base/utils/man/matrix_network.Rd new file mode 100644 index 00000000000..d07b08bb94f --- /dev/null +++ b/base/utils/man/matrix_network.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix_operation.R +\name{matrix_network} +\alias{matrix_network} +\title{matrix_network} +\usage{ +matrix_network(mat) +} +\arguments{ +\item{mat}{a boolean matrix representing the interactions between any sites.} +} +\value{ +It returns lists of index representing each network. +} +\description{ +matrix_network +} +\author{ +Dongchen Zhang +} From ee5ce6736c1b9987c34ebc1807029cfcbb7ad643 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:23:50 -0400 Subject: [PATCH 06/68] Update the documentation. --- .../02_hidden_analyses.Rmd | 160 +++++++++++++++--- book_source/03_topical_pages/03_pecan_xml.Rmd | 2 + 2 files changed, 142 insertions(+), 20 deletions(-) diff --git a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd index dd60e020fc6..b134bf850eb 100644 --- a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd +++ b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd @@ -292,8 +292,49 @@ where we calculate the mean of the process covariance matrix $\left(\bar{\boldsy Users have control over how they think is the best way to estimate $Q$. Our code will look for the tag `q.type` in the XML settings under `state.data.assimilation` which can take 3 values of Single, PFT or Site. If `q.type` is set to single then one value of process variance will be estimated across all different sites or PFTs. On the other hand, when q.type` is set to Site or PFT then a process variance will be estimated for each site or PFT at a cost of more time and computation power. #### Multi-site State data assimilation. +`sda.enkf.multisite` is housed within: `/pecan/modules/assim.sequential/R` + +The 4-site tutorial is housed within: `~/pecan/modules/assim.sequential/inst/MultiSite-Exs` + +#### **sda.enkf.multisite.R Description** `sda.enkf.multisite` function allows for assimilation of observed data at multiple sites at the same time. In order to run a multi-site SDA, one needs to send a multisettings pecan xml file to this function. This multisettings xml file needs to contain information required for running at least two sites under `run` tag. The code will automatically run the ensembles for all the sites and reformats the outputs matching the required formats for analysis step. +#### **sda.enkf.multisite.R Arguments** +* settings - (required) [State Data Assimilation Tags Example] settings object + +* obs.mean - (required) List of sites named by site ids, which contains dataframe for observation means, named with observation datetime. + +* obs.cov - (required) List of sites named by site ids, which contains dataframe for observation covariance matrix, named with observation datetime. + +* Q - (optional) Process covariance matrix given if there is no data to estimate it. Defualt is NULL. + +* restart - (optional) Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA. Defualt is NULL. + +* pre_enkf_params - (optional) Used for carrying out SDA with pre-existed enkf.params, in which the Pf, aqq, and bqq can be used for the analysis step. Defualt is NULL. +* ensemble.samples - (optional) Pass ensemble.samples from outside to avoid GitHub check issues. Defualt is NULL. + +* control - (optional) List of flags controlling the behaviour of the SDA. Default is as follows: +``` +control=list(trace = TRUE, + FF = FALSE, + interactivePlot = FALSE, + TimeseriesPlot = FALSE, + BiasPlot = FALSE, + plot.title = NULL, + facet.plots = FALSE, + debug = FALSE, + pause = FALSE, + Profiling = FALSE, + OutlierDetection=FALSE, + parallel_qsub = TRUE, + free_run = FALSE, + send_email = NULL, + keepNC = TRUE, + forceRun = TRUE, + run_parallel = TRUE) +``` + +#### **obs.mean and obs.cov Description** The observed mean and cov needs to be formatted as list of different dates with observations. For each element of this list also there needs to be a list with mean and cov matrices of different sites named by their siteid. In case that zero variance was estimated for a variable inside the obs.cov, the SDA code will automatically replace that with half of the minimum variance from other non-zero variables in that time step. This would look like something like this: @@ -324,34 +365,113 @@ $`2010/12/31`$`1000000651` [1,] 15.2821691 0.513584319 [2,] 0.1213583 0.001162113 ``` -An example of multi-settings pecan xml file also may look like below: +#### Anlysis SDA workflow +Before running the SDA analysis functions, the ensemble forecast results have to be generated, and arguments such as H matrix, MCMC arguments, and multi-site Y and R (by `Construct.R` function) have to be generated as well. + +* Decide which analysis function to be used. Here we have three options: 1) traditional ensemble Kalman Filter (`EnKF.MultiSite`) with analytical solution; 2) generalized ensemble Kalman Filter (`GEF.MultiSite`); and 3) block-based generalized ensemble Kalman Filter (`analysis_sda_block`). The latter two methods support the process variance across space and time. To choose the analysis method 1, we need to set the `process.variance` as FALSE. Otherwise, if we set the `process.variance` as TRUE and provide the `q.type` as either `SINGLE` or `SITE` the 2 method will be used, and if we provide the `q.type` as either `vector` or `wishart` the 3 method will be used. + +* If we decide to use `EnKF.MultiSite`, then the analysis results will be calculated based on equations. + +* If we decide to use `GEF.MultiSite`, then it will first do censoring process based on the how you setup the `censored.data` flag within settings xml file. Then, if t equals to 1, we will first initialize the `aqq`, `bqq`, `aq`, and `bq` based on how you setup the `q.type` argument within settings xml file. After preparing the initial conditions (ICs), data, and constants for the `GEF.MultiSite.Nimble` nimble model, the MCMC sampling will happen afterwards. Finally, the process variance and the analysis results will be calculated and updated and returned to the `sda.enkf.multisite` function. + +*If we decide to use `analysis_sda_block`, then if t equals to 1, we will need to initialize the overall block-based MCMC lists (`block.list.all`). After that, we will need to specify the `MCMC_args` either from the `control` list or by default (see bellow). Next, within the `analysis_sda_block` function, it will first use the `build.block.xy` function to initialize the block lists based on each site or underlying networks between sites, and then split long vectors or large matrices (`mu.f`, `y.censored`, or `Pf`) into each block and fill in the data (except for process variance) and constants for each block. Then, it will update the process variance using the `update_q` function, if t equals to 1, then it will initialize the arguments for the process variance, otherwilse, it will grab the previous updated process variance. After that, in the `MCMC_Init` function, it will creates the initial conditions (ICs) for the MCMC sampling based on randomly sampled data. Then, the completed block-based arguments (`block.list.all`) will be used by the `MCMC_block_function` function under the parallel mode. Finally, the block-based results will be converted into long vectors and large matrices by the `block.2.vector` function and values such as `block.list.all`, `mu.f`, `mu.a`, `Pf`, `Pa` will be returned back to the `sda.enkf.multisite` function. +``` +MCMC.args <- list(niter = 1e5, + nthin = 10, + nchain = 1, + nburnin = 1e4) +``` + + +#### Example of multi-settings pecan xml file ``` - FALSE - TRUE - - 1000000040 - 1000013298 - - + TRUE + FALSE + TRUE + FALSE + TRUE + FALSE + sipnet.out + SINGLE + FALSE + Local.support + 1 + 5 + - GWBI - KgC/m^2 - 0 - 9999 + AbvGrndWood + MgC/ha + 0 + 9999 - AbvGrndWood - KgC/m^2 - 0 - 9999 + LAI + + 0 + 9999 - - 1960/01/01 - 2000/12/31 - + + SoilMoistFrac + + 0 + 100 + + + TotSoilCarb + kg/m^2 + 0 + 9999 + + + + + /projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB + TRUE + TRUE + + year + 1 + + + + 30 + TRUE + TRUE + + year + 1 + + + + 30 + TRUE + FALSE + + year + 1 + + + + + year + 1 + + + /projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS + 2012-07-15 + 2021-07-15 + + + 2004/01/01 + 2006/12/31 + + 1 + 2004/01/01 + 2006/12/31 + -1 diff --git a/book_source/03_topical_pages/03_pecan_xml.Rmd b/book_source/03_topical_pages/03_pecan_xml.Rmd index 442c3189d37..57aef029e4b 100644 --- a/book_source/03_topical_pages/03_pecan_xml.Rmd +++ b/book_source/03_topical_pages/03_pecan_xml.Rmd @@ -757,6 +757,7 @@ The following tags can be used for state data assimilation. More detailed inform FALSE sipnet.out SINGLE + FALSE Local.support 1 5 @@ -842,6 +843,7 @@ The following tags can be used for state data assimilation. More detailed inform * **NC.Overwrite** : [optional] Bool variable decide if you want to overwrite the previous netcdf file when there is a overlap in time, the default is FALSE. * **NC.Prefix** : [optional] The prefix for the generation of the full-year netcdf file, the default is sipnet.out. * **q.type** : [optional] The type of process variance that will be estimated, the default is SINGLE. +* **by.site** : [optional] The flag, that is exclusively used by the `analysis_sda_block` function, decide if we want to consider the underlying networks between sites. The default is FALSE. * **Localization.FUN** : [optional] The localization function name for the localization operation, the default is Local.support. * **scalef** : [optional] The scale parameter used for the localization operation, the smaller the value is, the sites are more isolated. * **chains** : [optional] The number of chains needed to be estimated during the MCMC sampling process. From 5282ab785efa5bb15049f8de5f24aec6389373f1 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:25:41 -0400 Subject: [PATCH 07/68] Add the namespace for the assimSeqential package and the construct_nimble_H function. --- modules/assim.sequential/NAMESPACE | 3 + .../R/Multi_Site_Constructors.R | 93 +++++++++++++++++++ .../man/construct_nimble_H.Rd | 29 ++++++ 3 files changed, 125 insertions(+) create mode 100644 modules/assim.sequential/man/construct_nimble_H.Rd diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 6c567538bce..c352ba52faa 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -25,6 +25,7 @@ export(alr) export(assessParams) export(block_matrix) export(conj_wt_wishart_sampler) +export(construct_nimble_H) export(dwtmnorm) export(generate_colors_sda) export(get_ensemble_weights) @@ -57,6 +58,8 @@ export(y_star_create) import(furrr) import(lubridate) import(nimble) +importFrom(dplyr,"%>%") +importFrom(foreach,"%dopar%") importFrom(lubridate,"%m+%") importFrom(magrittr,"%>%") importFrom(rlang,.data) diff --git a/modules/assim.sequential/R/Multi_Site_Constructors.R b/modules/assim.sequential/R/Multi_Site_Constructors.R index c8eeba37af3..1e417ec26e2 100755 --- a/modules/assim.sequential/R/Multi_Site_Constructors.R +++ b/modules/assim.sequential/R/Multi_Site_Constructors.R @@ -200,3 +200,96 @@ Construct.H.multisite <- function(site.ids, var.names, obs.t.mean){ } H } + +##' @title construct_nimble_H +##' @name construct_nimble_H +##' @author Dongchen Zhang +##' +##' @param site.ids a vector name of site ids +##' @param var.names vector names of state variable names +##' @param obs.t list of vector of means for the time t for different sites. +##' @param pft.path physical path to the pft.csv file. +##' @param by criteria, it supports by variable, site, pft, all, and single Q. +##' +##' @description This function is an upgrade to the Construct.H.multisite function which provides the index by different criteria. +##' +##' @return Returns one vector containing index for which Q to be estimated for which variable, +##' and the other vector gives which state variable has which observation (= element.W.Data). +##' @export +construct_nimble_H <- function(site.ids, var.names, obs.t, pft.path = NULL, by = "single"){ + if(by == "pft" | by == "block_pft_var" & is.null(pft.path)){ + PEcAn.logger::logger.info("please provide pft path.") + return(0) + } + H <- Construct.H.multisite(site.ids, var.names, obs.t) + if (by == "var") { + total_var_name <- rep(var.names, length(site.ids)) + Ind <- rep(0, dim(H)[2]) + for (i in seq_along(var.names)) { + Ind[which(total_var_name == var.names[i])] <- i + } + } else if (by == "site") { + total_site_id <- rep(site.ids, each = length(var.names)) + Ind <- rep(0, dim(H)[2]) + for (i in seq_along(site.ids)) { + Ind[which(total_site_id == site.ids[i])] <- i + } + } else if (by == "pft") { + pft <- read.csv(pft.path) + rownames(pft) <- pft$site + total_site_id <- rep(site.ids, each = length(var.names)) + total_pft <- pft[total_site_id, 2] + Ind <- rep(0, dim(H)[2]) + pft.names <- sort(unique(pft$pft)) + for (i in seq_along(pft.names)) { + Ind[which(total_pft == pft.names[i])] <- i + } + } else if (by == "block_pft_var") { + #by pft + pft <- read.csv(pft.path) + rownames(pft) <- pft$site + total_site_id <- rep(site.ids, each = length(var.names)) + total_pft <- pft[total_site_id, 2] + Ind_pft <- rep(0, dim(H)[2]) + pft.names <- sort(unique(pft$pft)) + for (i in seq_along(pft.names)) { + Ind_pft[which(total_pft == pft.names[i])] <- i + } + #by var + total_var_name <- rep(var.names, length(site.ids)) + Ind_var <- rep(0, dim(H)[2]) + for (i in seq_along(var.names)) { + Ind_var[which(total_var_name == var.names[i])] <- i + } + #by site + total_site_id <- rep(site.ids, each = length(var.names)) + Ind_site <- rep(0, dim(H)[2]) + for (i in seq_along(site.ids)) { + Ind_site[which(total_site_id == site.ids[i])] <- i + } + # #create reference to which block and which var + # #Ind for which site should use which block + # block.index <- var.index <- Ind_site + # for (i in seq_along(Ind_site)) { + # Ind_block[i] <- Ind_pft[i] + # } + } else if (by == "all") { + Ind <- 1:dim(H)[2] + } else if (by == "single") { + Ind <- rep(1, dim(H)[2]) + } else { + PEcAn.logger::logger.info("Couldn't find the proper by argument!") + return(0) + } + if (by == "block_pft_var") { + return(list(Ind_pft = Ind_pft[which(apply(H, 2, sum) == 1)], + Ind_site = Ind_site[which(apply(H, 2, sum) == 1)], + Ind_var = Ind_var[which(apply(H, 2, sum) == 1)], + H.ind = which(apply(H, 2, sum) == 1))) + } else { + return(list(Q.ind = Ind[which(apply(H, 2, sum) == 1)], + H.ind = which(apply(H, 2, sum) == 1), + H.matrix = H)) + } + +} \ No newline at end of file diff --git a/modules/assim.sequential/man/construct_nimble_H.Rd b/modules/assim.sequential/man/construct_nimble_H.Rd new file mode 100644 index 00000000000..f7705188a1b --- /dev/null +++ b/modules/assim.sequential/man/construct_nimble_H.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Multi_Site_Constructors.R +\name{construct_nimble_H} +\alias{construct_nimble_H} +\title{construct_nimble_H} +\usage{ +construct_nimble_H(site.ids, var.names, obs.t, pft.path = NULL, by = "single") +} +\arguments{ +\item{site.ids}{a vector name of site ids} + +\item{var.names}{vector names of state variable names} + +\item{obs.t}{list of vector of means for the time t for different sites.} + +\item{pft.path}{physical path to the pft.csv file.} + +\item{by}{criteria, it supports by variable, site, pft, all, and single Q.} +} +\value{ +Returns one vector containing index for which Q to be estimated for which variable, +and the other vector gives which state variable has which observation (= element.W.Data). +} +\description{ +This function is an upgrade to the Construct.H.multisite function which provides the index by different criteria. +} +\author{ +Dongchen Zhang +} From 57d0b07246a4ff474045011a11e5cb703c892890 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:26:35 -0400 Subject: [PATCH 08/68] Add the script for the block-based SDA workflow. --- .../assim.sequential/R/Analysis_sda_block.R | 493 ++++++++++++++++++ 1 file changed, 493 insertions(+) create mode 100644 modules/assim.sequential/R/Analysis_sda_block.R diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R new file mode 100644 index 00000000000..b24ff3881b8 --- /dev/null +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -0,0 +1,493 @@ +##' @title analysis_sda_block +##' @name analysis_sda_block +##' @author Dongchen Zhang +##' +##' @param settings pecan standard multi-site settings list. +##' @param block.list.all List contains nt empty sub-elements. +##' @param X A matrix contains ensemble forecasts. +##' @param obs.mean List of dataframe of observation means, named with observation datetime. +##' @param obs.cov List of covariance matrices of state variables , named with observation datetime. +##' @param t time point. +##' @param nt total length of time steps. +##' @param MCMC.args arguments for the MCMC sampling. +##' @details This function will add data and constants into each block that are needed for the MCMC sampling. +##' +##' @description This function provides the block-based MCMC sampling approach. +##' +##' @return It returns the `build.block.xy` object and the analysis results. +##' @importFrom foreach %dopar% +##' @importFrom dplyr %>% +analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args) { + #convert from vector values to block lists. + if ("try-error" %in% class(try(block.results <- build.block.xy(settings = settings, + block.list.all = block.list.all, + X = X, + obs.mean = obs.mean, + obs.cov = obs.cov, + t = t)))) { + PEcAn.logger::logger.error("Something wrong within the build.block.xy function.") + } + #grab block.list and H from the results. + block.list.all <- block.results[[1]] + H <- block.results[[2]] + Y <- block.results[[3]] + R <- block.results[[4]] + + #update q. + if ("try-error" %in% class(try(block.list.all <- update_q(block.list.all, t, nt)))) { + PEcAn.logger::logger.error("Something wrong within the update_q function.") + } + + #add initial conditions. + if ("try-error" %in% class(try(block.list.all[[t]] <- MCMC_Init(block.list.all[[t]], X)))) { + PEcAn.logger::logger.error("Something wrong within the MCMC_Init function.") + } + + #update MCMC args. + block.list.all[[t]] <- block.list.all[[t]] %>% + purrr::map(function(l){ + l$MCMC <- MCMC.args + l + }) + + #parallel for loop over each block. + #initialize parallel settings. + cores <- parallel::detectCores() + cl <- parallel::makeCluster(cores) + doSNOW::registerDoSNOW(cl) + + #progress bar + pb <- utils::txtProgressBar(min=1, max=length(block.list.all[[t]]), style=3) + progress <- function(n) utils::setTxtProgressBar(pb, n) + opts <- list(progress=progress) + PEcAn.logger::logger.info(paste0("Running MCMC ", "for ", length(block.list.all[[t]]), " blocks")) + + if ("try-error" %in% class(try(block.list.all[[t]] <- foreach::foreach(block = block.list.all[[t]], .packages="Kendall", .options.snow=opts) %dopar% { + library(nimble) + return(MCMC_block_function(block)) + }))) { + PEcAn.logger::logger.error("Something wrong within the MCMC_block_function function.") + } + + #convert from block lists to vector values. + V <- block.2.vector(block.list.all[[t]], X, H) + + #return values + return(list(block.list.all = block.list.all, + mu.f = V$mu.f, + Pf = V$Pf, + mu.a = V$mu.a, + Pa = V$Pa, + Y = Y, + R = R)) +} + +##' @title build.block.xy +##' @name build.block.xy +##' @author Dongchen Zhang +##' +##' @param settings pecan standard multi-site settings list. +##' @param block.list.all List contains nt empty sub-elements. +##' @param X A matrix contains ensemble forecasts. +##' @param obs.mean List of dataframe of observation means, named with observation datetime. +##' @param obs.cov List of covariance matrices of state variables , named with observation datetime. +##' @param t time point. +##' @details This function will add data and constants into each block that are needed for the MCMC sampling. +##' +##' @description This function split long vector and covariance matrix into blocks corresponding to the localization. +##' +##' @return It returns the `build.block.xy` object with data and constants filled in. +build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { + #set q.type from settings. + if (settings$state.data.assimilation$q.type == "vector") { + q.type <- 1 + } else if (settings$state.data.assimilation$q.type == "wishart") { + q.type <- 2 + } + + #grab basic arguments based on X. + site.ids <- unique(attributes(X)$Site) + var.names <- unique(attributes(X)$dimnames[[2]]) + mu.f <- colMeans(X) + Pf <- stats::cov(X) + diag(Pf)[which(diag(Pf)==0)] <- min(diag(Pf)[which(diag(Pf) != 0)])/5 #fixing det(Pf)==0 + + #distance calculations and localization + if (!is.null(settings$state.data.assimilation$Localization.FUN)) { + Localization.FUN <- get(settings$state.data.assimilation$Localization.FUN) + site.locs <- settings$run %>% + purrr::map('site') %>% + purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>% + t %>% + `colnames<-`(c("Lon","Lat")) %>% + `rownames<-`(site.ids) + #Finding the distance between the sites + dis.matrix <- sp::spDists(site.locs, longlat = TRUE) + #turn that into a blocked matrix format + blocked.dis <- block_matrix(dis.matrix %>% as.numeric(), rep(length(var.names), length(site.ids))) + Pf <- Localization.FUN(Pf, blocked.dis, settings$state.data.assimilation$scalef %>% as.numeric()) + } + + #Handle observation + Obs.cons <- Construct.R(site.ids, var.names, obs.mean[[t]], obs.cov[[t]]) + Y <- Obs.cons$Y + R <- Obs.cons$R + #create matrix the describes the support for each observed state variable at time t + min_max <- settings$state.data.assimilation$state.variables %>% + purrr::map(function(state.variable){ + c(as.numeric(state.variable$min_value), + as.numeric(state.variable$max_value)) + }) %>% unlist() %>% as.vector() %>% + matrix(length(settings$state.data.assimilation$state.variables), 2, byrow = T) %>% + `rownames<-`(var.names) + #Create y.censored and y.ind + #describing if the obs are within the defined range. + y.ind <- y.censored <- c() + for (i in seq_along(Y)) { + if (Y[i] > min_max[names(Y[i]), 1]) { + y.ind[i] = 1; y.censored[i] = Y[i] + } else {y.ind[i] <- y.censored[i] <- 0} + } + #observation number per site + obs_per_site <- obs.mean[[t]] %>% + purrr::map(function(site.obs){length(site.obs)}) %>% + unlist() + #create H + H <- construct_nimble_H(site.ids = site.ids, + var.names = var.names, + obs.t = obs.mean[[t]], + pft.path = settings[[1]]$run$inputs$pft.site$path, + by = "block_pft_var") + + #start the blocking process + #should we consider interactions between sites? + if(as.logical(settings$state.data.assimilation$by.site)){ + block.list <- vector("list", length(site.ids)) + #loop over sites + for (i in seq_along(site.ids)) { + #store which block contains which sites. + block.list[[i]]$sites.per.block <- i + block.list[[i]]$site.ids <- site.ids[i] + block.list[[i]]$t <- t + + #fill in mu.f and Pf + f.start <- (i - 1) * length(var.names) + 1 + f.end <- i * length(var.names) + block.list[[i]]$data$muf <- mu.f[f.start:f.end] + block.list[[i]]$data$pf <- Pf[f.start:f.end, f.start:f.end] + + #fill in y and r + y.start <- obs_per_site[i] * (i - 1) + 1 + y.end <- obs_per_site[i] * i + block.list[[i]]$data$y.censored <- y.censored[y.start:y.end] + block.list[[i]]$data$r <- solve(R[y.start:y.end, y.start:y.end]) + + #fill in constants. + block.h <- Construct.H.multisite(site.ids[i], var.names, obs.mean[[t]]) + block.list[[i]]$constant$H <- which(apply(block.h, 2, sum) == 1) + block.list[[i]]$constant$N <- length(f.start:f.end) + block.list[[i]]$constant$YN <- length(y.start:y.end) + block.list[[i]]$constant$q.type <- q.type + } + } else { + #find networks given TRUE/FALSE matrix representing sites' interactions. + block.vec <- PEcAn.utils::matrix_network(dis.matrix <= as.numeric(settings$state.data.assimilation$scalef)) + block.list <- vector("list", length(block.vec)) + #loop over sites + for (i in seq_along(block.vec)) {#i is site index + #store which block contains which sites. + ids <- block.vec[[i]] + block.list[[i]]$sites.per.block <- ids + block.list[[i]]$site.ids <- site.ids[ids] + block.list[[i]]$t <- t + y.ind <- f.ind <- c() + for (j in seq_along(ids)) { + f.start <- (ids[j] - 1) * length(var.names) + 1 + f.end <- ids[j] * length(var.names) + y.start <- obs_per_site[ids[j]] * (ids[j] - 1) + 1 + y.end <- obs_per_site[ids[j]] * ids[j] + + f.ind <- c(f.ind, f.start:f.end) + y.ind <- c(y.ind, y.start:y.end) + } + #fill in mu.f and Pf + block.list[[i]]$data$muf <- mu.f[f.ind] + block.list[[i]]$data$pf <- PEcAn.utils::GrabFillMatrix(Pf, f.ind) + + #fill in y and R + block.list[[i]]$data$y.censored <- y.censored[y.ind] + block.list[[i]]$data$r <- PEcAn.utils::GrabFillMatrix(solve(R), y.ind) + + #fill in constants + block.h <- Construct.H.multisite(site.ids[ids], var.names, obs.mean[[t]]) + block.list[[i]]$constant$H <- which(apply(block.h, 2, sum) == 1) + block.list[[i]]$constant$N <- length(f.ind) + block.list[[i]]$constant$YN <- length(y.ind) + block.list[[i]]$constant$q.type <- q.type + } + } + + #return values. + block.list.all[[t]] <- block.list + return(list(block.list.all = block.list.all, H = H, Y = Y, R = R)) +} + +##' @title MCMC_Init +##' @name MCMC_Init +##' @author Dongchen Zhang +##' +##' @param block.list lists of blocks generated by the `build.block.xy` function. +##' @param X A matrix contains ensemble forecasts. +##' @details This function helps create initial conditions for the MCMC sampling. +##' +##' @return It returns the `block.list` object with initial conditions filled in. +MCMC_Init <- function (block.list, X) { + #sample mu.f from X. + sample.mu.f <- X[sample(seq_along(1:nrow(X)), 1),] + for (i in seq_along(block.list)) { + #number of observations. + num.obs <- length(block.list[[i]]$data$y.censored) + #loop over each site within each block + for (j in seq_along(block.list[[i]]$sites.per.block)) { + #initialize mu.f + start <- (block.list[[i]]$sites.per.block[j] - 1) * length(var.names) + 1 + end <- (block.list[[i]]$sites.per.block[j]) * length(var.names) + block.list[[i]]$Inits$X.mod <- c(block.list[[i]]$Inits$X.mod, sample.mu.f[start:end]) + #initialize X + block.list[[i]]$Inits$X <- block.list[[i]]$Inits$X.mod[block.list[[i]]$constant$H] + #initialize Xs + block.list[[i]]$Inits$Xs <- block.list[[i]]$Inits$X.mod[block.list[[i]]$constant$H] + } + #initialize q. + #if we want the vector q. + if (block.list[[i]]$constant$q.type == 1) { + for (j in seq_along(block.list[[i]]$data$y.censored)) { + block.list[[i]]$Inits$q <- c(block.list[[i]]$Inits$q, rgamma(1, shape = block.list[[i]]$data$aq[j], rate = block.list[[i]]$data$bq[j])) + } + } else if (block.list[[i]]$constant$q.type == 2) { + #if we want the wishart Q. + if ("try-error" %in% class(try(block.list[[i]]$Inits$q <- + rWishart(1, df = block.list[[i]]$data$bq, Sigma = block.list[[i]]$data$aq)[,,1], silent = T))) { + block.list[[i]]$Inits$q <- + rWishart(1, df = block.list[[i]]$data$bq, Sigma = toeplitz((block.list[[i]]$constant$YN:1)/block.list[[i]]$constant$YN))[,,1] + } + } + } + #return values. + return(block.list) +} + +##' @title MCMC_block_function +##' @name MCMC_block_function +##' @author Dongchen Zhang +##' +##' @param block each block within the `block.list` lists. +##' +##' @return It returns the `block` object with analysis results filled in. +MCMC_block_function <- function(block) { + #build nimble model + model_pred <- nimble::nimbleModel(GEF.Block.Nimble, + data = block$data, + inits = block$Inits, + constants = block$constant, + name = 'base') + #configure MCMC + conf <- nimble::configureMCMC(model_pred, print=FALSE) + conf$setMonitors(c("X", "X.mod", "q")) + + #Handle samplers + #hear we change the RW_block sampler to the ess sampler + #because it has a better performance of MVN sampling + samplerLists <- conf$getSamplers() + if (block$constant$q.type == 1) { + #if we have vector q + #only X.mod should be sampled with ess sampler. + X.mod.ind <- which(grepl("X.mod", samplerLists %>% purrr::map(~ .x$target) %>% unlist())) + samplerLists[[X.mod.ind]]$setName("ess") + } else if (block$constant$q.type == 2) { + #if we have wishart q + #everything should be sampled with ess sampler. + samplerLists %>% purrr::map(function(l){l$setName("ess")}) + } + conf$setSamplers(samplerLists) + + #compile MCMC + Rmcmc <- nimble::buildMCMC(conf) + Cmodel <- nimble::compileNimble(model_pred) + Cmcmc <- nimble::compileNimble(Rmcmc, project = model_pred, showCompilerOutput = FALSE) + + #run MCMC + dat <- runMCMC(Cmcmc, niter = block$MCMC$niter, nburnin = block$MCMC$nburnin, thin = block$MCMC$nthin, nchains = block$MCMC$nchain) + + #update aq, bq, mua, and pa + M <- colMeans(dat) + block$update$aq <- block$Inits$q + if (block$constant$q.type == 1) { + #if it's a vector q case + aq <- bq <- rep(NA, length(block$data$y.censored)) + for (i in seq_along(aq)) { + CHAR <- paste0("[", i, "]") + aq[i] <- (mean(dat[, paste0("q", CHAR)]))^2/stats::var(dat[, paste0("q", CHAR)]) + bq[i] <- mean(dat[, paste0("q", CHAR)])/stats::var(dat[, paste0("q", CHAR)]) + } + #update aqq and bqq + block$aqq[,block$t+1] <- block$aqq[, block$t] + block$aqq[block$constant$H, block$t+1] <- aq + block$bqq[,block$t+1] <- block$bqq[, block$t] + block$bqq[block$constant$H, block$t+1] <- bq + } else if (block$constant$q.type == 2) { + #previous updates + mq <- dat[, grep("q", colnames(dat))] # Omega, Precision + q.bar <- matrix(apply(mq, 2, mean), + length(block$constant$H), + length(block$constant$H) + ) + wish.df <- function(Om, X, i, j, col) { + (Om[i, j]^2 + Om[i, i] * Om[j, j]) / stats::var(X[, col]) + } + col <- matrix(1:length(block$constant$H) ^ 2, + length(block$constant$H), + length(block$constant$H)) + WV <- matrix(0, length(block$constant$H), length(block$constant$H)) + for (i in seq_along(block$constant$H)) { + for (j in seq_along(block$constant$H)) { + WV[i, j] <- wish.df(q.bar, X = mq, i = i, j = j, col = col[i, j]) + } + } + bq <- mean(WV) + if (bq < block$constant$YN) { + bq <- block$constant$YN + } + aq <- solve(q.bar) * bq + block$aqq[,,block$t+1] <- PEcAn.utils::GrabFillMatrix(block$aqq[,,block$t], block$constant$H, aq) + block$bqq[block$t+1] <- bq + + # #if it's a wishart case + # bq <- block$data$bq + # aq <- block$data$aq + # for (i in 1:dim(aq)[1]) { + # CHAR <- paste0("[", i, "]") + # for (j in 1:dim(aq)[2]) { + # aq[i, j] <- M[paste0("q[", i, ", ", j, "]")]/bq + # } + # } + # #update aqq and bqq + # block$aqq[,,block$t+1] <- PEcAn.utils::GrabFillMatrix(block$aqq[,,block$t], block$constant$H, aq) + # block$bqq[block$t+1] <- block$bqq[block$t] + } + #update mua and pa; mufa, and pfa + iX <- grep("X[", colnames(dat), fixed = TRUE) + iX.mod <- grep("X.mod[", colnames(dat), fixed = TRUE) + mua <- colMeans(dat[, iX]) + pa <- stats::cov(dat[, iX]) + mufa <- colMeans(dat[, iX.mod]) + pfa <- stats::cov(dat[, iX.mod]) + + #return values. + block$update <- list(aq = aq, bq = bq, mua = mua, pa = pa, mufa = mufa, pfa = pfa) + return(block) +} + +##' @title update_q +##' @name update_q +##' @author Dongchen Zhang +##' +##' @param block.list.all each block within the `block.list` lists. +##' @param t time point. +##' @param nt total length of time steps. +##' @param MCMC_dat data frame of MCMC samples, the default it NULL. +##' +##' @return It returns the `block.list.all` object with initialized/updated Q filled in. +update_q <- function (block.list.all, t, nt, MCMC_dat = NULL) { + block.list <- block.list.all[[t]] + #if it's an update. + if (is.null(MCMC_dat)) { + #loop over blocks + if (t == 1) { + for (i in seq_along(block.list)) { + nvar <- length(block.list[[i]]$data$muf) + nobs <- length(block.list[[i]]$data$y.censored) + if (block.list[[i]]$constant$q.type == 1) { + #initialize aqq and bqq for nt + block.list[[i]]$aqq <- array(1, dim = c(nvar, nt)) + block.list[[i]]$bqq <- array(1, dim = c(nvar, nt)) + #update aq and bq based on aqq and bqq + block.list[[i]]$data$aq <- block.list[[i]]$aqq[block.list[[i]]$constant$H, t] + block.list[[i]]$data$bq <- block.list[[i]]$bqq[block.list[[i]]$constant$H, t] + } else if (block.list[[i]]$constant$q.type == 2) { + #initialize aqq and bqq for nt + block.list[[i]]$aqq <- array(1, dim = c(nvar, nvar, nt)) + block.list[[i]]$aqq[,,t] <- toeplitz((nvar:1)/nvar) + block.list[[i]]$bqq <- rep(nobs, nt) + #update aq and bq based on aqq and bqq + block.list[[i]]$data$aq <- PEcAn.utils::GrabFillMatrix(block.list[[i]]$aqq[,,t], block.list[[i]]$constant$H) + block.list[[i]]$data$bq <- block.list[[i]]$bqq[t] + } + } + } else if (t > 1) { + #if we want to update q from previous SDA runs. + block.list.pre <- block.list.all[[t - 1]] + for (i in seq_along(block.list)) { + nvar <- length(block.list[[i]]$data$muf) + nobs <- length(block.list[[i]]$data$y.censored) + if (block.list[[i]]$constant$q.type == 1) { + #copy previous aqq and bqq to the current t + block.list[[i]]$aqq <- block.list.pre[[i]]$aqq + block.list[[i]]$bqq <- block.list.pre[[i]]$bqq + #update aq and bq + block.list[[i]]$data$aq <- block.list[[i]]$aqq[block.list[[i]]$constant$H, t] + block.list[[i]]$data$bq <- block.list[[i]]$bqq[block.list[[i]]$constant$H, t] + } else if (block.list[[i]]$constant$q.type == 2) { + #initialize aqq and bqq for nt + block.list[[i]]$aqq <- block.list.pre[[i]]$aqq + block.list[[i]]$bqq <- block.list.pre[[i]]$bqq + #if previous Q is smaller than the actual YN. + if (block.list.pre[[i]]$bqq[t] <= block.list[[i]]$constant$YN) { + block.list[[i]]$bqq[t] <- block.list[[i]]$constant$YN + } + #update aq and bq based on aqq and bqq + block.list[[i]]$data$aq <- PEcAn.utils::GrabFillMatrix(block.list[[i]]$aqq[,,t], block.list[[i]]$constant$H) + block.list[[i]]$data$bq <- block.list[[i]]$bqq[t] + } + } + } + } else { + #TODO: Implement the feature that Q can be updated based on the pft types. + } + + #return values. + block.list.all[[t]] <- block.list + return(block.list.all) +} + +##' @title block.2.vector +##' @name block.2.vector +##' @author Dongchen Zhang +##' +##' @param block.list lists of blocks generated by the `build.block.xy` function. +##' @param X A matrix contains ensemble forecasts. +##' @param H H index created by the `construct_nimble_H` function. +##' +##' @return It returns a list of analysis results by MCMC sampling. +block.2.vector <- function (block.list, X, H) { + site.ids <- attributes(X)$Site + mu.f <- mu.a <- c() + Pf <- Pa <- matrix(0, length(site.ids), length(site.ids)) + for (L in block.list) { + ind <- c() + for (id in L$site.ids) { + ind <- c(ind, which(site.ids == id)) + } + #convert mu.f and pf + mu.a[ind] <- mu.f[ind] <- L$update$mufa + Pa[ind, ind] <- Pf[ind, ind] <- L$update$pfa + #convert mu.a and pa + ind <- intersect(ind, H$H.ind) + mu.a[ind] <- L$update$mua + Pa[ind, ind] <- L$update$pa + } + return(list(mu.f = mu.f, + Pf = Pf, + mu.a = mu.a, + Pa = Pa)) +} \ No newline at end of file From dc949e8ee10453ca9ca520d7aeccad16d658c4ee Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:27:18 -0400 Subject: [PATCH 09/68] Added the nimble code for the block-based MCMC sampling. --- modules/assim.sequential/R/Nimble_codes.R | 36 +++++++++++++++++++ .../assim.sequential/man/GEF.Block.Nimble.Rd | 16 +++++++++ 2 files changed, 52 insertions(+) create mode 100644 modules/assim.sequential/man/GEF.Block.Nimble.Rd diff --git a/modules/assim.sequential/R/Nimble_codes.R b/modules/assim.sequential/R/Nimble_codes.R index 38929c17e25..f33f8ac2835 100644 --- a/modules/assim.sequential/R/Nimble_codes.R +++ b/modules/assim.sequential/R/Nimble_codes.R @@ -220,6 +220,42 @@ GEF.MultiSite.Nimble <- nimbleCode({ }) +#GEF.Block.Nimble--This does the block-based SDA ------------------------------------- +#' block-based TWEnF +#' @format TBD +GEF.Block.Nimble <- nimbleCode({ + #I think the blocked nimble has to be implemented and used instead of a long vector sampling. + #1) due to the convergence of X.mod. + #2) temporal efficiency. MVN sampling over a large cov matrix can be time consuming. + #4) this data structure design allows us to implement the MCMC sampling parallely. + + #sample X.mod from the forecast. + X.mod[1:N] ~ dmnorm(mean = muf[1:N], cov = pf[1:N, 1:N]) + + #here we only consider vector Q and Wishart Q. + if (q.type == 1) { + for (i in 1:YN) { + #sample Q. + q[i] ~ dgamma(shape = aq[i], rate = bq[i]) + #sample latent variable X. + X[i] ~ dnorm(X.mod[H[i]], sd = 1/sqrt(q[i])) + #likelihood + y.censored[i] ~ dnorm(X[i], sd = 1/sqrt(r[i, i])) + } + } else if (q.type == 2) { + #if it's a Wishart Q. + #sample Q. + q[1:YN, 1:YN] ~ dwishart(R = aq[1:YN, 1:YN], df = bq) + #sample latent variable X. + for (i in 1:YN) { + Xs[i] <- X.mod[H[i]] + } + X[1:YN] ~ dmnorm(Xs[1:YN], prec = q[1:YN, 1:YN]) + #likelihood + y.censored[1:YN] ~ dmnorm(X[1:YN], prec = r[1:YN, 1:YN]) + } +}) + #sampler_toggle------------------------------------------------------------------------------------------------ #' sampler toggling #' @export diff --git a/modules/assim.sequential/man/GEF.Block.Nimble.Rd b/modules/assim.sequential/man/GEF.Block.Nimble.Rd new file mode 100644 index 00000000000..2a343a1f361 --- /dev/null +++ b/modules/assim.sequential/man/GEF.Block.Nimble.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Nimble_codes.R +\docType{data} +\name{GEF.Block.Nimble} +\alias{GEF.Block.Nimble} +\title{block-based TWEnF} +\format{ +TBD +} +\usage{ +GEF.Block.Nimble +} +\description{ +block-based TWEnF +} +\keyword{datasets} From b704d7029c99a3350d1c5bd09a1c578395a26951 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:27:39 -0400 Subject: [PATCH 10/68] Update the script. --- .../inst/MultiSite-Exs/SDA/Create_Multi_settings.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R index e259db3343c..2a5f6377562 100644 --- a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R +++ b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R @@ -64,6 +64,7 @@ template <- PEcAn.settings::Settings(list( NC.Overwrite = FALSE, NC.Prefix = "sipnet.out", q.type = "SINGLE", + by.site = FALSE, Localization.FUN = "Local.support", scalef = 1, chains = 5, From 61f28ffe7b2b8b8fdd36e0bec79fb79dd67baf07 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:28:00 -0400 Subject: [PATCH 11/68] Add the MCMC_Init function. --- modules/assim.sequential/man/MCMC_Init.Rd | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 modules/assim.sequential/man/MCMC_Init.Rd diff --git a/modules/assim.sequential/man/MCMC_Init.Rd b/modules/assim.sequential/man/MCMC_Init.Rd new file mode 100644 index 00000000000..df583db642f --- /dev/null +++ b/modules/assim.sequential/man/MCMC_Init.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Analysis_sda_block.R +\name{MCMC_Init} +\alias{MCMC_Init} +\title{MCMC_Init} +\usage{ +MCMC_Init(block.list, X) +} +\arguments{ +\item{block.list}{lists of blocks generated by the `build.block.xy` function.} + +\item{X}{A matrix contains ensemble forecasts.} +} +\value{ +It returns the `block.list` object with initial conditions filled in. +} +\description{ +MCMC_Init +} +\details{ +This function helps create initial conditions for the MCMC sampling. +} +\author{ +Dongchen Zhang +} From 5b6019fd839bd6f35da258b7e676ef86c5b16ad7 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:28:14 -0400 Subject: [PATCH 12/68] Add the MCMC_block_function. --- .../man/MCMC_block_function.Rd | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 modules/assim.sequential/man/MCMC_block_function.Rd diff --git a/modules/assim.sequential/man/MCMC_block_function.Rd b/modules/assim.sequential/man/MCMC_block_function.Rd new file mode 100644 index 00000000000..7fffaad194f --- /dev/null +++ b/modules/assim.sequential/man/MCMC_block_function.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Analysis_sda_block.R +\name{MCMC_block_function} +\alias{MCMC_block_function} +\title{MCMC_block_function} +\usage{ +MCMC_block_function(block) +} +\arguments{ +\item{block}{each block within the `block.list` lists.} +} +\value{ +It returns the `block` object with analysis results filled in. +} +\description{ +MCMC_block_function +} +\author{ +Dongchen Zhang +} From c78f5b1fd14a41be833a51c7214c84cdb6423b47 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:28:27 -0400 Subject: [PATCH 13/68] Add the analysis_sda_block function. --- .../man/analysis_sda_block.Rd | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 modules/assim.sequential/man/analysis_sda_block.Rd diff --git a/modules/assim.sequential/man/analysis_sda_block.Rd b/modules/assim.sequential/man/analysis_sda_block.Rd new file mode 100644 index 00000000000..b5f836aa932 --- /dev/null +++ b/modules/assim.sequential/man/analysis_sda_block.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Analysis_sda_block.R +\name{analysis_sda_block} +\alias{analysis_sda_block} +\title{analysis_sda_block} +\usage{ +analysis_sda_block( + settings, + block.list.all, + X, + obs.mean, + obs.cov, + t, + nt, + MCMC.args +) +} +\arguments{ +\item{settings}{pecan standard multi-site settings list.} + +\item{block.list.all}{List contains nt empty sub-elements.} + +\item{X}{A matrix contains ensemble forecasts.} + +\item{obs.mean}{List of dataframe of observation means, named with observation datetime.} + +\item{obs.cov}{List of covariance matrices of state variables , named with observation datetime.} + +\item{t}{time point.} + +\item{nt}{total length of time steps.} + +\item{MCMC.args}{arguments for the MCMC sampling.} +} +\value{ +It returns the `build.block.xy` object and the analysis results. +} +\description{ +This function provides the block-based MCMC sampling approach. +} +\details{ +This function will add data and constants into each block that are needed for the MCMC sampling. +} +\author{ +Dongchen Zhang +} From 8fd4485d8267fcac35851171a0f7a2a0d9893e93 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:28:42 -0400 Subject: [PATCH 14/68] Add the block.2.vector function. --- .../assim.sequential/man/block.2.vector.Rd | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 modules/assim.sequential/man/block.2.vector.Rd diff --git a/modules/assim.sequential/man/block.2.vector.Rd b/modules/assim.sequential/man/block.2.vector.Rd new file mode 100644 index 00000000000..cf9b1687396 --- /dev/null +++ b/modules/assim.sequential/man/block.2.vector.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Analysis_sda_block.R +\name{block.2.vector} +\alias{block.2.vector} +\title{block.2.vector} +\usage{ +block.2.vector(block.list, X, H) +} +\arguments{ +\item{block.list}{lists of blocks generated by the `build.block.xy` function.} + +\item{X}{A matrix contains ensemble forecasts.} + +\item{H}{H index created by the `construct_nimble_H` function.} +} +\value{ +It returns a list of analysis results by MCMC sampling. +} +\description{ +block.2.vector +} +\author{ +Dongchen Zhang +} From 45013266cf00958681b4132f060e353aeb41cbf4 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:28:53 -0400 Subject: [PATCH 15/68] Add the build.block.xy function. --- .../assim.sequential/man/build.block.xy.Rd | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 modules/assim.sequential/man/build.block.xy.Rd diff --git a/modules/assim.sequential/man/build.block.xy.Rd b/modules/assim.sequential/man/build.block.xy.Rd new file mode 100644 index 00000000000..261301277d8 --- /dev/null +++ b/modules/assim.sequential/man/build.block.xy.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Analysis_sda_block.R +\name{build.block.xy} +\alias{build.block.xy} +\title{build.block.xy} +\usage{ +build.block.xy(settings, block.list.all, X, obs.mean, obs.cov, t) +} +\arguments{ +\item{settings}{pecan standard multi-site settings list.} + +\item{block.list.all}{List contains nt empty sub-elements.} + +\item{X}{A matrix contains ensemble forecasts.} + +\item{obs.mean}{List of dataframe of observation means, named with observation datetime.} + +\item{obs.cov}{List of covariance matrices of state variables , named with observation datetime.} + +\item{t}{time point.} +} +\value{ +It returns the `build.block.xy` object with data and constants filled in. +} +\description{ +This function split long vector and covariance matrix into blocks corresponding to the localization. +} +\details{ +This function will add data and constants into each block that are needed for the MCMC sampling. +} +\author{ +Dongchen Zhang +} From 7e48b2147784b9b14717f64df032c2ddc21e9fd9 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:29:07 -0400 Subject: [PATCH 16/68] Add the update_q function. --- modules/assim.sequential/man/update_q.Rd | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 modules/assim.sequential/man/update_q.Rd diff --git a/modules/assim.sequential/man/update_q.Rd b/modules/assim.sequential/man/update_q.Rd new file mode 100644 index 00000000000..bdf6acef08f --- /dev/null +++ b/modules/assim.sequential/man/update_q.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Analysis_sda_block.R +\name{update_q} +\alias{update_q} +\title{update_q} +\usage{ +update_q(block.list.all, t, nt, MCMC_dat = NULL) +} +\arguments{ +\item{block.list.all}{each block within the `block.list` lists.} + +\item{t}{time point.} + +\item{nt}{total length of time steps.} + +\item{MCMC_dat}{data frame of MCMC samples, the default it NULL.} +} +\value{ +It returns the `block.list.all` object with initialized/updated Q filled in. +} +\description{ +update_q +} +\author{ +Dongchen Zhang +} From fc5ad951874a63c1897d3da3f39fd23851615730 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:29:38 -0400 Subject: [PATCH 17/68] Reformat the arguments and connected this function to the block analysis function. --- .../assim.sequential/R/sda.enkf_MultiSite.R | 263 +++++++++--------- .../man/sda.enkf.multisite.Rd | 23 +- 2 files changed, 139 insertions(+), 147 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index a1614aa93af..f7a49bbd3f0 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -6,15 +6,9 @@ #' @param obs.mean List of dataframe of observation means, named with observation datetime. #' @param obs.cov List of covariance matrices of state variables , named with observation datetime. #' @param Q Process covariance matrix given if there is no data to estimate it. -#' @param restart Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA -#' @param forceRun Used to force job.sh files that were not run for ensembles in SDA (quick fix) -#' @param keepNC Used for debugging issues. .nc files are usually removed after each year in the out folder. This flag will keep the .nc + .nc.var files for futher investigations. +#' @param restart Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA. #' @param pre_enkf_params Used for carrying out SDA with pre-existed enkf.params, in which the Pf, aqq, and bqq can be used for the analysis step. -#' @param run_parallel If allows to proceed under parallel mode, default is TRUE. #' @param ensemble.samples Pass ensemble.samples from outside to avoid GitHub check issues. -#' @param parallel_qsub Bool variable decide if you want to submit the bash jobs under the parallel mode, the default value is TRUE. -#' @param free_run Bool variable decide if the run is a free run with no analysis been used, the default value is FALSE. -#' @param send_email List object containing the "from", "to", and "body", the default value is NULL. #' @param control List of flags controlling the behaviour of the SDA. trace for reporting back the SDA outcomes, interactivePlot for plotting the outcomes after each step, #' TimeseriesPlot for post analysis examination, BiasPlot for plotting the correlation between state variables, plot.title is the title of post analysis plots and debug mode allows for pausing the code and examining the variables inside the function. #' @@ -32,14 +26,8 @@ sda.enkf.multisite <- function(settings, obs.cov, Q = NULL, restart = NULL, - forceRun = TRUE, - keepNC = TRUE, pre_enkf_params = NULL, - run_parallel = TRUE, ensemble.samples = NULL, - parallel_qsub = TRUE, - free_run = FALSE, - send_email = NULL, control=list(trace = TRUE, FF = FALSE, interactivePlot = FALSE, @@ -50,7 +38,13 @@ sda.enkf.multisite <- function(settings, debug = FALSE, pause = FALSE, Profiling = FALSE, - OutlierDetection=FALSE), + OutlierDetection=FALSE, + parallel_qsub = TRUE, + free_run = FALSE, + send_email = NULL, + keepNC = TRUE, + forceRun = TRUE, + run_parallel = TRUE), ...) { #add if/else for when restart points to folder instead if T/F set restart as T if(is.list(restart)){ @@ -60,7 +54,7 @@ sda.enkf.multisite <- function(settings, }else{ restart_flag = FALSE } - if(run_parallel){ + if(control$run_parallel){ if (future::supportsMulticore()) { future::plan(future::multicore) } else { @@ -348,7 +342,7 @@ sda.enkf.multisite <- function(settings, load(file.path(settings$outdir, "samples.Rdata")) } #reformatting params - new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) + new.params <- PEcAnAssimSequential:::sda_matchparam(settings, ensemble.samples, site.ids, nens) } #sample met ensemble members #TODO: incorporate Phyllis's restart work @@ -431,19 +425,16 @@ sda.enkf.multisite <- function(settings, writeLines(runs.tmp[runs.tmp != ''], file.path(rundir, 'runs.txt')) paste(file.path(rundir, 'runs.txt')) ## testing Sys.sleep(0.01) ## testing - if(parallel_qsub){ + if(control$parallel_qsub){ PEcAn.remote::qsub_parallel(settings, files=PEcAn.remote::merge_job_files(settings, 10), prefix = paste0(obs.year, ".nc")) }else{ PEcAn.workflow::start_model_runs(settings, write=settings$database$bety$write) } - - #------------- Reading - every iteration and for SDA - #put building of X into a function that gets called max_t <- 0 while("try-error" %in% class( - try(reads <- build_X(out.configs = out.configs, + try(reads <- PEcAnAssimSequential:::build_X(out.configs = out.configs, settings = settings, new.params = new.params, nens = nens, @@ -501,112 +492,132 @@ sda.enkf.multisite <- function(settings, # which clearly should be triggering the `else` of this if, but the # `else` has not been invoked in a while an may need updating - if (control$debug) browser() - #Making R and Y - Obs.cons <- Construct.R(site.ids, var.names, obs.mean[[t]], obs.cov[[t]]) - - Y <- Obs.cons$Y - R <- Obs.cons$R - if (length(Y) > 1) { - PEcAn.logger::logger.info("The zero variances in R and Pf is being replaced by half and one fifth of the minimum variance in those matrices respectively.") - diag(R)[which(diag(R)==0)] <- min(diag(R)[which(diag(R) != 0)])/2 + #decide if we want to estimate the process variance and choose the according function. + if(processvar == FALSE) { + an.method<-EnKF + } else if (processvar == TRUE && settings$state.data.assimilation$q.type %in% c("SINGLE", "SITE")) { + an.method<-GEF.MultiSite } - # making the mapping operator - H <- Construct.H.multisite(site.ids, var.names, obs.mean[[t]]) - #Pass aqq and bqq. - aqq <- NULL - bqq <- numeric(nt + 1) - Pf <- NULL - #if t>1 - if(is.null(pre_enkf_params) && t>1){ - aqq <- enkf.params[[t-1]]$aqq - bqq <- enkf.params[[t-1]]$bqq - X.new<-enkf.params[[t-1]]$X.new - } - if(!is.null(pre_enkf_params) && t>1){ - aqq <- pre_enkf_params[[t-1]]$aqq - bqq <- pre_enkf_params[[t-1]]$bqq - X.new<-pre_enkf_params[[t-1]]$X.new - } - if(!is.null(pre_enkf_params)){ - Pf <- pre_enkf_params[[t]]$Pf - } - - recompileTobit = !exists('Cmcmc_tobit2space') - recompileGEF = !exists('Cmcmc') - - #weight list - # This reads ensemble weights generated by `get_ensemble_weights` function from assim.sequential package - weight_list <- list() - if(!file.exists(file.path(settings$outdir, "ensemble_weights.Rdata"))){ - PEcAn.logger::logger.warn("ensemble_weights.Rdata cannot be found. Make sure you generate samples by running the get.ensemble.weights function before running SDA if you want the ensembles to be weighted.") - #create null list - for(tt in 1:length(obs.times)){ - weight_list[[tt]] <- rep(1,nens) #no weights + #decide if we want the block analysis function or multi-site analysis function. + if (processvar == TRUE && settings$state.data.assimilation$q.type %in% c("vector", "wishart")) { + #initialize block.list.all. + if (t == 1) { + block.list.all <- obs.mean %>% purrr::map(function(l){NULL}) } - } else{ - load(file.path(settings$outdir, "ensemble_weights.Rdata")) ## loads ensemble.samples - } - wts <- unlist(weight_list[[t]]) - ###-------------------------------------------------------------------### - ### Analysis ### - ###-------------------------------------------------------------------###---- - if(processvar == FALSE){an.method<-EnKF}else{an.method<-GEF.MultiSite} - - #-analysis function - enkf.params[[obs.t]] <- GEF.MultiSite( - settings, - FUN = an.method, - Forecast = list(Q = Q, X = X), - Observed = list(R = R, Y = Y), - H = H, - extraArg = list( - aqq = aqq, - bqq = bqq, - Pf = Pf, - t = t, - nitr.GEF = nitr.GEF, - nthin = nthin, - nburnin = nburnin, - censored.data = censored.data, - recompileGEF = recompileGEF, - recompileTobit = recompileTobit, - wts = wts - ), - choose = choose, - nt = nt, - obs.mean = obs.mean, - nitr = 100000, - nburnin = 10000, - obs.cov = obs.cov, - site.ids = site.ids, - blocked.dis = blocked.dis, - distances = distances - ) - tictoc::tic(paste0("Preparing for Adjustment for cycle = ", t)) - #Forecast - mu.f <- enkf.params[[obs.t]]$mu.f - Pf <- enkf.params[[obs.t]]$Pf - #Analysis - Pa <- enkf.params[[obs.t]]$Pa - mu.a <- enkf.params[[obs.t]]$mu.a - #extracting extra outputs - if (control$debug) browser() - if (processvar) { - aqq <- enkf.params[[obs.t]]$aqq - bqq <- enkf.params[[obs.t]]$bqq - } - # Adding obs elements to the enkf.params - #This can later on help with diagnostics - enkf.params[[obs.t]] <- - c( - enkf.params[[obs.t]], - R = list(R), - Y = list(Y), - RestartList = list(restart.list %>% stats::setNames(site.ids)) + #initialize MCMC arguments. + if (!exists("MCMC.args")) { + MCMC.args <- list(niter = 1e5, + nthin = 10, + nchain = 1, + nburnin = 1e4) + } + #running analysis function. + enkf.params[[obs.t]] <- PEcAnAssimSequential:::analysis_sda_block(settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args) + enkf.params[[obs.t]] <- c(enkf.params[[obs.t]], RestartList = list(restart.list %>% stats::setNames(site.ids))) + block.list.all <- enkf.params[[obs.t]]$block.list.all + #Forecast + mu.f <- enkf.params[[obs.t]]$mu.f + Pf <- enkf.params[[obs.t]]$Pf + #Analysis + Pa <- enkf.params[[obs.t]]$Pa + mu.a <- enkf.params[[obs.t]]$mu.a + } else if (exists("an.method")) { + #Making R and Y + Obs.cons <- Construct.R(site.ids, var.names, obs.mean[[t]], obs.cov[[t]]) + Y <- Obs.cons$Y + R <- Obs.cons$R + if (length(Y) > 1) { + PEcAn.logger::logger.info("The zero variances in R and Pf is being replaced by half and one fifth of the minimum variance in those matrices respectively.") + diag(R)[which(diag(R)==0)] <- min(diag(R)[which(diag(R) != 0)])/2 + } + #Pass aqq and bqq. + aqq <- NULL + bqq <- numeric(nt + 1) + Pf <- NULL + #if t>1 + if(is.null(pre_enkf_params) && t>1){ + aqq <- enkf.params[[t-1]]$aqq + bqq <- enkf.params[[t-1]]$bqq + X.new<-enkf.params[[t-1]]$X.new + } + if(!is.null(pre_enkf_params) && t>1){ + aqq <- pre_enkf_params[[t-1]]$aqq + bqq <- pre_enkf_params[[t-1]]$bqq + X.new<-pre_enkf_params[[t-1]]$X.new + } + if(!is.null(pre_enkf_params)){ + Pf <- pre_enkf_params[[t]]$Pf + } + recompileTobit = !exists('Cmcmc_tobit2space') + recompileGEF = !exists('Cmcmc') + #weight list + # This reads ensemble weights generated by `get_ensemble_weights` function from assim.sequential package + weight_list <- list() + if(!file.exists(file.path(settings$outdir, "ensemble_weights.Rdata"))){ + PEcAn.logger::logger.warn("ensemble_weights.Rdata cannot be found. Make sure you generate samples by running the get.ensemble.weights function before running SDA if you want the ensembles to be weighted.") + #create null list + for(tt in 1:length(obs.times)){ + weight_list[[tt]] <- rep(1,nens) #no weights + } + } else{ + load(file.path(settings$outdir, "ensemble_weights.Rdata")) ## loads ensemble.samples + } + wts <- unlist(weight_list[[t]]) + #-analysis function + enkf.params[[obs.t]] <- GEF.MultiSite( + settings, + FUN = an.method, + Forecast = list(Q = Q, X = X), + Observed = list(R = R, Y = Y), + H = H, + extraArg = list( + aqq = aqq, + bqq = bqq, + Pf = Pf, + t = t, + nitr.GEF = nitr.GEF, + nthin = nthin, + nburnin = nburnin, + censored.data = censored.data, + recompileGEF = recompileGEF, + recompileTobit = recompileTobit, + wts = wts + ), + choose = choose, + nt = nt, + obs.mean = obs.mean, + nitr = 100000, + nburnin = 10000, + obs.cov = obs.cov, + site.ids = site.ids, + blocked.dis = blocked.dis, + distances = distances ) + tictoc::tic(paste0("Preparing for Adjustment for cycle = ", t)) + #Forecast + mu.f <- enkf.params[[obs.t]]$mu.f + Pf <- enkf.params[[obs.t]]$Pf + #Analysis + Pa <- enkf.params[[obs.t]]$Pa + mu.a <- enkf.params[[obs.t]]$mu.a + #extracting extra outputs + if (control$debug) browser() + if (processvar) { + aqq <- enkf.params[[obs.t]]$aqq + bqq <- enkf.params[[obs.t]]$bqq + } + # Adding obs elements to the enkf.params + #This can later on help with diagnostics + enkf.params[[obs.t]] <- + c( + enkf.params[[obs.t]], + R = list(R), + Y = list(Y), + RestartList = list(restart.list %>% stats::setNames(site.ids)) + ) + } ###-------------------------------------------------------------------### ### Trace ### @@ -615,11 +626,9 @@ sda.enkf.multisite <- function(settings, if(control$trace) { PEcAn.logger::logger.warn ("\n --------------------------- ",obs.year," ---------------------------\n") PEcAn.logger::logger.warn ("\n --------------Obs mean----------- \n") - print(Y) + print(enkf.params[[obs.t]]$Y) PEcAn.logger::logger.warn ("\n --------------Obs Cov ----------- \n") - print(R) - PEcAn.logger::logger.warn ("\n --------------Obs H ----------- \n") - print(H) + print(enkf.params[[obs.t]]$R) PEcAn.logger::logger.warn ("\n --------------Forecast mean ----------- \n") print(enkf.params[[obs.t]]$mu.f) PEcAn.logger::logger.warn ("\n --------------Forecast Cov ----------- \n") @@ -708,7 +717,7 @@ sda.enkf.multisite <- function(settings, if (control$Profiling) alltocs(file.path(settings$outdir,"SDA", "Profiling.csv")) # remove files as SDA runs - if (!(keepNC)){ + if (!(control$keepNC) && t == 1){ unlink(list.files(outdir, "*.nc", recursive = TRUE, full.names = TRUE)) } if(!is.null(control$send_email)){ diff --git a/modules/assim.sequential/man/sda.enkf.multisite.Rd b/modules/assim.sequential/man/sda.enkf.multisite.Rd index 3f216df4803..5017d63df8f 100644 --- a/modules/assim.sequential/man/sda.enkf.multisite.Rd +++ b/modules/assim.sequential/man/sda.enkf.multisite.Rd @@ -10,17 +10,12 @@ sda.enkf.multisite( obs.cov, Q = NULL, restart = NULL, - forceRun = TRUE, - keepNC = TRUE, pre_enkf_params = NULL, - run_parallel = TRUE, ensemble.samples = NULL, - parallel_qsub = TRUE, - free_run = FALSE, - send_email = NULL, control = list(trace = TRUE, FF = FALSE, interactivePlot = FALSE, TimeseriesPlot = FALSE, BiasPlot = FALSE, plot.title = NULL, facet.plots = FALSE, debug = FALSE, pause - = FALSE, Profiling = FALSE, OutlierDetection = FALSE), + = FALSE, Profiling = FALSE, OutlierDetection = FALSE, parallel_qsub = TRUE, free_run + = FALSE, send_email = NULL, keepNC = TRUE, forceRun = TRUE, run_parallel = TRUE), ... ) } @@ -33,24 +28,12 @@ sda.enkf.multisite( \item{Q}{Process covariance matrix given if there is no data to estimate it.} -\item{restart}{Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA} - -\item{forceRun}{Used to force job.sh files that were not run for ensembles in SDA (quick fix)} - -\item{keepNC}{Used for debugging issues. .nc files are usually removed after each year in the out folder. This flag will keep the .nc + .nc.var files for futher investigations.} +\item{restart}{Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA.} \item{pre_enkf_params}{Used for carrying out SDA with pre-existed enkf.params, in which the Pf, aqq, and bqq can be used for the analysis step.} -\item{run_parallel}{If allows to proceed under parallel mode, default is TRUE.} - \item{ensemble.samples}{Pass ensemble.samples from outside to avoid GitHub check issues.} -\item{parallel_qsub}{Bool variable decide if you want to submit the bash jobs under the parallel mode, the default value is TRUE.} - -\item{free_run}{Bool variable decide if the run is a free run with no analysis been used, the default value is FALSE.} - -\item{send_email}{List object containing the "from", "to", and "body", the default value is NULL.} - \item{control}{List of flags controlling the behaviour of the SDA. trace for reporting back the SDA outcomes, interactivePlot for plotting the outcomes after each step, TimeseriesPlot for post analysis examination, BiasPlot for plotting the correlation between state variables, plot.title is the title of post analysis plots and debug mode allows for pausing the code and examining the variables inside the function.} } From 2b883e5b04224f25cd6969b57fb10fccd1110cc0 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:29:48 -0400 Subject: [PATCH 18/68] Format. --- modules/assim.sequential/R/Analysis_sda_multiSite.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/Analysis_sda_multiSite.R b/modules/assim.sequential/R/Analysis_sda_multiSite.R index 4b4c67f92c4..219e8983e80 100644 --- a/modules/assim.sequential/R/Analysis_sda_multiSite.R +++ b/modules/assim.sequential/R/Analysis_sda_multiSite.R @@ -15,7 +15,7 @@ ##' ##' @return It returns a list with estimated mean and cov matrix of forecast state variables as well as mean and cov estimated as a result of assimilation/analysis . ##' @export -EnKF.MultiSite <-function(settings, Forecast, Observed, H, extraArg=NULL, ...){ +EnKF.MultiSite <- function(settings, Forecast, Observed, H, extraArg=NULL, ...){ #------------------------------Setup Localization.FUN <- settings$state.data.assimilation$Localization.FUN # localization function scalef <- settings$state.data.assimilation$scalef %>% as.numeric() # scale factor for localization @@ -72,7 +72,7 @@ EnKF.MultiSite <-function(settings, Forecast, Observed, H, extraArg=NULL, ...){ ##' @rdname GEF ##' @export -GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){ +GEF.MultiSite <- function(settings, Forecast, Observed, H, extraArg,...){ #-- reading the dots and exposing them to the inside of the function dots<-list(...) if (length(dots) > 0) lapply(names(dots),function(name){assign(name,dots[[name]], pos = 1 )}) From 4f0959f889c411777f55bd50966338a1daeab829 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 14:33:59 -0400 Subject: [PATCH 19/68] Update the changelog file. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bc0cc6c903..5211b75c026 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ see if you need to change any of these: - Added new features of the SDA function including: 1) allow user-defined free-run mode; 2) allow user-defined parallel mode for the qsub submission; 3) allow user-defined email option to report the progress. - The analysis function now supports the parallelization of multi-chain MCMC sampling with the fully randomized inits function. +- Added the new feature of the block-based SDA workflow, which supports the parallel computation. ### Fixed From feb47aba498b411d4c9d2bf726fb5d86e7091508 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 15:20:33 -0400 Subject: [PATCH 20/68] Export the GEF.Block.Nimble code, and use the future_map function instead of the foreach function for the parallelization. --- modules/assim.sequential/NAMESPACE | 2 +- .../assim.sequential/R/Analysis_sda_block.R | 21 +++++-------------- modules/assim.sequential/R/Nimble_codes.R | 1 + 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index c352ba52faa..9e0ba94474a 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -9,6 +9,7 @@ export(Create_Site_PFT_CSV) export(EnKF) export(EnKF.MultiSite) export(GEF) +export(GEF.Block.Nimble) export(GEF.MultiSite) export(GEF.MultiSite.Nimble) export(Local.support) @@ -59,7 +60,6 @@ import(furrr) import(lubridate) import(nimble) importFrom(dplyr,"%>%") -importFrom(foreach,"%dopar%") importFrom(lubridate,"%m+%") importFrom(magrittr,"%>%") importFrom(rlang,.data) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index b24ff3881b8..24633954953 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -15,7 +15,6 @@ ##' @description This function provides the block-based MCMC sampling approach. ##' ##' @return It returns the `build.block.xy` object and the analysis results. -##' @importFrom foreach %dopar% ##' @importFrom dplyr %>% analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args) { #convert from vector values to block lists. @@ -51,26 +50,16 @@ analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, }) #parallel for loop over each block. - #initialize parallel settings. - cores <- parallel::detectCores() - cl <- parallel::makeCluster(cores) - doSNOW::registerDoSNOW(cl) - - #progress bar - pb <- utils::txtProgressBar(min=1, max=length(block.list.all[[t]]), style=3) - progress <- function(n) utils::setTxtProgressBar(pb, n) - opts <- list(progress=progress) PEcAn.logger::logger.info(paste0("Running MCMC ", "for ", length(block.list.all[[t]]), " blocks")) - - if ("try-error" %in% class(try(block.list.all[[t]] <- foreach::foreach(block = block.list.all[[t]], .packages="Kendall", .options.snow=opts) %dopar% { - library(nimble) - return(MCMC_block_function(block)) - }))) { + if ("try-error" %in% class(try(block.list.all[[t]] <- furrr::future_map(block.list.all[[t]], MCMC_block_function, .progress = T)))) { PEcAn.logger::logger.error("Something wrong within the MCMC_block_function function.") } + PEcAn.logger::logger.info("Completed!") #convert from block lists to vector values. - V <- block.2.vector(block.list.all[[t]], X, H) + if ("try-error" %in% class(try(V <- block.2.vector(block.list.all[[t]], X, H)))) { + PEcAn.logger::logger.error("Something wrong within the block.2.vector function.") + } #return values return(list(block.list.all = block.list.all, diff --git a/modules/assim.sequential/R/Nimble_codes.R b/modules/assim.sequential/R/Nimble_codes.R index f33f8ac2835..4b183647a16 100644 --- a/modules/assim.sequential/R/Nimble_codes.R +++ b/modules/assim.sequential/R/Nimble_codes.R @@ -223,6 +223,7 @@ GEF.MultiSite.Nimble <- nimbleCode({ #GEF.Block.Nimble--This does the block-based SDA ------------------------------------- #' block-based TWEnF #' @format TBD +#' @export GEF.Block.Nimble <- nimbleCode({ #I think the blocked nimble has to be implemented and used instead of a long vector sampling. #1) due to the convergence of X.mod. From 7a1679ea62272d680e0eb7cec50f272b937d26c4 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 15:36:48 -0400 Subject: [PATCH 21/68] Move the matrix_operation.R into the AssimSequential package. --- {base/utils => modules/assim.sequential}/R/matrix_operation.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {base/utils => modules/assim.sequential}/R/matrix_operation.R (100%) diff --git a/base/utils/R/matrix_operation.R b/modules/assim.sequential/R/matrix_operation.R similarity index 100% rename from base/utils/R/matrix_operation.R rename to modules/assim.sequential/R/matrix_operation.R From b352a15322eb01036a3efd17cc1e19e026143dd3 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 15:55:43 -0400 Subject: [PATCH 22/68] Move the script from util to Assimsequential. --- base/utils/NAMESPACE | 2 -- modules/assim.sequential/NAMESPACE | 2 ++ modules/assim.sequential/R/Analysis_sda_block.R | 14 +++++++------- .../assim.sequential}/man/GrabFillMatrix.Rd | 0 .../assim.sequential}/man/matrix_network.Rd | 0 5 files changed, 9 insertions(+), 9 deletions(-) rename {base/utils => modules/assim.sequential}/man/GrabFillMatrix.Rd (100%) rename {base/utils => modules/assim.sequential}/man/matrix_network.Rd (100%) diff --git a/base/utils/NAMESPACE b/base/utils/NAMESPACE index fc0b6306428..300bd155fbb 100644 --- a/base/utils/NAMESPACE +++ b/base/utils/NAMESPACE @@ -1,6 +1,5 @@ # Generated by roxygen2: do not edit by hand -export(GrabFillMatrix) export(arrhenius.scaling) export(as.sequence) export(bugs.rdist) @@ -28,7 +27,6 @@ export(listToArgString) export(load.modelpkg) export(load_local) export(match_file) -export(matrix_network) export(mcmc.list2init) export(misc.are.convertible) export(misc.convert) diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 9e0ba94474a..527ca7afbe5 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -12,6 +12,7 @@ export(GEF) export(GEF.Block.Nimble) export(GEF.MultiSite) export(GEF.MultiSite.Nimble) +export(GrabFillMatrix) export(Local.support) export(Obs.data.prepare.MultiSite) export(Prep_OBS_SDA) @@ -34,6 +35,7 @@ export(hop_test) export(interactive.plotting.sda) export(inv.alr) export(load_data_paleon_sda) +export(matrix_network) export(metSplit) export(obs_timestep2timepoint) export(outlier.detector.boxplot) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index 24633954953..b12be8f0470 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -180,7 +180,7 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { } } else { #find networks given TRUE/FALSE matrix representing sites' interactions. - block.vec <- PEcAn.utils::matrix_network(dis.matrix <= as.numeric(settings$state.data.assimilation$scalef)) + block.vec <- matrix_network(dis.matrix <= as.numeric(settings$state.data.assimilation$scalef)) block.list <- vector("list", length(block.vec)) #loop over sites for (i in seq_along(block.vec)) {#i is site index @@ -201,11 +201,11 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { } #fill in mu.f and Pf block.list[[i]]$data$muf <- mu.f[f.ind] - block.list[[i]]$data$pf <- PEcAn.utils::GrabFillMatrix(Pf, f.ind) + block.list[[i]]$data$pf <- GrabFillMatrix(Pf, f.ind) #fill in y and R block.list[[i]]$data$y.censored <- y.censored[y.ind] - block.list[[i]]$data$r <- PEcAn.utils::GrabFillMatrix(solve(R), y.ind) + block.list[[i]]$data$r <- GrabFillMatrix(solve(R), y.ind) #fill in constants block.h <- Construct.H.multisite(site.ids[ids], var.names, obs.mean[[t]]) @@ -348,7 +348,7 @@ MCMC_block_function <- function(block) { bq <- block$constant$YN } aq <- solve(q.bar) * bq - block$aqq[,,block$t+1] <- PEcAn.utils::GrabFillMatrix(block$aqq[,,block$t], block$constant$H, aq) + block$aqq[,,block$t+1] <- GrabFillMatrix(block$aqq[,,block$t], block$constant$H, aq) block$bqq[block$t+1] <- bq # #if it's a wishart case @@ -361,7 +361,7 @@ MCMC_block_function <- function(block) { # } # } # #update aqq and bqq - # block$aqq[,,block$t+1] <- PEcAn.utils::GrabFillMatrix(block$aqq[,,block$t], block$constant$H, aq) + # block$aqq[,,block$t+1] <- GrabFillMatrix(block$aqq[,,block$t], block$constant$H, aq) # block$bqq[block$t+1] <- block$bqq[block$t] } #update mua and pa; mufa, and pfa @@ -409,7 +409,7 @@ update_q <- function (block.list.all, t, nt, MCMC_dat = NULL) { block.list[[i]]$aqq[,,t] <- toeplitz((nvar:1)/nvar) block.list[[i]]$bqq <- rep(nobs, nt) #update aq and bq based on aqq and bqq - block.list[[i]]$data$aq <- PEcAn.utils::GrabFillMatrix(block.list[[i]]$aqq[,,t], block.list[[i]]$constant$H) + block.list[[i]]$data$aq <- GrabFillMatrix(block.list[[i]]$aqq[,,t], block.list[[i]]$constant$H) block.list[[i]]$data$bq <- block.list[[i]]$bqq[t] } } @@ -435,7 +435,7 @@ update_q <- function (block.list.all, t, nt, MCMC_dat = NULL) { block.list[[i]]$bqq[t] <- block.list[[i]]$constant$YN } #update aq and bq based on aqq and bqq - block.list[[i]]$data$aq <- PEcAn.utils::GrabFillMatrix(block.list[[i]]$aqq[,,t], block.list[[i]]$constant$H) + block.list[[i]]$data$aq <- GrabFillMatrix(block.list[[i]]$aqq[,,t], block.list[[i]]$constant$H) block.list[[i]]$data$bq <- block.list[[i]]$bqq[t] } } diff --git a/base/utils/man/GrabFillMatrix.Rd b/modules/assim.sequential/man/GrabFillMatrix.Rd similarity index 100% rename from base/utils/man/GrabFillMatrix.Rd rename to modules/assim.sequential/man/GrabFillMatrix.Rd diff --git a/base/utils/man/matrix_network.Rd b/modules/assim.sequential/man/matrix_network.Rd similarity index 100% rename from base/utils/man/matrix_network.Rd rename to modules/assim.sequential/man/matrix_network.Rd From 678393f397e2da125ff7475b83323dec4fa0e3e2 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 16:12:31 -0400 Subject: [PATCH 23/68] Remove the namespace for the AssimSequential package. --- modules/assim.sequential/R/sda.enkf_MultiSite.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index f7a49bbd3f0..c192ab27372 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -342,7 +342,7 @@ sda.enkf.multisite <- function(settings, load(file.path(settings$outdir, "samples.Rdata")) } #reformatting params - new.params <- PEcAnAssimSequential:::sda_matchparam(settings, ensemble.samples, site.ids, nens) + new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) } #sample met ensemble members #TODO: incorporate Phyllis's restart work @@ -434,7 +434,7 @@ sda.enkf.multisite <- function(settings, #put building of X into a function that gets called max_t <- 0 while("try-error" %in% class( - try(reads <- PEcAnAssimSequential:::build_X(out.configs = out.configs, + try(reads <- build_X(out.configs = out.configs, settings = settings, new.params = new.params, nens = nens, @@ -514,7 +514,7 @@ sda.enkf.multisite <- function(settings, nburnin = 1e4) } #running analysis function. - enkf.params[[obs.t]] <- PEcAnAssimSequential:::analysis_sda_block(settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args) + enkf.params[[obs.t]] <- analysis_sda_block(settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args) enkf.params[[obs.t]] <- c(enkf.params[[obs.t]], RestartList = list(restart.list %>% stats::setNames(site.ids))) block.list.all <- enkf.params[[obs.t]]$block.list.all #Forecast From 98138805ab5a729c0c0b819472b8987a492d0e23 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 16:31:27 -0400 Subject: [PATCH 24/68] Add namespace. --- modules/assim.sequential/R/Analysis_sda_block.R | 13 +++++++++---- .../assim.sequential/R/Multi_Site_Constructors.R | 4 ++-- modules/assim.sequential/R/sda.enkf_MultiSite.R | 2 ++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index b12be8f0470..e573280108d 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -121,6 +121,10 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { Obs.cons <- Construct.R(site.ids, var.names, obs.mean[[t]], obs.cov[[t]]) Y <- Obs.cons$Y R <- Obs.cons$R + if (length(Y) > 1) { + PEcAn.logger::logger.info("The zero variances in R and Pf is being replaced by half and one fifth of the minimum variance in those matrices respectively.") + diag(R)[which(diag(R)==0)] <- min(diag(R)[which(diag(R) != 0)])/2 + } #create matrix the describes the support for each observed state variable at time t min_max <- settings$state.data.assimilation$state.variables %>% purrr::map(function(state.variable){ @@ -231,6 +235,7 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { ##' ##' @return It returns the `block.list` object with initial conditions filled in. MCMC_Init <- function (block.list, X) { + var.names <- unique(attributes(X)$dimnames[[2]]) #sample mu.f from X. sample.mu.f <- X[sample(seq_along(1:nrow(X)), 1),] for (i in seq_along(block.list)) { @@ -251,14 +256,14 @@ MCMC_Init <- function (block.list, X) { #if we want the vector q. if (block.list[[i]]$constant$q.type == 1) { for (j in seq_along(block.list[[i]]$data$y.censored)) { - block.list[[i]]$Inits$q <- c(block.list[[i]]$Inits$q, rgamma(1, shape = block.list[[i]]$data$aq[j], rate = block.list[[i]]$data$bq[j])) + block.list[[i]]$Inits$q <- c(block.list[[i]]$Inits$q, stats::rgamma(1, shape = block.list[[i]]$data$aq[j], rate = block.list[[i]]$data$bq[j])) } } else if (block.list[[i]]$constant$q.type == 2) { #if we want the wishart Q. if ("try-error" %in% class(try(block.list[[i]]$Inits$q <- - rWishart(1, df = block.list[[i]]$data$bq, Sigma = block.list[[i]]$data$aq)[,,1], silent = T))) { + stats::rWishart(1, df = block.list[[i]]$data$bq, Sigma = block.list[[i]]$data$aq)[,,1], silent = T))) { block.list[[i]]$Inits$q <- - rWishart(1, df = block.list[[i]]$data$bq, Sigma = toeplitz((block.list[[i]]$constant$YN:1)/block.list[[i]]$constant$YN))[,,1] + stats::rWishart(1, df = block.list[[i]]$data$bq, Sigma = stats::toeplitz((block.list[[i]]$constant$YN:1)/block.list[[i]]$constant$YN))[,,1] } } } @@ -406,7 +411,7 @@ update_q <- function (block.list.all, t, nt, MCMC_dat = NULL) { } else if (block.list[[i]]$constant$q.type == 2) { #initialize aqq and bqq for nt block.list[[i]]$aqq <- array(1, dim = c(nvar, nvar, nt)) - block.list[[i]]$aqq[,,t] <- toeplitz((nvar:1)/nvar) + block.list[[i]]$aqq[,,t] <- stats::toeplitz((nvar:1)/nvar) block.list[[i]]$bqq <- rep(nobs, nt) #update aq and bq based on aqq and bqq block.list[[i]]$data$aq <- GrabFillMatrix(block.list[[i]]$aqq[,,t], block.list[[i]]$constant$H) diff --git a/modules/assim.sequential/R/Multi_Site_Constructors.R b/modules/assim.sequential/R/Multi_Site_Constructors.R index 1e417ec26e2..e231750ae42 100755 --- a/modules/assim.sequential/R/Multi_Site_Constructors.R +++ b/modules/assim.sequential/R/Multi_Site_Constructors.R @@ -235,7 +235,7 @@ construct_nimble_H <- function(site.ids, var.names, obs.t, pft.path = NULL, by = Ind[which(total_site_id == site.ids[i])] <- i } } else if (by == "pft") { - pft <- read.csv(pft.path) + pft <- utils::read.csv(pft.path) rownames(pft) <- pft$site total_site_id <- rep(site.ids, each = length(var.names)) total_pft <- pft[total_site_id, 2] @@ -246,7 +246,7 @@ construct_nimble_H <- function(site.ids, var.names, obs.t, pft.path = NULL, by = } } else if (by == "block_pft_var") { #by pft - pft <- read.csv(pft.path) + pft <- utils::read.csv(pft.path) rownames(pft) <- pft$site total_site_id <- rep(site.ids, each = length(var.names)) total_pft <- pft[total_site_id, 2] diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index c192ab27372..7c0dbc7f4bb 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -532,6 +532,8 @@ sda.enkf.multisite <- function(settings, PEcAn.logger::logger.info("The zero variances in R and Pf is being replaced by half and one fifth of the minimum variance in those matrices respectively.") diag(R)[which(diag(R)==0)] <- min(diag(R)[which(diag(R) != 0)])/2 } + # making the mapping operator + H <- Construct.H.multisite(site.ids, var.names, obs.mean[[t]]) #Pass aqq and bqq. aqq <- NULL bqq <- numeric(nt + 1) From 463e48ac347e237559efcca0f447d05c0d84c04f Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 13 Jul 2023 21:56:00 -0400 Subject: [PATCH 25/68] Tweak the documents. --- .../04_more_web_interface/02_hidden_analyses.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd index b134bf850eb..78d77be6da0 100644 --- a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd +++ b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd @@ -372,9 +372,9 @@ Before running the SDA analysis functions, the ensemble forecast results have to * If we decide to use `EnKF.MultiSite`, then the analysis results will be calculated based on equations. -* If we decide to use `GEF.MultiSite`, then it will first do censoring process based on the how you setup the `censored.data` flag within settings xml file. Then, if t equals to 1, we will first initialize the `aqq`, `bqq`, `aq`, and `bq` based on how you setup the `q.type` argument within settings xml file. After preparing the initial conditions (ICs), data, and constants for the `GEF.MultiSite.Nimble` nimble model, the MCMC sampling will happen afterwards. Finally, the process variance and the analysis results will be calculated and updated and returned to the `sda.enkf.multisite` function. +* If we decide to use `GEF.MultiSite`, then it will first do censoring process based on how you setup the `censored.data` flag within settings xml file. Then, if t equals to 1, we will first initialize the `aqq`, `bqq`, `aq`, and `bq` based on how you setup the `q.type` argument within settings xml file. After preparing the initial conditions (ICs), data, and constants for the `GEF.MultiSite.Nimble` nimble model, the MCMC sampling will happen afterwards. Finally, the process variance and the analysis results will be calculated and updated and returned to the `sda.enkf.multisite` function. -*If we decide to use `analysis_sda_block`, then if t equals to 1, we will need to initialize the overall block-based MCMC lists (`block.list.all`). After that, we will need to specify the `MCMC_args` either from the `control` list or by default (see bellow). Next, within the `analysis_sda_block` function, it will first use the `build.block.xy` function to initialize the block lists based on each site or underlying networks between sites, and then split long vectors or large matrices (`mu.f`, `y.censored`, or `Pf`) into each block and fill in the data (except for process variance) and constants for each block. Then, it will update the process variance using the `update_q` function, if t equals to 1, then it will initialize the arguments for the process variance, otherwilse, it will grab the previous updated process variance. After that, in the `MCMC_Init` function, it will creates the initial conditions (ICs) for the MCMC sampling based on randomly sampled data. Then, the completed block-based arguments (`block.list.all`) will be used by the `MCMC_block_function` function under the parallel mode. Finally, the block-based results will be converted into long vectors and large matrices by the `block.2.vector` function and values such as `block.list.all`, `mu.f`, `mu.a`, `Pf`, `Pa` will be returned back to the `sda.enkf.multisite` function. +*If we decide to use `analysis_sda_block`, then if t equals 1, we need to initialize the overall block-based MCMC lists (`block.list.all`). After that, we need to specify the `MCMC_args` either from the `control` list or by default (see below). Next, within the `analysis_sda_block` function, it will first use the `build.block.xy` function to initialize the block lists based on each site or underlying networks between sites and then split long vectors or large matrices (`mu.f`, `y.censored`, or `Pf`) into small blocks and fill in the data (except for process variance) and constants for each block. Then, we will update the process error using the `update_q` function. If t equals 1, it will initialize the arguments for the process error. Otherwise, it will grab the previously updated process error. After that, in the `MCMC_Init` function, it will create the initial conditions (ICs) for the MCMC sampling based on randomly sampled data. Then, the completed block-based arguments (`block.list.all`) will be used by the `MCMC_block_function` function under the parallel mode. Finally, the block-based results will be converted into long vectors and large matrices by the `block.2.vector` function and values such as `block.list.all`, `mu.f`, `mu.a`, `Pf`, `Pa` will be returned to the `sda.enkf.multisite` function. ``` MCMC.args <- list(niter = 1e5, nthin = 10, From cbfe7b636594f58709f2b06a9b6ca59415a710e7 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 17 Jul 2023 12:14:08 -0400 Subject: [PATCH 26/68] bug fixes for initializing aqq and bqq. --- modules/assim.sequential/R/Analysis_sda_block.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index e573280108d..292160531ad 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -403,16 +403,16 @@ update_q <- function (block.list.all, t, nt, MCMC_dat = NULL) { nobs <- length(block.list[[i]]$data$y.censored) if (block.list[[i]]$constant$q.type == 1) { #initialize aqq and bqq for nt - block.list[[i]]$aqq <- array(1, dim = c(nvar, nt)) - block.list[[i]]$bqq <- array(1, dim = c(nvar, nt)) + block.list[[i]]$aqq <- array(1, dim = c(nvar, nt + 1)) + block.list[[i]]$bqq <- array(1, dim = c(nvar, nt + 1)) #update aq and bq based on aqq and bqq block.list[[i]]$data$aq <- block.list[[i]]$aqq[block.list[[i]]$constant$H, t] block.list[[i]]$data$bq <- block.list[[i]]$bqq[block.list[[i]]$constant$H, t] } else if (block.list[[i]]$constant$q.type == 2) { #initialize aqq and bqq for nt - block.list[[i]]$aqq <- array(1, dim = c(nvar, nvar, nt)) + block.list[[i]]$aqq <- array(1, dim = c(nvar, nvar, nt + 1)) block.list[[i]]$aqq[,,t] <- stats::toeplitz((nvar:1)/nvar) - block.list[[i]]$bqq <- rep(nobs, nt) + block.list[[i]]$bqq <- rep(nobs, nt + 1) #update aq and bq based on aqq and bqq block.list[[i]]$data$aq <- GrabFillMatrix(block.list[[i]]$aqq[,,t], block.list[[i]]$constant$H) block.list[[i]]$data$bq <- block.list[[i]]$bqq[t] From 1d0b8cec35a47fbc726b85e1ef37c21759bce77c Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 17 Jul 2023 12:15:17 -0400 Subject: [PATCH 27/68] Add the tolerance for detecting the nc files. --- modules/assim.sequential/R/sda.enkf_MultiSite.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 7c0dbc7f4bb..825aa85d037 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -447,7 +447,7 @@ sda.enkf.multisite <- function(settings, ){ Sys.sleep(10) max_t <- max_t + 1 - if(max_t > 20){ + if(max_t > 100){ PEcAn.logger::logger.info("Can't find outputed NC file! Please rerun the code!") break } From b886ae46d9b87dd4de11c63e4f3b086434ae6c59 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 17 Jul 2023 12:19:10 -0400 Subject: [PATCH 28/68] Bug fixes for unusually high soil carbon values. --- models/sipnet/R/write.configs.SIPNET.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R index 91c5224c9a0..ee55ab89b9f 100755 --- a/models/sipnet/R/write.configs.SIPNET.R +++ b/models/sipnet/R/write.configs.SIPNET.R @@ -441,7 +441,12 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs plant_wood_vars <- c("AbvGrndWood", "abvGrndWoodFrac", "coarseRootFrac", "fineRootFrac") if (all(plant_wood_vars %in% ic.names)) { # reconstruct total wood C - wood_total_C <- IC$AbvGrndWood / IC$abvGrndWoodFrac + if(IC$abvGrndWoodFrac < 0.1){ + wood_total_C <- 0 + }else{ + wood_total_C <- IC$AbvGrndWood / IC$abvGrndWoodFrac + } + #Sanity check if (is.infinite(wood_total_C) | is.nan(wood_total_C) | wood_total_C < 0) { wood_total_C <- 0 From 020174b3d1a5ba115b3baaad4239a15ffb8f2741 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 17 Jul 2023 12:27:54 -0400 Subject: [PATCH 29/68] lower the threshold. --- models/sipnet/R/write.configs.SIPNET.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R index ee55ab89b9f..b1bb635b9d4 100755 --- a/models/sipnet/R/write.configs.SIPNET.R +++ b/models/sipnet/R/write.configs.SIPNET.R @@ -441,7 +441,7 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs plant_wood_vars <- c("AbvGrndWood", "abvGrndWoodFrac", "coarseRootFrac", "fineRootFrac") if (all(plant_wood_vars %in% ic.names)) { # reconstruct total wood C - if(IC$abvGrndWoodFrac < 0.1){ + if(IC$abvGrndWoodFrac < 0.05){ wood_total_C <- 0 }else{ wood_total_C <- IC$AbvGrndWood / IC$abvGrndWoodFrac From 81dc1ddd765290cb101a649972bb3f672f0708d7 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 2 Aug 2023 11:12:23 -0400 Subject: [PATCH 30/68] Update the function. --- base/remote/R/qsub_parallel.R | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index aca657d1ba8..b1f05b16354 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -91,19 +91,15 @@ qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out") { pb <- utils::txtProgressBar(min = 0, max = length(unlist(run_list)), style = 3) pbi <- 0 folders <- file.path(settings$host$outdir, run_list) - while (length(folders) > 0) { - Sys.sleep(10) - completed_folders <- foreach::foreach(folder = folders, settings = rep(settings, length(run_list))) %dopar% { + completed_folders <- c() + while (length(completed_folders) < length(folders)) { + completed_folders <- foreach::foreach(folder = folders) %dopar% { if(file.exists(file.path(folder, prefix))){ return(folder) } - } - if(length(unlist(completed_folders)) > 0){ - Ind <- which(unlist(completed_folders) %in% folders) - folders <- folders[-Ind] - pbi <- pbi + length(completed_folders) - utils::setTxtProgressBar(pb, pbi) - } + } %>% unlist + pbi <- length(completed_folders) + utils::setTxtProgressBar(pb, pbi) } close(pb) parallel::stopCluster(cl) From 339581ae98c0551ff77d66b8e8f544cea6cc9c78 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:41:33 -0400 Subject: [PATCH 31/68] 1) Allow pre-existed aqq & bqq imported from outside. 2) Allow pre-specified prior of Q imported from outside. 3) Allow NA values for any site that doesn't have any observation. 4) Solve the one observation or one site problem. 5) Solved the aqq & bqq length issue. --- .../assim.sequential/R/Analysis_sda_block.R | 175 ++++++++++++------ 1 file changed, 116 insertions(+), 59 deletions(-) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index 292160531ad..201b37d9f3f 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -16,7 +16,7 @@ ##' ##' @return It returns the `build.block.xy` object and the analysis results. ##' @importFrom dplyr %>% -analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args) { +analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args, block.list.all.pre = NULL) { #convert from vector values to block lists. if ("try-error" %in% class(try(block.results <- build.block.xy(settings = settings, block.list.all = block.list.all, @@ -25,6 +25,7 @@ analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, obs.cov = obs.cov, t = t)))) { PEcAn.logger::logger.error("Something wrong within the build.block.xy function.") + return(0) } #grab block.list and H from the results. block.list.all <- block.results[[1]] @@ -33,13 +34,18 @@ analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, R <- block.results[[4]] #update q. - if ("try-error" %in% class(try(block.list.all <- update_q(block.list.all, t, nt)))) { + if ("try-error" %in% class(try(block.list.all <- update_q(block.list.all, t, nt, aqq.Init = as.numeric(settings$state.data.assimilation$aqq.Init), + bqq.Init = as.numeric(settings$state.data.assimilation$bqq.Init), + MCMC_dat = NULL, + block.list.all.pre)))) { PEcAn.logger::logger.error("Something wrong within the update_q function.") + return(0) } #add initial conditions. if ("try-error" %in% class(try(block.list.all[[t]] <- MCMC_Init(block.list.all[[t]], X)))) { PEcAn.logger::logger.error("Something wrong within the MCMC_Init function.") + return(0) } #update MCMC args. @@ -53,12 +59,14 @@ analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, PEcAn.logger::logger.info(paste0("Running MCMC ", "for ", length(block.list.all[[t]]), " blocks")) if ("try-error" %in% class(try(block.list.all[[t]] <- furrr::future_map(block.list.all[[t]], MCMC_block_function, .progress = T)))) { PEcAn.logger::logger.error("Something wrong within the MCMC_block_function function.") + return(0) } PEcAn.logger::logger.info("Completed!") #convert from block lists to vector values. if ("try-error" %in% class(try(V <- block.2.vector(block.list.all[[t]], X, H)))) { PEcAn.logger::logger.error("Something wrong within the block.2.vector function.") + return(0) } #return values @@ -118,39 +126,47 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { } #Handle observation - Obs.cons <- Construct.R(site.ids, var.names, obs.mean[[t]], obs.cov[[t]]) - Y <- Obs.cons$Y - R <- Obs.cons$R - if (length(Y) > 1) { - PEcAn.logger::logger.info("The zero variances in R and Pf is being replaced by half and one fifth of the minimum variance in those matrices respectively.") - diag(R)[which(diag(R)==0)] <- min(diag(R)[which(diag(R) != 0)])/2 - } - #create matrix the describes the support for each observed state variable at time t - min_max <- settings$state.data.assimilation$state.variables %>% - purrr::map(function(state.variable){ - c(as.numeric(state.variable$min_value), - as.numeric(state.variable$max_value)) - }) %>% unlist() %>% as.vector() %>% - matrix(length(settings$state.data.assimilation$state.variables), 2, byrow = T) %>% - `rownames<-`(var.names) - #Create y.censored and y.ind - #describing if the obs are within the defined range. - y.ind <- y.censored <- c() - for (i in seq_along(Y)) { - if (Y[i] > min_max[names(Y[i]), 1]) { - y.ind[i] = 1; y.censored[i] = Y[i] - } else {y.ind[i] <- y.censored[i] <- 0} + if (as.logical(settings$state.data.assimilation$free.run)) { + obs.mean[[t]] <- vector("list", length(site.ids)) %>% `names<-`(site.ids) + obs.cov[[t]] <- vector("list", length(site.ids)) %>% `names<-`(site.ids) + H <- list(ind = seq_along(rep(var.names, length(site.ids)))) + Y <- rep(NA, length(H$ind)) + R <- diag(1, length(H$ind)) + } else { + Obs.cons <- Construct.R(site.ids, var.names, obs.mean[[t]], obs.cov[[t]]) + Y <- Obs.cons$Y + R <- Obs.cons$R + if (length(Y) > 1) { + PEcAn.logger::logger.info("The zero variances in R and Pf is being replaced by half and one fifth of the minimum variance in those matrices respectively.") + diag(R)[which(diag(R)==0)] <- min(diag(R)[which(diag(R) != 0)])/2 + } + #create matrix the describes the support for each observed state variable at time t + min_max <- settings$state.data.assimilation$state.variables %>% + purrr::map(function(state.variable){ + c(as.numeric(state.variable$min_value), + as.numeric(state.variable$max_value)) + }) %>% unlist() %>% as.vector() %>% + matrix(length(settings$state.data.assimilation$state.variables), 2, byrow = T) %>% + `rownames<-`(var.names) + #Create y.censored and y.ind + #describing if the obs are within the defined range. + y.ind <- y.censored <- c() + for (i in seq_along(Y)) { + if (Y[i] > min_max[names(Y[i]), 1]) { + y.ind[i] = 1; y.censored[i] = Y[i] + } else {y.ind[i] <- y.censored[i] <- 0} + } + #create H + H <- construct_nimble_H(site.ids = site.ids, + var.names = var.names, + obs.t = obs.mean[[t]], + pft.path = settings[[1]]$run$inputs$pft.site$path, + by = "block_pft_var") } #observation number per site obs_per_site <- obs.mean[[t]] %>% purrr::map(function(site.obs){length(site.obs)}) %>% unlist() - #create H - H <- construct_nimble_H(site.ids = site.ids, - var.names = var.names, - obs.t = obs.mean[[t]], - pft.path = settings[[1]]$run$inputs$pft.site$path, - by = "block_pft_var") #start the blocking process #should we consider interactions between sites? @@ -170,21 +186,40 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { block.list[[i]]$data$pf <- Pf[f.start:f.end, f.start:f.end] #fill in y and r - y.start <- obs_per_site[i] * (i - 1) + 1 - y.end <- obs_per_site[i] * i - block.list[[i]]$data$y.censored <- y.censored[y.start:y.end] - block.list[[i]]$data$r <- solve(R[y.start:y.end, y.start:y.end]) + if (obs_per_site[i] == 0) { + y.start <- 1 + y.end <- length(var.names) + block.list[[i]]$data$y.censored <- rep(NA, length(var.names)) + block.list[[i]]$data$r <- diag(1, length(var.names)) + block.h <- matrix(1, 1, length(var.names)) + } else { + y.start <- obs_per_site[i] * (i - 1) + 1 + y.end <- obs_per_site[i] * i + block.list[[i]]$data$y.censored <- y.censored[y.start:y.end] + block.list[[i]]$data$r <- solve(R[y.start:y.end, y.start:y.end]) + block.h <- Construct.H.multisite(site.ids[i], var.names, obs.mean[[t]]) + } #fill in constants. - block.h <- Construct.H.multisite(site.ids[i], var.names, obs.mean[[t]]) + block.list[[i]]$H <- block.h block.list[[i]]$constant$H <- which(apply(block.h, 2, sum) == 1) block.list[[i]]$constant$N <- length(f.start:f.end) block.list[[i]]$constant$YN <- length(y.start:y.end) block.list[[i]]$constant$q.type <- q.type } + names(block.list) <- site.ids } else { #find networks given TRUE/FALSE matrix representing sites' interactions. block.vec <- matrix_network(dis.matrix <= as.numeric(settings$state.data.assimilation$scalef)) + #check if the matrix_network function is working correctly. + #check if the blocks are calculated correctly. + if (block.vec %>% + purrr::map(function(l){length(l)}) %>% + unlist %>% + sum() != length(site.ids)) { + PEcAn.logger::logger.error("Block calculation failed, please check the matrix_network function!") + return(0) + } block.list <- vector("list", length(block.vec)) #loop over sites for (i in seq_along(block.vec)) {#i is site index @@ -213,6 +248,7 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { #fill in constants block.h <- Construct.H.multisite(site.ids[ids], var.names, obs.mean[[t]]) + block.list[[i]]$H <- block.h block.list[[i]]$constant$H <- which(apply(block.h, 2, sum) == 1) block.list[[i]]$constant$N <- length(f.ind) block.list[[i]]$constant$YN <- length(y.ind) @@ -237,7 +273,7 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { MCMC_Init <- function (block.list, X) { var.names <- unique(attributes(X)$dimnames[[2]]) #sample mu.f from X. - sample.mu.f <- X[sample(seq_along(1:nrow(X)), 1),] + sample.mu.f <- colMeans(X) for (i in seq_along(block.list)) { #number of observations. num.obs <- length(block.list[[i]]$data$y.censored) @@ -248,7 +284,7 @@ MCMC_Init <- function (block.list, X) { end <- (block.list[[i]]$sites.per.block[j]) * length(var.names) block.list[[i]]$Inits$X.mod <- c(block.list[[i]]$Inits$X.mod, sample.mu.f[start:end]) #initialize X - block.list[[i]]$Inits$X <- block.list[[i]]$Inits$X.mod[block.list[[i]]$constant$H] + block.list[[i]]$Inits$X <- block.list[[i]]$data$y.censored #initialize Xs block.list[[i]]$Inits$Xs <- block.list[[i]]$Inits$X.mod[block.list[[i]]$constant$H] } @@ -293,6 +329,7 @@ MCMC_block_function <- function(block) { #hear we change the RW_block sampler to the ess sampler #because it has a better performance of MVN sampling samplerLists <- conf$getSamplers() + samplerNumberOffset <- length(samplerLists) if (block$constant$q.type == 1) { #if we have vector q #only X.mod should be sampled with ess sampler. @@ -305,11 +342,24 @@ MCMC_block_function <- function(block) { } conf$setSamplers(samplerLists) + #add toggle Y sampler. + for (i in 1:block$constant$YN) { + conf$addSampler(paste0("y.censored[", i, "]"), 'toggle', control=list(type='RW')) + } + conf$printSamplers() #compile MCMC Rmcmc <- nimble::buildMCMC(conf) Cmodel <- nimble::compileNimble(model_pred) Cmcmc <- nimble::compileNimble(Rmcmc, project = model_pred, showCompilerOutput = FALSE) + #if we don't have any NA in the Y. + if (!any(is.na(block$data$y.censored))) { + #add toggle Y sampler. + for(i in 1:block$constant$YN) { + valueInCompiledNimbleFunction(Cmcmc$samplerFunctions[[samplerNumberOffset+i]], 'toggle', 0) + } + } + #run MCMC dat <- runMCMC(Cmcmc, niter = block$MCMC$niter, nburnin = block$MCMC$nburnin, thin = block$MCMC$nthin, nchains = block$MCMC$nchain) @@ -355,27 +405,25 @@ MCMC_block_function <- function(block) { aq <- solve(q.bar) * bq block$aqq[,,block$t+1] <- GrabFillMatrix(block$aqq[,,block$t], block$constant$H, aq) block$bqq[block$t+1] <- bq - - # #if it's a wishart case - # bq <- block$data$bq - # aq <- block$data$aq - # for (i in 1:dim(aq)[1]) { - # CHAR <- paste0("[", i, "]") - # for (j in 1:dim(aq)[2]) { - # aq[i, j] <- M[paste0("q[", i, ", ", j, "]")]/bq - # } - # } - # #update aqq and bqq - # block$aqq[,,block$t+1] <- GrabFillMatrix(block$aqq[,,block$t], block$constant$H, aq) - # block$bqq[block$t+1] <- block$bqq[block$t] } #update mua and pa; mufa, and pfa iX <- grep("X[", colnames(dat), fixed = TRUE) iX.mod <- grep("X.mod[", colnames(dat), fixed = TRUE) - mua <- colMeans(dat[, iX]) - pa <- stats::cov(dat[, iX]) - mufa <- colMeans(dat[, iX.mod]) - pfa <- stats::cov(dat[, iX.mod]) + if (length(iX) == 1) { + mua <- mean(dat[, iX]) + pa <- var(dat[, iX]) + } else { + mua <- colMeans(dat[, iX]) + pa <- stats::cov(dat[, iX]) + } + + if (length(iX.mod) == 1) { + mufa <- mean(dat[, iX.mod]) + pfa <- var(dat[, iX.mod]) + } else { + mufa <- colMeans(dat[, iX.mod]) + pfa <- stats::cov(dat[, iX.mod]) + } #return values. block$update <- list(aq = aq, bq = bq, mua = mua, pa = pa, mufa = mufa, pfa = pfa) @@ -392,7 +440,7 @@ MCMC_block_function <- function(block) { ##' @param MCMC_dat data frame of MCMC samples, the default it NULL. ##' ##' @return It returns the `block.list.all` object with initialized/updated Q filled in. -update_q <- function (block.list.all, t, nt, MCMC_dat = NULL) { +update_q <- function (block.list.all, t, nt, aqq.Init = NULL, bqq.Init = NULL, MCMC_dat = NULL, block.list.all.pre = NULL) { block.list <- block.list.all[[t]] #if it's an update. if (is.null(MCMC_dat)) { @@ -403,8 +451,13 @@ update_q <- function (block.list.all, t, nt, MCMC_dat = NULL) { nobs <- length(block.list[[i]]$data$y.censored) if (block.list[[i]]$constant$q.type == 1) { #initialize aqq and bqq for nt - block.list[[i]]$aqq <- array(1, dim = c(nvar, nt + 1)) - block.list[[i]]$bqq <- array(1, dim = c(nvar, nt + 1)) + if (!is.null(aqq.Init) && !is.null(bqq.Init)) { + block.list[[i]]$aqq <- array(aqq.Init, dim = c(nvar, nt + 1)) + block.list[[i]]$bqq <- array(bqq.Init, dim = c(nvar, nt + 1)) + } else { + block.list[[i]]$aqq <- array(1, dim = c(nvar, nt + 1)) + block.list[[i]]$bqq <- array(1, dim = c(nvar, nt + 1)) + } #update aq and bq based on aqq and bqq block.list[[i]]$data$aq <- block.list[[i]]$aqq[block.list[[i]]$constant$H, t] block.list[[i]]$data$bq <- block.list[[i]]$bqq[block.list[[i]]$constant$H, t] @@ -419,8 +472,12 @@ update_q <- function (block.list.all, t, nt, MCMC_dat = NULL) { } } } else if (t > 1) { - #if we want to update q from previous SDA runs. - block.list.pre <- block.list.all[[t - 1]] + if (!is.null(block.list.all.pre)) { + block.list.pre <- block.list.all.pre[[t - 1]] + } else { + #if we want to update q from previous SDA runs. + block.list.pre <- block.list.all[[t - 1]] + } for (i in seq_along(block.list)) { nvar <- length(block.list[[i]]$data$muf) nobs <- length(block.list[[i]]$data$y.censored) From 8d71e9f5afc853b9de8da1708186f4266787176f Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:42:38 -0400 Subject: [PATCH 32/68] Add the special case where only one observation existed for one site. --- modules/assim.sequential/R/Nimble_codes.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/Nimble_codes.R b/modules/assim.sequential/R/Nimble_codes.R index 4b183647a16..9a194248784 100644 --- a/modules/assim.sequential/R/Nimble_codes.R +++ b/modules/assim.sequential/R/Nimble_codes.R @@ -238,8 +238,12 @@ GEF.Block.Nimble <- nimbleCode({ for (i in 1:YN) { #sample Q. q[i] ~ dgamma(shape = aq[i], rate = bq[i]) - #sample latent variable X. - X[i] ~ dnorm(X.mod[H[i]], sd = 1/sqrt(q[i])) + if (length(H) == 1) { + X[i] ~ dnorm(X.mod[H], sd = 1/sqrt(q[i])) + } else { + #sample latent variable X. + X[i] ~ dnorm(X.mod[H[i]], sd = 1/sqrt(q[i])) + } #likelihood y.censored[i] ~ dnorm(X[i], sd = 1/sqrt(r[i, i])) } From 3625e448caa081d30994cb9e86f2b00897f7b803 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:43:21 -0400 Subject: [PATCH 33/68] Add the feature of returning NULL values for a free run inside the observation assembler function. --- modules/assim.sequential/R/SDA_OBS_Assembler.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/modules/assim.sequential/R/SDA_OBS_Assembler.R b/modules/assim.sequential/R/SDA_OBS_Assembler.R index 9510a2ad89a..9776cb33ee6 100644 --- a/modules/assim.sequential/R/SDA_OBS_Assembler.R +++ b/modules/assim.sequential/R/SDA_OBS_Assembler.R @@ -19,6 +19,21 @@ SDA_OBS_Assembler <- function(settings){ #extract Obs_Prep object from settings. Obs_Prep <- settings$state.data.assimilation$Obs_Prep + #check if we want to proceed the free run without any observations. + if (as.logical(settings$state.data.assimilation$free.run)) { + #calcualte time points. + time_points <- obs_timestep2timepoint(Obs_Prep$start.date, Obs_Prep$end.date, Obs_Prep$timestep) + + #generate obs.mean and obs.cov with NULL filled. + obs.mean = vector("list", length(time_points)) %>% `names<-`(time_points) + obs.cov = vector("list", length(time_points)) %>% `names<-`(time_points) + + #save files. + save(obs.mean, file = file.path(Obs_Prep$outdir, "Rdata", "obs.mean.Rdata")) + save(obs.cov, file = file.path(Obs_Prep$outdir, "Rdata", "obs.cov.Rdata")) + return(list(obs.mean = obs.mean, obs.cov = obs.cov)) + } + #prepare site_info offline, because we need to submit this to server remotely, which might not support the Bety connection. site_info <- settings %>% purrr::map(~.x[['run']] ) %>% From c3b6a2e1d2d6dbb2e18474c8fe8eb69187994dd6 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:43:42 -0400 Subject: [PATCH 34/68] Bug fixings. --- modules/assim.sequential/R/matrix_operation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/assim.sequential/R/matrix_operation.R b/modules/assim.sequential/R/matrix_operation.R index 5567e153aac..c5177c21ece 100644 --- a/modules/assim.sequential/R/matrix_operation.R +++ b/modules/assim.sequential/R/matrix_operation.R @@ -59,7 +59,7 @@ matrix_network <- function (mat) { Inits <- c(Inits, which(mat[init,])) } Inits <- Inits[which(!Inits %in% vec)] - vec <- c(vec, Inits) + vec <- sort(unique(c(vec, Inits))) #if we don't have any new site that belongs to this network. if (length(Inits) == 0) { #then stop. From 7fd1df4b9e6a682a48a58ebffd4fb40798c11c62 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:48:26 -0400 Subject: [PATCH 35/68] 1) Remove the free_run tag from the control list and instead use the flag from the settings object. 2) Add the user-defined jobs.per.file inside the control list. 3) solved the issue of not properly initializing the block.list.all argument. 4) Remove the feature of using the pre-existed Pf. 5) Using the forceRun flag inside the control list to decide if we want to proceed with the analysis function for a free run. --- .../assim.sequential/R/sda.enkf_MultiSite.R | 39 +++++++++++-------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 825aa85d037..fa2e6c04c8e 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -40,7 +40,6 @@ sda.enkf.multisite <- function(settings, Profiling = FALSE, OutlierDetection=FALSE, parallel_qsub = TRUE, - free_run = FALSE, send_email = NULL, keepNC = TRUE, forceRun = TRUE, @@ -356,7 +355,7 @@ sda.enkf.multisite <- function(settings, ) }) ###------------------------------------------------------------------------------------------------### - ### loop over time ### + ### w over time ### ###------------------------------------------------------------------------------------------------### for(t in 1:nt){ obs.t <- as.character(lubridate::date(obs.times[t])) @@ -426,7 +425,7 @@ sda.enkf.multisite <- function(settings, paste(file.path(rundir, 'runs.txt')) ## testing Sys.sleep(0.01) ## testing if(control$parallel_qsub){ - PEcAn.remote::qsub_parallel(settings, files=PEcAn.remote::merge_job_files(settings, 10), prefix = paste0(obs.year, ".nc")) + PEcAn.remote::qsub_parallel(settings, files=PEcAn.remote::merge_job_files(settings, control$jobs.per.file), prefix = paste0(obs.year, ".nc")) }else{ PEcAn.workflow::start_model_runs(settings, write=settings$database$bety$write) } @@ -447,9 +446,10 @@ sda.enkf.multisite <- function(settings, ){ Sys.sleep(10) max_t <- max_t + 1 - if(max_t > 100){ + if(max_t > 3){ PEcAn.logger::logger.info("Can't find outputed NC file! Please rerun the code!") break + return(0) } PEcAn.logger::logger.info("Empty folder, try again!") } @@ -487,7 +487,9 @@ sda.enkf.multisite <- function(settings, ###-------------------------------------------------------------------### ### preparing OBS ### ###-------------------------------------------------------------------###---- - if (!is.null(obs.mean[[t]][[1]]) & !control$free_run) { ## | any(sapply(obs.mean[[t]],function(x){any(!is.na(x))})) + #To trigger the analysis function with free run, you need to first specify the control$forceRun as TRUE, + #Then specify the settings$state.data.assimilation$by.site as TRUE, and finally + if (!is.null(obs.mean[[t]][[1]]) & as.logical(settings$state.data.assimilation$free.run) | control$forceRun) { # TODO: as currently configured, Analysis runs even if all obs are NA, # which clearly should be triggering the `else` of this if, but the # `else` has not been invoked in a while an may need updating @@ -503,7 +505,7 @@ sda.enkf.multisite <- function(settings, #decide if we want the block analysis function or multi-site analysis function. if (processvar == TRUE && settings$state.data.assimilation$q.type %in% c("vector", "wishart")) { #initialize block.list.all. - if (t == 1) { + if (t == 1 | !exists("block.list.all")) { block.list.all <- obs.mean %>% purrr::map(function(l){NULL}) } #initialize MCMC arguments. @@ -511,10 +513,10 @@ sda.enkf.multisite <- function(settings, MCMC.args <- list(niter = 1e5, nthin = 10, nchain = 1, - nburnin = 1e4) + nburnin = 5e4) } #running analysis function. - enkf.params[[obs.t]] <- analysis_sda_block(settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args) + enkf.params[[obs.t]] <- analysis_sda_block(settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args, pre_enkf_params) enkf.params[[obs.t]] <- c(enkf.params[[obs.t]], RestartList = list(restart.list %>% stats::setNames(site.ids))) block.list.all <- enkf.params[[obs.t]]$block.list.all #Forecast @@ -663,11 +665,12 @@ sda.enkf.multisite <- function(settings, #will throw an error when q.bar and Pf are different sizes i.e. when you are running with no obs and do not variance for all state variables #Pa <- Pf + solve(q.bar) #hack have Pa = Pf for now - if(!is.null(pre_enkf_params)){ - Pf <- pre_enkf_params[[t]]$Pf - }else{ - Pf <- stats::cov(X) # Cov Forecast - This is used as an initial condition - } + # if(!is.null(pre_enkf_params)){ + # Pf <- pre_enkf_params[[t]]$Pf + # }else{ + # Pf <- stats::cov(X) # Cov Forecast - This is used as an initial condition + # } + Pf <- stats::cov(X) Pa <- Pf } enkf.params[[obs.t]] <- list(mu.f = mu.f, Pf = Pf, mu.a = mu.a, Pa = Pa) @@ -712,9 +715,13 @@ sda.enkf.multisite <- function(settings, tictoc::tic(paste0("Visulization for cycle = ", t)) #writing down the image - either you asked for it or nor :) - if ((t%%2 == 0 | t == nt) & (control$TimeseriesPlot) & !is.null(obs.mean[[t]][[1]])){ - SDA_timeseries_plot(ANALYSIS, FORECAST, obs.mean, obs.cov, settings$outdir, by = "var", types = c("FORECAST", "ANALYSIS", "OBS")) - } + if ((t%%2 == 0 | t == nt) & (control$TimeseriesPlot)){ + if (as.logical(settings$state.data.assimilation$free.run)) { + SDA_timeseries_plot(ANALYSIS, FORECAST, obs.mean, obs.cov, settings$outdir, by = "var", types = c("FORECAST", "ANALYSIS")) + } else { + SDA_timeseries_plot(ANALYSIS, FORECAST, obs.mean, obs.cov, settings$outdir, by = "var", types = c("FORECAST", "ANALYSIS", "OBS")) + } + } #Saving the profiling result if (control$Profiling) alltocs(file.path(settings$outdir,"SDA", "Profiling.csv")) From 8a236d75b51db8f6f38ae23c4f30b4a6eef83580 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:49:26 -0400 Subject: [PATCH 36/68] Update the rd files. --- modules/assim.sequential/man/analysis_sda_block.Rd | 3 ++- modules/assim.sequential/man/sda.enkf.multisite.Rd | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/assim.sequential/man/analysis_sda_block.Rd b/modules/assim.sequential/man/analysis_sda_block.Rd index b5f836aa932..cc8346fb0c4 100644 --- a/modules/assim.sequential/man/analysis_sda_block.Rd +++ b/modules/assim.sequential/man/analysis_sda_block.Rd @@ -12,7 +12,8 @@ analysis_sda_block( obs.cov, t, nt, - MCMC.args + MCMC.args, + block.list.all.pre = NULL ) } \arguments{ diff --git a/modules/assim.sequential/man/sda.enkf.multisite.Rd b/modules/assim.sequential/man/sda.enkf.multisite.Rd index 5017d63df8f..62a06a1821d 100644 --- a/modules/assim.sequential/man/sda.enkf.multisite.Rd +++ b/modules/assim.sequential/man/sda.enkf.multisite.Rd @@ -14,8 +14,8 @@ sda.enkf.multisite( ensemble.samples = NULL, control = list(trace = TRUE, FF = FALSE, interactivePlot = FALSE, TimeseriesPlot = FALSE, BiasPlot = FALSE, plot.title = NULL, facet.plots = FALSE, debug = FALSE, pause - = FALSE, Profiling = FALSE, OutlierDetection = FALSE, parallel_qsub = TRUE, free_run - = FALSE, send_email = NULL, keepNC = TRUE, forceRun = TRUE, run_parallel = TRUE), + = FALSE, Profiling = FALSE, OutlierDetection = FALSE, parallel_qsub = TRUE, + send_email = NULL, keepNC = TRUE, forceRun = TRUE, run_parallel = TRUE), ... ) } From 25a004eb1ce44972819d23d8a7c5f8226ecdf584 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:49:44 -0400 Subject: [PATCH 37/68] Update the Rd file. --- modules/assim.sequential/man/update_q.Rd | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/assim.sequential/man/update_q.Rd b/modules/assim.sequential/man/update_q.Rd index bdf6acef08f..9d44af37253 100644 --- a/modules/assim.sequential/man/update_q.Rd +++ b/modules/assim.sequential/man/update_q.Rd @@ -4,7 +4,15 @@ \alias{update_q} \title{update_q} \usage{ -update_q(block.list.all, t, nt, MCMC_dat = NULL) +update_q( + block.list.all, + t, + nt, + aqq.Init = NULL, + bqq.Init = NULL, + MCMC_dat = NULL, + block.list.all.pre = NULL +) } \arguments{ \item{block.list.all}{each block within the `block.list` lists.} From 92f73ec123bd66c03b6ea21de86a9c138b339856 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:53:55 -0400 Subject: [PATCH 38/68] Update the roxygen information and the Rd files. --- modules/assim.sequential/R/Analysis_sda_block.R | 4 ++++ modules/assim.sequential/man/analysis_sda_block.Rd | 2 ++ modules/assim.sequential/man/update_q.Rd | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index 201b37d9f3f..4784b821cfd 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -10,6 +10,7 @@ ##' @param t time point. ##' @param nt total length of time steps. ##' @param MCMC.args arguments for the MCMC sampling. +##' @param block.list.all.pre pre-existed block.list.all object for passing the aqq and bqq to the current SDA run, the default is NULL. ##' @details This function will add data and constants into each block that are needed for the MCMC sampling. ##' ##' @description This function provides the block-based MCMC sampling approach. @@ -437,7 +438,10 @@ MCMC_block_function <- function(block) { ##' @param block.list.all each block within the `block.list` lists. ##' @param t time point. ##' @param nt total length of time steps. +##' @param aqq.Init the initial values of aqq, the default is NULL. +##' @param bqq.Init the initial values of bqq, the default is NULL. ##' @param MCMC_dat data frame of MCMC samples, the default it NULL. +##' @param block.list.all.pre pre-existed block.list.all object for passing the aqq and bqq to the current SDA run, the default is NULL. ##' ##' @return It returns the `block.list.all` object with initialized/updated Q filled in. update_q <- function (block.list.all, t, nt, aqq.Init = NULL, bqq.Init = NULL, MCMC_dat = NULL, block.list.all.pre = NULL) { diff --git a/modules/assim.sequential/man/analysis_sda_block.Rd b/modules/assim.sequential/man/analysis_sda_block.Rd index cc8346fb0c4..a95674e3b99 100644 --- a/modules/assim.sequential/man/analysis_sda_block.Rd +++ b/modules/assim.sequential/man/analysis_sda_block.Rd @@ -32,6 +32,8 @@ analysis_sda_block( \item{nt}{total length of time steps.} \item{MCMC.args}{arguments for the MCMC sampling.} + +\item{block.list.all.pre}{pre-existed block.list.all object for passing the aqq and bqq to the current SDA run, the default is NULL.} } \value{ It returns the `build.block.xy` object and the analysis results. diff --git a/modules/assim.sequential/man/update_q.Rd b/modules/assim.sequential/man/update_q.Rd index 9d44af37253..c342c7bbe56 100644 --- a/modules/assim.sequential/man/update_q.Rd +++ b/modules/assim.sequential/man/update_q.Rd @@ -21,7 +21,13 @@ update_q( \item{nt}{total length of time steps.} +\item{aqq.Init}{the initial values of aqq, the default is NULL.} + +\item{bqq.Init}{the initial values of bqq, the default is NULL.} + \item{MCMC_dat}{data frame of MCMC samples, the default it NULL.} + +\item{block.list.all.pre}{pre-existed block.list.all object for passing the aqq and bqq to the current SDA run, the default is NULL.} } \value{ It returns the `block.list.all` object with initialized/updated Q filled in. From 14c985acb3388b0b3aa7bf5eef6903d2d1d6eb2f Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 13:56:39 -0400 Subject: [PATCH 39/68] Bug fixes for the "too many nc files opened!" for the prepare_pools function. --- modules/data.land/R/pool_ic_netcdf2list.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/data.land/R/pool_ic_netcdf2list.R b/modules/data.land/R/pool_ic_netcdf2list.R index acf2a3850d3..995bc28d1bd 100644 --- a/modules/data.land/R/pool_ic_netcdf2list.R +++ b/modules/data.land/R/pool_ic_netcdf2list.R @@ -20,6 +20,8 @@ pool_ic_netcdf2list <- function(nc.path){ for(varname in names(vals)){ vals[[varname]] <- ncdf4::ncvar_get(IC.nc,varname) } + ncdf4::nc_close(IC.nc) + on.exit() return(list(dims = dims, vals = vals)) } else{ From 0fa327f8bb3c2a841da8655706c48f04a16980ce Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 14:53:17 -0400 Subject: [PATCH 40/68] Update the documentation. --- .../04_more_web_interface/02_hidden_analyses.Rmd | 11 +++++++++-- .../inst/MultiSite-Exs/SDA/Create_Multi_settings.R | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd index 78d77be6da0..6a71be6cd65 100644 --- a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd +++ b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd @@ -330,7 +330,6 @@ control=list(trace = TRUE, free_run = FALSE, send_email = NULL, keepNC = TRUE, - forceRun = TRUE, run_parallel = TRUE) ``` @@ -366,7 +365,15 @@ $`2010/12/31`$`1000000651` [2,] 0.1213583 0.001162113 ``` #### Anlysis SDA workflow -Before running the SDA analysis functions, the ensemble forecast results have to be generated, and arguments such as H matrix, MCMC arguments, and multi-site Y and R (by `Construct.R` function) have to be generated as well. +Before running the SDA analysis functions, the ensemble forecast results have to be generated, and arguments such as H matrix, MCMC arguments, and multi-site Y and R (by `Construct.R` function) have to be generated as well. If you want to proceed the block-based SDA workflow for a free run (estimate the process error to the free run), you need to first specify the `state.data.assimilation` as follows. Beyond that, if any of your sites has zero observation, you should flag the `by.site` as TRUE. +``` + + TRUE + TRUE + +``` + +Here are the workflows for three types of SDA analysis functions that we are currently used. * Decide which analysis function to be used. Here we have three options: 1) traditional ensemble Kalman Filter (`EnKF.MultiSite`) with analytical solution; 2) generalized ensemble Kalman Filter (`GEF.MultiSite`); and 3) block-based generalized ensemble Kalman Filter (`analysis_sda_block`). The latter two methods support the process variance across space and time. To choose the analysis method 1, we need to set the `process.variance` as FALSE. Otherwise, if we set the `process.variance` as TRUE and provide the `q.type` as either `SINGLE` or `SITE` the 2 method will be used, and if we provide the `q.type` as either `vector` or `wishart` the 3 method will be used. diff --git a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R index 2a5f6377562..22ec21a1415 100644 --- a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R +++ b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R @@ -60,6 +60,7 @@ template <- PEcAn.settings::Settings(list( process.variance = TRUE, adjustment = TRUE, censored.data = FALSE, + free.run = FALSE, FullYearNC = TRUE, NC.Overwrite = FALSE, NC.Prefix = "sipnet.out", From 70eabaa47c279b0c37b2ca4b7286f9cdc90237d2 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 16:05:44 -0400 Subject: [PATCH 41/68] Remove the %>% operator to avoid adding a dependency. --- base/remote/R/qsub_parallel.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index b1f05b16354..5930d779d9c 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -97,7 +97,8 @@ qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out") { if(file.exists(file.path(folder, prefix))){ return(folder) } - } %>% unlist + } + completed_folders <- unlist(completed_folders) pbi <- length(completed_folders) utils::setTxtProgressBar(pb, pbi) } From 68cdac1bd601e5be51f61aadc6963269203279ef Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 22 Aug 2023 16:17:05 -0400 Subject: [PATCH 42/68] Add namespace for the var function. --- modules/assim.sequential/R/Analysis_sda_block.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index 4784b821cfd..d4518a54987 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -412,7 +412,7 @@ MCMC_block_function <- function(block) { iX.mod <- grep("X.mod[", colnames(dat), fixed = TRUE) if (length(iX) == 1) { mua <- mean(dat[, iX]) - pa <- var(dat[, iX]) + pa <- stats::var(dat[, iX]) } else { mua <- colMeans(dat[, iX]) pa <- stats::cov(dat[, iX]) @@ -420,7 +420,7 @@ MCMC_block_function <- function(block) { if (length(iX.mod) == 1) { mufa <- mean(dat[, iX.mod]) - pfa <- var(dat[, iX.mod]) + pfa <- stats::var(dat[, iX.mod]) } else { mufa <- colMeans(dat[, iX.mod]) pfa <- stats::cov(dat[, iX.mod]) From 026f32921a9bd3b976bef767fb53fb15c013e599 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 23 Aug 2023 13:26:08 -0400 Subject: [PATCH 43/68] update the documentation. --- .../04_more_web_interface/02_hidden_analyses.Rmd | 4 +++- .../inst/MultiSite-Exs/SDA/Create_Multi_settings.R | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd index 6a71be6cd65..1c3b1ca8715 100644 --- a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd +++ b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd @@ -327,7 +327,6 @@ control=list(trace = TRUE, Profiling = FALSE, OutlierDetection=FALSE, parallel_qsub = TRUE, - free_run = FALSE, send_email = NULL, keepNC = TRUE, run_parallel = TRUE) @@ -396,6 +395,8 @@ MCMC.args <- list(niter = 1e5, TRUE + 1 + 1 FALSE TRUE FALSE @@ -403,6 +404,7 @@ MCMC.args <- list(niter = 1e5, FALSE sipnet.out SINGLE + FALSE FALSE Local.support 1 diff --git a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R index 22ec21a1415..eda487b4cf1 100644 --- a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R +++ b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R @@ -58,6 +58,8 @@ template <- PEcAn.settings::Settings(list( ############################################################################ state.data.assimilation = structure(list( process.variance = TRUE, + aqq.Init = 1, + bqq.Init = 1, adjustment = TRUE, censored.data = FALSE, free.run = FALSE, From dc613a3b0819f1eae76ecd9350e3a520df697255 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 23 Aug 2023 13:35:40 -0400 Subject: [PATCH 44/68] update the documentation. --- book_source/03_topical_pages/03_pecan_xml.Rmd | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/book_source/03_topical_pages/03_pecan_xml.Rmd b/book_source/03_topical_pages/03_pecan_xml.Rmd index 57aef029e4b..8f477a48fdc 100644 --- a/book_source/03_topical_pages/03_pecan_xml.Rmd +++ b/book_source/03_topical_pages/03_pecan_xml.Rmd @@ -750,6 +750,8 @@ The following tags can be used for state data assimilation. More detailed inform ```xml TRUE + 1 + 1 FALSE TRUE FALSE @@ -757,6 +759,7 @@ The following tags can be used for state data assimilation. More detailed inform FALSE sipnet.out SINGLE + FALSE FALSE Local.support 1 @@ -836,6 +839,8 @@ The following tags can be used for state data assimilation. More detailed inform ``` * **process.variance** : [optional] TRUE/FLASE flag for if process variance should be estimated (TRUE) or not (FALSE). If TRUE, a generalized ensemble filter will be used. If FALSE, an ensemble Kalman filter will be used. Default is FALSE. +* **aqq.Init** : [optional] The initial value of aqq used for estimate the Q distribution, the default value is 1. +* **bqq.Init** : [optional] The initial value of bqq used for estimate the Q distribution, the default value is 1. * **sample.parameters** : [optional] TRUE/FLASE flag for if parameters should be sampled for each ensemble member or not. This allows for more spread in the intial conditions of the forecast. * **adjustment** : [optional] Bool variable decide if you want to adjust analysis results by the likelihood. * **censored.data** : [optional] Bool variable decide if you want to do MCMC sampling for the forecast ensemble space, the default is FALSE. @@ -843,6 +848,7 @@ The following tags can be used for state data assimilation. More detailed inform * **NC.Overwrite** : [optional] Bool variable decide if you want to overwrite the previous netcdf file when there is a overlap in time, the default is FALSE. * **NC.Prefix** : [optional] The prefix for the generation of the full-year netcdf file, the default is sipnet.out. * **q.type** : [optional] The type of process variance that will be estimated, the default is SINGLE. +* **free.run** : [optional] If it's a free run without any observations, the default is FALSE. * **by.site** : [optional] The flag, that is exclusively used by the `analysis_sda_block` function, decide if we want to consider the underlying networks between sites. The default is FALSE. * **Localization.FUN** : [optional] The localization function name for the localization operation, the default is Local.support. * **scalef** : [optional] The scale parameter used for the localization operation, the smaller the value is, the sites are more isolated. From 59eeadfeea3c21eb2f36569d1e7be5bdd68cf96b Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 25 Aug 2023 14:36:35 -0400 Subject: [PATCH 45/68] Fix bug for the single site and no observation case. --- modules/assim.sequential/R/Analysis_sda_block.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index d4518a54987..6e27e30742f 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -127,7 +127,8 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { } #Handle observation - if (as.logical(settings$state.data.assimilation$free.run)) { + #if we do free run or the current obs.mean are all NULL. + if (as.logical(settings$state.data.assimilation$free.run) | all(is.null(unlist(obs.mean[[t]])))) { obs.mean[[t]] <- vector("list", length(site.ids)) %>% `names<-`(site.ids) obs.cov[[t]] <- vector("list", length(site.ids)) %>% `names<-`(site.ids) H <- list(ind = seq_along(rep(var.names, length(site.ids)))) From 980b9a1c444c4191f3fd13e608748bcbb8f684c0 Mon Sep 17 00:00:00 2001 From: Abhi Date: Thu, 7 Sep 2023 23:06:12 +0530 Subject: [PATCH 46/68] Fix typos in README.md for improved readability and accuracy --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d15b29c263f..1595e8ab1da 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ PEcAn is not itself an ecosystem model, and it can be used to with a variety of ## Documentation -Consult documentation of the PEcAn Project; either the [lastest stable development](https://pecanproject.github.io/pecan-documentation/develop/) branch, the latest [release](https://pecanproject.github.io/pecan-documentation/master/). Documentation from [earlier releases is here](https://pecanproject.github.io/documentation.html). +Consult documentation of the PEcAn Project; either the [latest stable development](https://pecanproject.github.io/pecan-documentation/develop/) branch, the latest [release](https://pecanproject.github.io/pecan-documentation/master/). Documentation from [earlier releases is here](https://pecanproject.github.io/documentation.html). ## Getting Started @@ -47,7 +47,7 @@ This, however, may have limited functionality without also installing other comp ### Website -Visit our [webage](https://pecanproject.github.io) to keep up with latest news, version, and information about the PEcAn Project +Visit our [webpage](https://pecanproject.github.io) to keep up with latest news, version, and information about the PEcAn Project #### Web Interface demo The fastest way to begin modeling ecosystems is through the PEcAn web interface. From d08f41776a5875db42ed309155758cc2e378a3b0 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 8 Sep 2023 14:21:34 -0400 Subject: [PATCH 47/68] Fix the issue when some sites have no observation. --- modules/assim.sequential/R/Analysis_sda_block.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index 6e27e30742f..9b16849eae7 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -195,8 +195,8 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { block.list[[i]]$data$r <- diag(1, length(var.names)) block.h <- matrix(1, 1, length(var.names)) } else { - y.start <- obs_per_site[i] * (i - 1) + 1 - y.end <- obs_per_site[i] * i + y.start <- sum(obs_per_site[1:i-1]) + 1#obs_per_site[i] * (i - 1) + 1 + y.end <- y.start + obs_per_site[i] - 1#obs_per_site[i] * i block.list[[i]]$data$y.censored <- y.censored[y.start:y.end] block.list[[i]]$data$r <- solve(R[y.start:y.end, y.start:y.end]) block.h <- Construct.H.multisite(site.ids[i], var.names, obs.mean[[t]]) From c16f88a5b736a36a04972ecf8de7c08a72f80460 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 8 Sep 2023 14:22:44 -0400 Subject: [PATCH 48/68] Fix the logic for the analysis function. --- modules/assim.sequential/R/sda.enkf_MultiSite.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index fa2e6c04c8e..4c8e29aafb2 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -489,7 +489,7 @@ sda.enkf.multisite <- function(settings, ###-------------------------------------------------------------------###---- #To trigger the analysis function with free run, you need to first specify the control$forceRun as TRUE, #Then specify the settings$state.data.assimilation$by.site as TRUE, and finally - if (!is.null(obs.mean[[t]][[1]]) & as.logical(settings$state.data.assimilation$free.run) | control$forceRun) { + if (!is.null(obs.mean[[t]][[1]]) | as.logical(settings$state.data.assimilation$free.run) & control$forceRun) { # TODO: as currently configured, Analysis runs even if all obs are NA, # which clearly should be triggering the `else` of this if, but the # `else` has not been invoked in a while an may need updating From f81a4147ac1dd64a94c1d8335da8e4c4e461f578 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 15:59:43 -0500 Subject: [PATCH 49/68] Merge block GEF into general GEF. --- modules/assim.sequential/R/Nimble_codes.R | 86 ++++++++--------------- 1 file changed, 29 insertions(+), 57 deletions(-) diff --git a/modules/assim.sequential/R/Nimble_codes.R b/modules/assim.sequential/R/Nimble_codes.R index 9a194248784..4a2e8fc6760 100644 --- a/modules/assim.sequential/R/Nimble_codes.R +++ b/modules/assim.sequential/R/Nimble_codes.R @@ -176,65 +176,37 @@ tobit.model <- nimbleCode({ #' @format TBD #' @export GEF.MultiSite.Nimble <- nimbleCode({ - if (q.type == 1) { - # Sorting out qs - qq ~ dgamma(aq, bq) ## aq and bq are estimated over time - q[1:YN, 1:YN] <- qq * diag(YN) - } else if (q.type == 2) { - # Sorting out qs - q[1:YN, 1:YN] ~ dwish(R = aq[1:YN, 1:YN], df = bq) ## aq and bq are estimated over time - - } - # X model X.mod[1:N] ~ dmnorm(mean = muf[1:N], cov = pf[1:N, 1:N]) - - for (i in 1:nH) { - tmpX[i] <- X.mod[H[i]] - Xs[i] <- tmpX[i] - } - ## add process error to x model but just for the state variables that we have data and H knows who - X[1:YN] ~ dmnorm(Xs[1:YN], prec = q[1:YN, 1:YN]) - - ## Likelihood - y.censored[1:YN] ~ dmnorm(X[1:YN], prec = r[1:YN, 1:YN]) - - # #puting the ones that they don't have q in Xall - They come from X.model - # # If I don't have data on then then their q is not identifiable, so we use the same Xs as Xmodel - if(nNotH > 0){ - for (j in 1:nNotH) { - tmpXmod[j] <- X.mod[NotH[j]] - Xall[NotH[j]] <- tmpXmod[j] + if (q.type == 1 | q.type == 2) { + if (q.type == 1) {#single Q + # Sorting out qs + qq ~ dgamma(aq, bq) ## aq and bq are estimated over time + q[1:YN, 1:YN] <- qq * diag(YN) + } else if (q.type == 2) {#site Q + # Sorting out qs + q[1:YN, 1:YN] ~ dwish(R = aq[1:YN, 1:YN], df = bq) ## aq and bq are estimated over time } - } - - #These are the one that they have data and their q can be estimated. - for (i in 1:nH) { - tmpXH[i] <- X[i] - Xall[H[i]] <- tmpXH[i] - } - - for (i in 1:YN) { - y.ind[i] ~ dinterval(y.censored[i], 0) - } - -}) - -#GEF.Block.Nimble--This does the block-based SDA ------------------------------------- -#' block-based TWEnF -#' @format TBD -#' @export -GEF.Block.Nimble <- nimbleCode({ - #I think the blocked nimble has to be implemented and used instead of a long vector sampling. - #1) due to the convergence of X.mod. - #2) temporal efficiency. MVN sampling over a large cov matrix can be time consuming. - #4) this data structure design allows us to implement the MCMC sampling parallely. - - #sample X.mod from the forecast. - X.mod[1:N] ~ dmnorm(mean = muf[1:N], cov = pf[1:N, 1:N]) - - #here we only consider vector Q and Wishart Q. - if (q.type == 1) { + + for (i in 1:nH) { + tmpX[i] <- X.mod[H[i]] + Xs[i] <- tmpX[i] + } + ## add process error to x model but just for the state variables that we have data and H knows who + X[1:YN] ~ dmnorm(Xs[1:YN], prec = q[1:YN, 1:YN]) + + ## Likelihood + y.censored[1:YN] ~ dmnorm(X[1:YN], prec = r[1:YN, 1:YN]) + + # #puting the ones that they don't have q in Xall - They come from X.model + # # If I don't have data on then then their q is not identifiable, so we use the same Xs as Xmodel + if(nNotH > 0){ + for (j in 1:nNotH) { + tmpXmod[j] <- X.mod[NotH[j]] + Xall[NotH[j]] <- tmpXmod[j] + } + } + } else if (q.type == 3) {#Vector Q for (i in 1:YN) { #sample Q. q[i] ~ dgamma(shape = aq[i], rate = bq[i]) @@ -247,7 +219,7 @@ GEF.Block.Nimble <- nimbleCode({ #likelihood y.censored[i] ~ dnorm(X[i], sd = 1/sqrt(r[i, i])) } - } else if (q.type == 2) { + } else if (q.type == 4) {#Wishart Q #if it's a Wishart Q. #sample Q. q[1:YN, 1:YN] ~ dwishart(R = aq[1:YN, 1:YN], df = bq) From e551332376a9a53fc219222fc14eb6ea4e014076 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 15:59:58 -0500 Subject: [PATCH 50/68] Add sleep time. --- base/remote/R/qsub_parallel.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index 5930d779d9c..6b9826033fb 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -3,6 +3,7 @@ #' @param settings pecan settings object #' @param files allow submit jobs based on job.sh file paths. #' @param prefix used for detecting if jobs are completed or not. +#' @param sleep time (in second) that we wait each time for the jobs to be completed. #' @export #' @examples #' \dontrun{ @@ -11,7 +12,7 @@ #' @author Dongchen Zhang #' #' @importFrom foreach %dopar% -qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out") { +qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = 10) { if("try-error" %in% class(try(find.package("doSNOW"), silent = T))){ PEcAn.logger::logger.info("Package doSNOW is not installed! Please install it and rerun the function!") return(0) @@ -93,6 +94,7 @@ qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out") { folders <- file.path(settings$host$outdir, run_list) completed_folders <- c() while (length(completed_folders) < length(folders)) { + Sys.sleep(sleep) completed_folders <- foreach::foreach(folder = folders) %dopar% { if(file.exists(file.path(folder, prefix))){ return(folder) From 2927ac47268c6057687325337538d3601b8d8092 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 16:00:06 -0500 Subject: [PATCH 51/68] Update documentations. --- .../02_hidden_analyses.Rmd | 182 +++++++++--------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd index 1c3b1ca8715..9eaaed7a847 100644 --- a/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd +++ b/book_source/02_demos_tutorials_workflows/04_more_web_interface/02_hidden_analyses.Rmd @@ -105,7 +105,7 @@ This is the main ensemble Kalman filter and generalized filter code. Originally, * IC - (optional) initial condition matrix (dimensions: ensemble memeber # by state variables). Default is NULL. -* Q - (optional) process covariance matrix (dimensions: state variable by state variables). Defualt is NULL. +* Q - (optional) process covariance matrix (dimensions: state variable by state variables). Default is NULL. #### State Data Assimilation Workflow Before running sda.enkf, these tasks must be completed (in no particular order), @@ -182,7 +182,7 @@ Create diagnostics #### State Data Assimilation Tags Descriptions -* **adjustment** : [optional] TRUE/FLASE flag for if ensembles needs to be adjusted based on weights estimated given their likelihood during analysis step. The defualt is TRUE for this flag. +* **adjustment** : [optional] TRUE/FLASE flag for if ensembles needs to be adjusted based on weights estimated given their likelihood during analysis step. The Default is TRUE for this flag. * **process.variance** : [optional] TRUE/FLASE flag for if process variance should be estimated (TRUE) or not (FALSE). If TRUE, a generalized ensemble filter will be used. If FALSE, an ensemble Kalman filter will be used. Default is FALSE. If you use the TRUE argument you can set three more optional tags to control the MCMCs built for the generalized esnsemble filter. * **nitrGEF** : [optional] numeric defining the length of the MCMC chains. * **nthin** : [optional] numeric defining thining length for the MCMC chains. @@ -289,7 +289,7 @@ $n_{t+1} = \frac{\sum_{i=1}^{I}\sum_{j=1}^{J}\frac{v_{ijt}^{2}+v_{iit}v_{jjt}}{V where we calculate the mean of the process covariance matrix $\left(\bar{\boldsymbol{Q}_{t}}\right)$ from the posterior samples at time t. Degrees of freedom for the Wishart are typically calculated element by element where $v_{ij}$ are the elements of $\boldsymbol{V}_{t}$. $I$ and $J$ index rows and columns of $\boldsymbol{V}$. Here, we calculate a mean number of degrees of freedom for $t+1$ by summing over all the elements of the scale matrix $\left(\boldsymbol{V}\right)$ and dividing by the count of those elements $\left(I\times J\right)$. We fit this model sequentially through time in the R computing environment using R package 'rjags.' -Users have control over how they think is the best way to estimate $Q$. Our code will look for the tag `q.type` in the XML settings under `state.data.assimilation` which can take 3 values of Single, PFT or Site. If `q.type` is set to single then one value of process variance will be estimated across all different sites or PFTs. On the other hand, when q.type` is set to Site or PFT then a process variance will be estimated for each site or PFT at a cost of more time and computation power. +Users have control over how they think is the best way to estimate $Q$. Our code will look for the tag `q.type` in the XML settings under `state.data.assimilation` which can take 4 values of Single, Site, Vector, or Wishart. If `q.type` is set to single then one value of process variance will be estimated across all different sites and variables. When `q.type` is set to Site then a process variance will be estimated for each siteat a cost of more time and computation power. When the `q.type` is set to Vector or Wishart then process errors for each variable of each site will be estimated and propagated through time, while the Wishart Q support the estimation of covariance between sites and variables through the MCMC sampling of wishart distributions, which further support the propagation of process error through not just time but space and variables. #### Multi-site State data assimilation. `sda.enkf.multisite` is housed within: `/pecan/modules/assim.sequential/R` @@ -301,27 +301,22 @@ The 4-site tutorial is housed within: `~/pecan/modules/assim.sequential/inst/Mul #### **sda.enkf.multisite.R Arguments** * settings - (required) [State Data Assimilation Tags Example] settings object -* obs.mean - (required) List of sites named by site ids, which contains dataframe for observation means, named with observation datetime. +* obs.mean - (required) Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point. -* obs.cov - (required) List of sites named by site ids, which contains dataframe for observation covariance matrix, named with observation datetime. +* obs.cov - (required) Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point. -* Q - (optional) Process covariance matrix given if there is no data to estimate it. Defualt is NULL. +* Q - (optional) Process covariance matrix given if there is no data to estimate it. Default is NULL. -* restart - (optional) Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA. Defualt is NULL. +* restart - (optional) Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA. Default is NULL. -* pre_enkf_params - (optional) Used for carrying out SDA with pre-existed enkf.params, in which the Pf, aqq, and bqq can be used for the analysis step. Defualt is NULL. +* pre_enkf_params - (optional) Used for carrying out SDA with pre-existed `enkf.params`, in which the `Pf`, `aqq`, and `bqq` can be used for the analysis step. Default is NULL. -* ensemble.samples - (optional) Pass ensemble.samples from outside to avoid GitHub check issues. Defualt is NULL. +* ensemble.samples - (optional) Pass ensemble.samples from outside, which are lists of calibrated parameters for different plant functional types. This also helps to avoid GitHub check issues. Default is NULL. -* control - (optional) List of flags controlling the behaviour of the SDA. Default is as follows: +* control - (optional) List of flags controlling the behavior of the SDA. Here is an example of the `control` list. The detailed explanation of them are shown inside the `sda.enkf.multisite` function in the `assim.sequential` package. ``` control=list(trace = TRUE, - FF = FALSE, - interactivePlot = FALSE, TimeseriesPlot = FALSE, - BiasPlot = FALSE, - plot.title = NULL, - facet.plots = FALSE, debug = FALSE, pause = FALSE, Profiling = FALSE, @@ -329,13 +324,15 @@ control=list(trace = TRUE, parallel_qsub = TRUE, send_email = NULL, keepNC = TRUE, - run_parallel = TRUE) + run_parallel = TRUE, + MCMC.args = NULL) ``` #### **obs.mean and obs.cov Description** -The observed mean and cov needs to be formatted as list of different dates with observations. For each element of this list also there needs to be a list with mean and cov matrices of different sites named by their siteid. In case that zero variance was estimated for a variable inside the obs.cov, the SDA code will automatically replace that with half of the minimum variance from other non-zero variables in that time step. +The observations are required for passing the time points into the SDA workflow, which should match the start and end date in the settings object. For the generations of obs.mean and obs.cov, please use the function `SDA_OBS_Assembler` inside the `assim.sequential` package. For the unconstrained runs, please specify the `free.run` flag as TRUE inside the `settings$state.data.assimilation$Obs_Prep` section. Otherwise, please specify the arguments that are needed for preparations of different observations (note that, the observation preparation functions currently only support `MODIS LAI`, `Landtrendr AGB`, `SMAP soil moisture`, and `SoilGrid soil organic carbon`). For the details about how to setup those arguments, please reference the `Create_Multi_settings.R` script inside `~/pecan/modules/assim.sequential/inst/MultiSite-Exs/SDA` directory. +The observed mean and covariance need to be formatted as list of different dates with observations. For each element of this list also there needs to be a list with mean and cov matrices of different sites named by their siteid. In case that zero variance was estimated for a variable inside the obs.cov, the SDA code will automatically replace that with half of the minimum variance from other non-zero variables in that time step. -This would look like something like this: +Here are examples of the `obs.mean` and `obs.cov` for single time point, two sites, and two observations. ``` > obs.mean @@ -364,32 +361,36 @@ $`2010/12/31`$`1000000651` [2,] 0.1213583 0.001162113 ``` #### Anlysis SDA workflow -Before running the SDA analysis functions, the ensemble forecast results have to be generated, and arguments such as H matrix, MCMC arguments, and multi-site Y and R (by `Construct.R` function) have to be generated as well. If you want to proceed the block-based SDA workflow for a free run (estimate the process error to the free run), you need to first specify the `state.data.assimilation` as follows. Beyond that, if any of your sites has zero observation, you should flag the `by.site` as TRUE. -``` - - TRUE - TRUE - -``` - -Here are the workflows for three types of SDA analysis functions that we are currently used. +Before running the SDA analysis functions, the ensemble forecast results have to be generated, and arguments such as H matrix, MCMC arguments, and multi-site Y and R (by `Construct.R` function) have to be generated as well. Here are the workflows for three types of SDA analysis functions that we are currently used. -* Decide which analysis function to be used. Here we have three options: 1) traditional ensemble Kalman Filter (`EnKF.MultiSite`) with analytical solution; 2) generalized ensemble Kalman Filter (`GEF.MultiSite`); and 3) block-based generalized ensemble Kalman Filter (`analysis_sda_block`). The latter two methods support the process variance across space and time. To choose the analysis method 1, we need to set the `process.variance` as FALSE. Otherwise, if we set the `process.variance` as TRUE and provide the `q.type` as either `SINGLE` or `SITE` the 2 method will be used, and if we provide the `q.type` as either `vector` or `wishart` the 3 method will be used. +* Decide which analysis function to be used. Here we have three options: 1) traditional ensemble Kalman Filter (`EnKF.MultiSite`) with analytical solution, within which the process error needs to be prescribed from outside (see `Q` arguments in the `sda.enkf.multisite` function); 2) generalized ensemble Kalman Filter (`GEF.MultiSite`); and 3) block-based generalized ensemble Kalman Filter (`analysis_sda_block`). The latter two methods support the feature of propagating process variance across space and time. To choose the analysis method 1, we need to set the `process.variance` as FALSE. Otherwise, if we set the `process.variance` as TRUE and provide the `q.type` as either `SINGLE` or `SITE` the method `GEF.MultiSite` will be used, and if we provide the `q.type` as either `vector` or `wishart` the method `analysis_sda_block` will be used. The explanations for different Q types can be found in the `The Generalized Ensemble Filter` section in this documentation. For the `analysis_sda_block` method, there is also a special case for complete or partial missing of observations. * If we decide to use `EnKF.MultiSite`, then the analysis results will be calculated based on equations. * If we decide to use `GEF.MultiSite`, then it will first do censoring process based on how you setup the `censored.data` flag within settings xml file. Then, if t equals to 1, we will first initialize the `aqq`, `bqq`, `aq`, and `bq` based on how you setup the `q.type` argument within settings xml file. After preparing the initial conditions (ICs), data, and constants for the `GEF.MultiSite.Nimble` nimble model, the MCMC sampling will happen afterwards. Finally, the process variance and the analysis results will be calculated and updated and returned to the `sda.enkf.multisite` function. -*If we decide to use `analysis_sda_block`, then if t equals 1, we need to initialize the overall block-based MCMC lists (`block.list.all`). After that, we need to specify the `MCMC_args` either from the `control` list or by default (see below). Next, within the `analysis_sda_block` function, it will first use the `build.block.xy` function to initialize the block lists based on each site or underlying networks between sites and then split long vectors or large matrices (`mu.f`, `y.censored`, or `Pf`) into small blocks and fill in the data (except for process variance) and constants for each block. Then, we will update the process error using the `update_q` function. If t equals 1, it will initialize the arguments for the process error. Otherwise, it will grab the previously updated process error. After that, in the `MCMC_Init` function, it will create the initial conditions (ICs) for the MCMC sampling based on randomly sampled data. Then, the completed block-based arguments (`block.list.all`) will be used by the `MCMC_block_function` function under the parallel mode. Finally, the block-based results will be converted into long vectors and large matrices by the `block.2.vector` function and values such as `block.list.all`, `mu.f`, `mu.a`, `Pf`, `Pa` will be returned to the `sda.enkf.multisite` function. +* If we decide to use `analysis_sda_block` method, then if t equals 1, the workflow will first build blocks using the `matrix_network` function for the calculations for indexes of variables by networks of site groups based on the spatial scale (see `scalef` argument in the `Example of multi-settings pecan xml file` section) that we specify inside the `state.data.assimilation` section. Then, the workflow will execute the `build.block.xy` to automatically initialize the overall block-based MCMC lists (`block.list.all`) and fill in datasets (`mu.f`, `y.censored`, or `Pf`) for each block and for all time points to facilitate the process of passing arguments between scripts/functions. After that, the `MCMC_args` (see the explanations inside the roxygen structure of the `sda.enkf.multisite` function) will be specified either from the `control` list or by default (see below). Then, the workflow will update the process error using the `update_q` function. If t equals 1, it will initialize the arguments for the process error. Otherwise, it will grab the previously updated process error. After that, in the `MCMC_Init` function, it will create the initial conditions (ICs) for the MCMC sampling based on randomly sampled data. Then, the completed block-based arguments (`block.list.all`) will be used by the `MCMC_block_function` function under the parallel mode. Finally, the block-based results will be converted into long vectors and large matrices by the `block.2.vector` function and values such as `block.list.all`, `mu.f`, `mu.a`, `Pf`, `Pa` will be returned to the `sda.enkf.multisite` function. + ``` MCMC.args <- list(niter = 1e5, nthin = 10, - nchain = 1, + nchain = 3, nburnin = 1e4) ``` +* There is a special case for the `analysis_sda_block` method where NA values appear in the observations, which provides the opportunity for the estimations of process error without any observations. This special case currently is only working under restricted conditions when we set the `scalef` as 0 (that feature currently only works for isolated-site SDA runs) and turn on the `free.run` flag in the settings, which will then automatically fill in NA values to the observations by each site (see bellow). + +``` + + 0 + TRUE + +``` + #### Example of multi-settings pecan xml file +Here is an example of what does a multi-settings pecan xml file look like. The detailed explanation for the xml file can be found under the `Multi-Settings` section in the `03_pecan_xml.Rmd` documentation. + ``` @@ -405,73 +406,72 @@ MCMC.args <- list(niter = 1e5, sipnet.out SINGLE FALSE - FALSE Local.support 1 5 - - AbvGrndWood - MgC/ha - 0 - 9999 - - - LAI - - 0 - 9999 - - - SoilMoistFrac - - 0 - 100 - - - TotSoilCarb - kg/m^2 - 0 - 9999 - + + AbvGrndWood + MgC/ha + 0 + 9999 + + + LAI + + 0 + 9999 + + + SoilMoistFrac + + 0 + 100 + + + TotSoilCarb + kg/m^2 + 0 + 9999 + - - /projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB - TRUE - TRUE - - year - 1 - - - - 30 - TRUE - TRUE - - year - 1 - - - - 30 - TRUE - FALSE - - year - 1 - - - - - year - 1 - - - /projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS - 2012-07-15 - 2021-07-15 + + /projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB + TRUE + TRUE + + year + 1 + + + + 30 + TRUE + TRUE + + year + 1 + + + + 30 + TRUE + FALSE + + year + 1 + + + + + year + 1 + + + /projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS + 2012-07-15 + 2021-07-15 2004/01/01 From 778f9cae08cbeeb85284c761459a008381e1f9df Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 16:00:16 -0500 Subject: [PATCH 52/68] Update documentations. --- book_source/03_topical_pages/03_pecan_xml.Rmd | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/book_source/03_topical_pages/03_pecan_xml.Rmd b/book_source/03_topical_pages/03_pecan_xml.Rmd index 8f477a48fdc..30eb369ff51 100644 --- a/book_source/03_topical_pages/03_pecan_xml.Rmd +++ b/book_source/03_topical_pages/03_pecan_xml.Rmd @@ -760,7 +760,6 @@ The following tags can be used for state data assimilation. More detailed inform sipnet.out SINGLE FALSE - FALSE Local.support 1 5 @@ -839,8 +838,8 @@ The following tags can be used for state data assimilation. More detailed inform ``` * **process.variance** : [optional] TRUE/FLASE flag for if process variance should be estimated (TRUE) or not (FALSE). If TRUE, a generalized ensemble filter will be used. If FALSE, an ensemble Kalman filter will be used. Default is FALSE. -* **aqq.Init** : [optional] The initial value of aqq used for estimate the Q distribution, the default value is 1. -* **bqq.Init** : [optional] The initial value of bqq used for estimate the Q distribution, the default value is 1. +* **aqq.Init** : [optional] The initial value of `aqq` used for estimate the Q distribution, the default value is 1 (note that, the `aqq.init` and `bqq.init` right now only work on the `VECTOR` q type, and we didn't account for the variabilities of them across sites or variables, meaning we initialize the `aqq` and `bqq` given single value). +* **bqq.Init** : [optional] The initial value of `bqq` used for estimate the Q distribution, the default value is 1. * **sample.parameters** : [optional] TRUE/FLASE flag for if parameters should be sampled for each ensemble member or not. This allows for more spread in the intial conditions of the forecast. * **adjustment** : [optional] Bool variable decide if you want to adjust analysis results by the likelihood. * **censored.data** : [optional] Bool variable decide if you want to do MCMC sampling for the forecast ensemble space, the default is FALSE. @@ -849,7 +848,6 @@ The following tags can be used for state data assimilation. More detailed inform * **NC.Prefix** : [optional] The prefix for the generation of the full-year netcdf file, the default is sipnet.out. * **q.type** : [optional] The type of process variance that will be estimated, the default is SINGLE. * **free.run** : [optional] If it's a free run without any observations, the default is FALSE. -* **by.site** : [optional] The flag, that is exclusively used by the `analysis_sda_block` function, decide if we want to consider the underlying networks between sites. The default is FALSE. * **Localization.FUN** : [optional] The localization function name for the localization operation, the default is Local.support. * **scalef** : [optional] The scale parameter used for the localization operation, the smaller the value is, the sites are more isolated. * **chains** : [optional] The number of chains needed to be estimated during the MCMC sampling process. From 729873eeac8e1f19c2a7a93acc0515a1e4a3ac97 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 16:00:41 -0500 Subject: [PATCH 53/68] Upgrade the detection of the boundary case for wood carbon propagation. --- models/sipnet/R/write.configs.SIPNET.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R index b1bb635b9d4..f26cb01cad7 100755 --- a/models/sipnet/R/write.configs.SIPNET.R +++ b/models/sipnet/R/write.configs.SIPNET.R @@ -442,7 +442,7 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs if (all(plant_wood_vars %in% ic.names)) { # reconstruct total wood C if(IC$abvGrndWoodFrac < 0.05){ - wood_total_C <- 0 + wood_total_C <- IC$AbvGrndWood }else{ wood_total_C <- IC$AbvGrndWood / IC$abvGrndWoodFrac } From d39fbde22c94c454b9154e2703651f33a94255e0 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 16:01:18 -0500 Subject: [PATCH 54/68] Update the documentation of parameters and functions per Mike's request. --- .../assim.sequential/R/Analysis_sda_block.R | 69 +++++++++++-------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/modules/assim.sequential/R/Analysis_sda_block.R b/modules/assim.sequential/R/Analysis_sda_block.R index 9b16849eae7..69ccbfeb79f 100644 --- a/modules/assim.sequential/R/Analysis_sda_block.R +++ b/modules/assim.sequential/R/Analysis_sda_block.R @@ -3,14 +3,14 @@ ##' @author Dongchen Zhang ##' ##' @param settings pecan standard multi-site settings list. -##' @param block.list.all List contains nt empty sub-elements. -##' @param X A matrix contains ensemble forecasts. -##' @param obs.mean List of dataframe of observation means, named with observation datetime. -##' @param obs.cov List of covariance matrices of state variables , named with observation datetime. -##' @param t time point. -##' @param nt total length of time steps. -##' @param MCMC.args arguments for the MCMC sampling. -##' @param block.list.all.pre pre-existed block.list.all object for passing the aqq and bqq to the current SDA run, the default is NULL. +##' @param block.list.all Lists of forecast and analysis outputs for each time point of each block. If t=1, we initialize those outputs of each block with NULL from the `sda.enkf.multisite` function. +##' @param X A matrix contains ensemble forecasts with the dimensions of `[ensemble number, site number * number of state variables]`. The columns are matched with the site.ids and state variable names of the inside the `FORECAST` object in the `sda.enkf.multisite` script. +##' @param obs.mean Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point. +##' @param obs.cov Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point. +##' @param t time point in format of YYYY-MM-DD. +##' @param nt total length of time steps, corresponding to the `nt` variable in the `sda.enkf.multisite` function. +##' @param MCMC.args arguments for the MCMC sampling, details can be found in the roxygen strucutre for control list in the `sda.enkf.multisite` function. +##' @param block.list.all.pre pre-existed block.list.all object for passing the aqq and bqq to the current SDA run, the default is NULL. Details can be found in the roxygen structure for `pre_enkf_params` of the `sda.enkf.multisite` function ##' @details This function will add data and constants into each block that are needed for the MCMC sampling. ##' ##' @description This function provides the block-based MCMC sampling approach. @@ -25,7 +25,7 @@ analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, obs.mean = obs.mean, obs.cov = obs.cov, t = t)))) { - PEcAn.logger::logger.error("Something wrong within the build.block.xy function.") + PEcAn.logger::logger.severe("Something wrong within the build.block.xy function.") return(0) } #grab block.list and H from the results. @@ -39,13 +39,13 @@ analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, bqq.Init = as.numeric(settings$state.data.assimilation$bqq.Init), MCMC_dat = NULL, block.list.all.pre)))) { - PEcAn.logger::logger.error("Something wrong within the update_q function.") + PEcAn.logger::logger.severe("Something wrong within the update_q function.") return(0) } - #add initial conditions. + #add initial conditions for the MCMC sampling. if ("try-error" %in% class(try(block.list.all[[t]] <- MCMC_Init(block.list.all[[t]], X)))) { - PEcAn.logger::logger.error("Something wrong within the MCMC_Init function.") + PEcAn.logger::logger.severe("Something wrong within the MCMC_Init function.") return(0) } @@ -59,14 +59,14 @@ analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, #parallel for loop over each block. PEcAn.logger::logger.info(paste0("Running MCMC ", "for ", length(block.list.all[[t]]), " blocks")) if ("try-error" %in% class(try(block.list.all[[t]] <- furrr::future_map(block.list.all[[t]], MCMC_block_function, .progress = T)))) { - PEcAn.logger::logger.error("Something wrong within the MCMC_block_function function.") + PEcAn.logger::logger.severe("Something wrong within the MCMC_block_function function.") return(0) } PEcAn.logger::logger.info("Completed!") #convert from block lists to vector values. if ("try-error" %in% class(try(V <- block.2.vector(block.list.all[[t]], X, H)))) { - PEcAn.logger::logger.error("Something wrong within the block.2.vector function.") + PEcAn.logger::logger.severe("Something wrong within the block.2.vector function.") return(0) } @@ -98,9 +98,9 @@ analysis_sda_block <- function (settings, block.list.all, X, obs.mean, obs.cov, build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { #set q.type from settings. if (settings$state.data.assimilation$q.type == "vector") { - q.type <- 1 + q.type <- 3 } else if (settings$state.data.assimilation$q.type == "wishart") { - q.type <- 2 + q.type <- 4 } #grab basic arguments based on X. @@ -108,19 +108,22 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { var.names <- unique(attributes(X)$dimnames[[2]]) mu.f <- colMeans(X) Pf <- stats::cov(X) - diag(Pf)[which(diag(Pf)==0)] <- min(diag(Pf)[which(diag(Pf) != 0)])/5 #fixing det(Pf)==0 + if (length(diag(Pf)[which(diag(Pf)==0)]) > 0) { + diag(Pf)[which(diag(Pf)==0)] <- min(diag(Pf)[which(diag(Pf) != 0)])/5 #fixing det(Pf)==0 + PEcAn.logger::logger.warn("The zero variances in Pf is being replaced by one fifth of the minimum variance in those matrices respectively.") + } #distance calculations and localization + site.locs <- settings$run %>% + purrr::map('site') %>% + purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>% + t %>% + `colnames<-`(c("Lon","Lat")) %>% + `rownames<-`(site.ids) + #Finding the distance between the sites + dis.matrix <- sp::spDists(site.locs, longlat = TRUE) if (!is.null(settings$state.data.assimilation$Localization.FUN)) { Localization.FUN <- get(settings$state.data.assimilation$Localization.FUN) - site.locs <- settings$run %>% - purrr::map('site') %>% - purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>% - t %>% - `colnames<-`(c("Lon","Lat")) %>% - `rownames<-`(site.ids) - #Finding the distance between the sites - dis.matrix <- sp::spDists(site.locs, longlat = TRUE) #turn that into a blocked matrix format blocked.dis <- block_matrix(dis.matrix %>% as.numeric(), rep(length(var.names), length(site.ids))) Pf <- Localization.FUN(Pf, blocked.dis, settings$state.data.assimilation$scalef %>% as.numeric()) @@ -134,13 +137,18 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { H <- list(ind = seq_along(rep(var.names, length(site.ids)))) Y <- rep(NA, length(H$ind)) R <- diag(1, length(H$ind)) + } else if (!as.logical(settings$state.data.assimilation$free.run) && all(is.null(unlist(obs.mean[[t]])))) { + PEcAn.logger::logger.error("Please set the settings$state.data.assimilation$free.run as TRUE if you don't have any observations!") + return(0) } else { Obs.cons <- Construct.R(site.ids, var.names, obs.mean[[t]], obs.cov[[t]]) Y <- Obs.cons$Y R <- Obs.cons$R if (length(Y) > 1) { - PEcAn.logger::logger.info("The zero variances in R and Pf is being replaced by half and one fifth of the minimum variance in those matrices respectively.") - diag(R)[which(diag(R)==0)] <- min(diag(R)[which(diag(R) != 0)])/2 + if (length(diag(R)[which(diag(R)==0)]) > 0) { + diag(R)[which(diag(R)==0)] <- min(diag(R)[which(diag(R) != 0)])/2 + PEcAn.logger::logger.warn("The zero variances in R is being replaced by half of the minimum variance in those matrices respectively.") + } } #create matrix the describes the support for each observed state variable at time t min_max <- settings$state.data.assimilation$state.variables %>% @@ -172,7 +180,7 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { #start the blocking process #should we consider interactions between sites? - if(as.logical(settings$state.data.assimilation$by.site)){ + if(as.numeric(settings$state.data.assimilation$scalef) == 0){ block.list <- vector("list", length(site.ids)) #loop over sites for (i in seq_along(site.ids)) { @@ -219,7 +227,7 @@ build.block.xy <- function(settings, block.list.all, X, obs.mean, obs.cov, t) { purrr::map(function(l){length(l)}) %>% unlist %>% sum() != length(site.ids)) { - PEcAn.logger::logger.error("Block calculation failed, please check the matrix_network function!") + PEcAn.logger::logger.severe("Block calculation failed, please check the matrix_network function!") return(0) } block.list <- vector("list", length(block.vec)) @@ -318,7 +326,8 @@ MCMC_Init <- function (block.list, X) { ##' @return It returns the `block` object with analysis results filled in. MCMC_block_function <- function(block) { #build nimble model - model_pred <- nimble::nimbleModel(GEF.Block.Nimble, + #TODO: harmonize the MCMC code between block-based and general analysis functions to reduce the complexity of code. + model_pred <- nimble::nimbleModel(GEF.MultiSite.Nimble, data = block$data, inits = block$Inits, constants = block$constant, From 4a37538feefee97216d2a70444fcd659c6ce2e01 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 16:01:25 -0500 Subject: [PATCH 55/68] Typo --- modules/assim.sequential/R/SDA_OBS_Assembler.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/assim.sequential/R/SDA_OBS_Assembler.R b/modules/assim.sequential/R/SDA_OBS_Assembler.R index 9776cb33ee6..e9e778e9af8 100644 --- a/modules/assim.sequential/R/SDA_OBS_Assembler.R +++ b/modules/assim.sequential/R/SDA_OBS_Assembler.R @@ -21,7 +21,7 @@ SDA_OBS_Assembler <- function(settings){ #check if we want to proceed the free run without any observations. if (as.logical(settings$state.data.assimilation$free.run)) { - #calcualte time points. + #calculate time points. time_points <- obs_timestep2timepoint(Obs_Prep$start.date, Obs_Prep$end.date, Obs_Prep$timestep) #generate obs.mean and obs.cov with NULL filled. From 50b61ecf2c76afd6c7dbe39baee876aef8387b1d Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 16:01:43 -0500 Subject: [PATCH 56/68] Update the documentation of the roxygen structure. --- .../assim.sequential/R/sda.enkf_MultiSite.R | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 4c8e29aafb2..569aea85880 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -3,14 +3,25 @@ #' @author Michael Dietze, Ann Raiho and Alexis Helgeson \email{dietze@@bu.edu} #' #' @param settings PEcAn settings object -#' @param obs.mean List of dataframe of observation means, named with observation datetime. -#' @param obs.cov List of covariance matrices of state variables , named with observation datetime. +#' @param obs.mean Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point. +#' @param obs.cov Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point. #' @param Q Process covariance matrix given if there is no data to estimate it. #' @param restart Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA. -#' @param pre_enkf_params Used for carrying out SDA with pre-existed enkf.params, in which the Pf, aqq, and bqq can be used for the analysis step. +#' @param pre_enkf_params Used for passing pre-existing time-series of process error into the current SDA runs to ignore the impact by the differences between process errors. #' @param ensemble.samples Pass ensemble.samples from outside to avoid GitHub check issues. -#' @param control List of flags controlling the behaviour of the SDA. trace for reporting back the SDA outcomes, interactivePlot for plotting the outcomes after each step, -#' TimeseriesPlot for post analysis examination, BiasPlot for plotting the correlation between state variables, plot.title is the title of post analysis plots and debug mode allows for pausing the code and examining the variables inside the function. +#' @param control List of flags controlling the behavior of the SDA. +#' `trace` for reporting back the SDA outcomes; +#' `TimeseriesPlot` for post analysis examination; +#' `debug` decide if we want to pause the code and examining the variables inside the function; +#' `pause` decide if we want to pause the SDA workflow at current time point t; +#' `Profiling` decide if we want to export the temporal SDA outputs in CSV file; +#' `OutlierDetection` decide if we want to execute the outlier detection each time after the model forecasting; +#' `parallel_qsub` decide if we want to execute the `qsub` job submission under parallel mode; +#' `send_email` contains lists for sending email to report the SDA progress; +#' `keepNC` decide if we want to keep the NetCDF files inside the out directory; +#' `forceRun` decide if we want to proceed the Bayesian MCMC sampling without observations; +#' `run_parallel` decide if we want to run the SDA under parallel mode for the `future_map` function; +#' `MCMC.args` include lists for controling the MCMC sampling process (iteration, nchains, burnin, and nthin.). #' #’ @details #’ Restart mode: Basic idea is that during a restart (primary case envisioned as an iterative forecast), a new workflow folder is created and the previous forecast for the start_time is copied over. During restart the initial run before the loop is skipped, with the info being populated from the previous run. The function then dives right into the first Analysis, then continues on like normal. @@ -29,12 +40,7 @@ sda.enkf.multisite <- function(settings, pre_enkf_params = NULL, ensemble.samples = NULL, control=list(trace = TRUE, - FF = FALSE, - interactivePlot = FALSE, TimeseriesPlot = FALSE, - BiasPlot = FALSE, - plot.title = NULL, - facet.plots = FALSE, debug = FALSE, pause = FALSE, Profiling = FALSE, @@ -43,7 +49,8 @@ sda.enkf.multisite <- function(settings, send_email = NULL, keepNC = TRUE, forceRun = TRUE, - run_parallel = TRUE), + run_parallel = TRUE, + MCMC.args = NULL), ...) { #add if/else for when restart points to folder instead if T/F set restart as T if(is.list(restart)){ @@ -355,7 +362,7 @@ sda.enkf.multisite <- function(settings, ) }) ###------------------------------------------------------------------------------------------------### - ### w over time ### + ### loop over time ### ###------------------------------------------------------------------------------------------------### for(t in 1:nt){ obs.t <- as.character(lubridate::date(obs.times[t])) @@ -488,7 +495,7 @@ sda.enkf.multisite <- function(settings, ### preparing OBS ### ###-------------------------------------------------------------------###---- #To trigger the analysis function with free run, you need to first specify the control$forceRun as TRUE, - #Then specify the settings$state.data.assimilation$by.site as TRUE, and finally + #Then specify the settings$state.data.assimilation$scalef as 0, and settings$state.data.assimilation$free.run as TRUE. if (!is.null(obs.mean[[t]][[1]]) | as.logical(settings$state.data.assimilation$free.run) & control$forceRun) { # TODO: as currently configured, Analysis runs even if all obs are NA, # which clearly should be triggering the `else` of this if, but the @@ -509,11 +516,13 @@ sda.enkf.multisite <- function(settings, block.list.all <- obs.mean %>% purrr::map(function(l){NULL}) } #initialize MCMC arguments. - if (!exists("MCMC.args")) { + if (is.null(control$MCMC.args)) { MCMC.args <- list(niter = 1e5, nthin = 10, - nchain = 1, + nchain = 3, nburnin = 5e4) + } else { + MCMC.args <- control$MCMC.args } #running analysis function. enkf.params[[obs.t]] <- analysis_sda_block(settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args, pre_enkf_params) From a3fc9317e41bedc391e66d79902749dbb80f9d5c Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 16:02:02 -0500 Subject: [PATCH 57/68] Fixes on nc_close and on.exit usage. --- modules/data.land/R/pool_ic_netcdf2list.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/data.land/R/pool_ic_netcdf2list.R b/modules/data.land/R/pool_ic_netcdf2list.R index 995bc28d1bd..871d3132e78 100644 --- a/modules/data.land/R/pool_ic_netcdf2list.R +++ b/modules/data.land/R/pool_ic_netcdf2list.R @@ -20,8 +20,7 @@ pool_ic_netcdf2list <- function(nc.path){ for(varname in names(vals)){ vals[[varname]] <- ncdf4::ncvar_get(IC.nc,varname) } - ncdf4::nc_close(IC.nc) - on.exit() + on.exit(ncdf4::nc_close(IC.nc), add = FALSE) return(list(dims = dims, vals = vals)) } else{ From 0be65acdbd9fb81f1338e1126c47c43e1c37a292 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 14 Nov 2023 16:04:24 -0500 Subject: [PATCH 58/68] Update Rd files. --- base/remote/man/qsub_parallel.Rd | 4 ++- modules/assim.sequential/NAMESPACE | 1 - .../assim.sequential/man/GEF.Block.Nimble.Rd | 16 ----------- .../man/analysis_sda_block.Rd | 16 +++++------ .../man/sda.enkf.multisite.Rd | 28 +++++++++++++------ 5 files changed, 30 insertions(+), 35 deletions(-) delete mode 100644 modules/assim.sequential/man/GEF.Block.Nimble.Rd diff --git a/base/remote/man/qsub_parallel.Rd b/base/remote/man/qsub_parallel.Rd index cbd2c6614d4..a7d2f8bd751 100644 --- a/base/remote/man/qsub_parallel.Rd +++ b/base/remote/man/qsub_parallel.Rd @@ -4,7 +4,7 @@ \alias{qsub_parallel} \title{qsub_parallel} \usage{ -qsub_parallel(settings, files = NULL, prefix = "sipnet.out") +qsub_parallel(settings, files = NULL, prefix = "sipnet.out", sleep = 10) } \arguments{ \item{settings}{pecan settings object} @@ -12,6 +12,8 @@ qsub_parallel(settings, files = NULL, prefix = "sipnet.out") \item{files}{allow submit jobs based on job.sh file paths.} \item{prefix}{used for detecting if jobs are completed or not.} + +\item{sleep}{time (in second) that we wait each time for the jobs to be completed.} } \description{ qsub_parallel diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 527ca7afbe5..aa4d926ecb5 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -9,7 +9,6 @@ export(Create_Site_PFT_CSV) export(EnKF) export(EnKF.MultiSite) export(GEF) -export(GEF.Block.Nimble) export(GEF.MultiSite) export(GEF.MultiSite.Nimble) export(GrabFillMatrix) diff --git a/modules/assim.sequential/man/GEF.Block.Nimble.Rd b/modules/assim.sequential/man/GEF.Block.Nimble.Rd deleted file mode 100644 index 2a343a1f361..00000000000 --- a/modules/assim.sequential/man/GEF.Block.Nimble.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Nimble_codes.R -\docType{data} -\name{GEF.Block.Nimble} -\alias{GEF.Block.Nimble} -\title{block-based TWEnF} -\format{ -TBD -} -\usage{ -GEF.Block.Nimble -} -\description{ -block-based TWEnF -} -\keyword{datasets} diff --git a/modules/assim.sequential/man/analysis_sda_block.Rd b/modules/assim.sequential/man/analysis_sda_block.Rd index a95674e3b99..4be6f3ba235 100644 --- a/modules/assim.sequential/man/analysis_sda_block.Rd +++ b/modules/assim.sequential/man/analysis_sda_block.Rd @@ -19,21 +19,21 @@ analysis_sda_block( \arguments{ \item{settings}{pecan standard multi-site settings list.} -\item{block.list.all}{List contains nt empty sub-elements.} +\item{block.list.all}{Lists of forecast and analysis outputs for each time point of each block. If t=1, we initialize those outputs of each block with NULL from the `sda.enkf.multisite` function.} -\item{X}{A matrix contains ensemble forecasts.} +\item{X}{A matrix contains ensemble forecasts with the dimensions of `[ensemble number, site number * number of state variables]`. The columns are matched with the site.ids and state variable names of the inside the `FORECAST` object in the `sda.enkf.multisite` script.} -\item{obs.mean}{List of dataframe of observation means, named with observation datetime.} +\item{obs.mean}{Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point.} -\item{obs.cov}{List of covariance matrices of state variables , named with observation datetime.} +\item{obs.cov}{Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point.} -\item{t}{time point.} +\item{t}{time point in format of YYYY-MM-DD.} -\item{nt}{total length of time steps.} +\item{nt}{total length of time steps, corresponding to the `nt` variable in the `sda.enkf.multisite` function.} -\item{MCMC.args}{arguments for the MCMC sampling.} +\item{MCMC.args}{arguments for the MCMC sampling, details can be found in the roxygen strucutre for control list in the `sda.enkf.multisite` function.} -\item{block.list.all.pre}{pre-existed block.list.all object for passing the aqq and bqq to the current SDA run, the default is NULL.} +\item{block.list.all.pre}{pre-existed block.list.all object for passing the aqq and bqq to the current SDA run, the default is NULL. Details can be found in the roxygen structure for `pre_enkf_params` of the `sda.enkf.multisite` function} } \value{ It returns the `build.block.xy` object and the analysis results. diff --git a/modules/assim.sequential/man/sda.enkf.multisite.Rd b/modules/assim.sequential/man/sda.enkf.multisite.Rd index 62a06a1821d..9c3560b3e89 100644 --- a/modules/assim.sequential/man/sda.enkf.multisite.Rd +++ b/modules/assim.sequential/man/sda.enkf.multisite.Rd @@ -12,30 +12,40 @@ sda.enkf.multisite( restart = NULL, pre_enkf_params = NULL, ensemble.samples = NULL, - control = list(trace = TRUE, FF = FALSE, interactivePlot = FALSE, TimeseriesPlot = - FALSE, BiasPlot = FALSE, plot.title = NULL, facet.plots = FALSE, debug = FALSE, pause - = FALSE, Profiling = FALSE, OutlierDetection = FALSE, parallel_qsub = TRUE, - send_email = NULL, keepNC = TRUE, forceRun = TRUE, run_parallel = TRUE), + control = list(trace = TRUE, TimeseriesPlot = FALSE, debug = FALSE, pause = FALSE, + Profiling = FALSE, OutlierDetection = FALSE, parallel_qsub = TRUE, send_email = NULL, + keepNC = TRUE, forceRun = TRUE, run_parallel = TRUE, MCMC.args = NULL), ... ) } \arguments{ \item{settings}{PEcAn settings object} -\item{obs.mean}{List of dataframe of observation means, named with observation datetime.} +\item{obs.mean}{Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point.} -\item{obs.cov}{List of covariance matrices of state variables , named with observation datetime.} +\item{obs.cov}{Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point.} \item{Q}{Process covariance matrix given if there is no data to estimate it.} \item{restart}{Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA.} -\item{pre_enkf_params}{Used for carrying out SDA with pre-existed enkf.params, in which the Pf, aqq, and bqq can be used for the analysis step.} +\item{pre_enkf_params}{Used for passing pre-existing time-series of process error into the current SDA runs to ignore the impact by the differences between process errors.} \item{ensemble.samples}{Pass ensemble.samples from outside to avoid GitHub check issues.} -\item{control}{List of flags controlling the behaviour of the SDA. trace for reporting back the SDA outcomes, interactivePlot for plotting the outcomes after each step, -TimeseriesPlot for post analysis examination, BiasPlot for plotting the correlation between state variables, plot.title is the title of post analysis plots and debug mode allows for pausing the code and examining the variables inside the function.} +\item{control}{List of flags controlling the behavior of the SDA. +`trace` for reporting back the SDA outcomes; +`TimeseriesPlot` for post analysis examination; +`debug` decide if we want to pause the code and examining the variables inside the function; +`pause` decide if we want to pause the SDA workflow at current time point t; +`Profiling` decide if we want to export the temporal SDA outputs in CSV file; +`OutlierDetection` decide if we want to execute the outlier detection each time after the model forecasting; +`parallel_qsub` decide if we want to execute the `qsub` job submission under parallel mode; +`send_email` contains lists for sending email to report the SDA progress; +`keepNC` decide if we want to keep the NetCDF files inside the out directory; +`forceRun` decide if we want to proceed the Bayesian MCMC sampling without observations; +`run_parallel` decide if we want to run the SDA under parallel mode for the `future_map` function; +`MCMC.args` include lists for controling the MCMC sampling process (iteration, nchains, burnin, and nthin.).} } \value{ NONE From f4874f2b362daa804bbee52183a42085944439e3 Mon Sep 17 00:00:00 2001 From: Michael Dietze Date: Tue, 21 Nov 2023 16:07:40 -0500 Subject: [PATCH 59/68] Update Actions to R4.2 Swaps the outdated R4.0 build to R4.2 to see if that version builds successfully -- was overdue and might get the GH Actions working again. --- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c27cf84d80..fbbc9564a3e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: fail-fast: false matrix: R: - - "4.0" + - "4.2" - "4.1" services: @@ -90,7 +90,7 @@ jobs: fail-fast: false matrix: R: - - "4.0" + - "4.2" - "4.1" env: @@ -147,7 +147,7 @@ jobs: fail-fast: false matrix: R: - - "4.0" + - "4.2" - "4.1" env: @@ -204,7 +204,7 @@ jobs: fail-fast: false matrix: R: - - "4.0" + - "4.2" - "4.1" env: @@ -266,7 +266,7 @@ jobs: fail-fast: false matrix: R: - - "4.0" + - "4.2" - "4.1" services: From 954a39be64cec007a6907c8d9d93d663fb84b4c2 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Tue, 21 Nov 2023 19:41:18 -0800 Subject: [PATCH 60/68] start building depends for R 4.2 and 4.3 --- .github/workflows/depends.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/depends.yml b/.github/workflows/depends.yml index 4aabdddc531..2672e4a2a28 100644 --- a/.github/workflows/depends.yml +++ b/.github/workflows/depends.yml @@ -29,6 +29,8 @@ jobs: R: - "4.0" - "4.1" + - "4.2" + - "4.3" - "devel" steps: From 2e8a4503fd71c734bbf45d8874c0e55045532e24 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Tue, 21 Nov 2023 20:25:45 -0800 Subject: [PATCH 61/68] add `patch`, needed to compile hdf5fr on R-devel --- docker/depends/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/depends/Dockerfile b/docker/depends/Dockerfile index 02905bbf433..0c6f89186fe 100644 --- a/docker/depends/Dockerfile +++ b/docker/depends/Dockerfile @@ -26,6 +26,7 @@ RUN apt-get update \ jags \ time \ openssh-client \ + patch \ rsync \ libgdal-dev \ libglpk-dev \ From 9213add94ce5a55c46c15b43cec2090df30915c4 Mon Sep 17 00:00:00 2001 From: istfer Date: Wed, 29 Nov 2023 09:54:05 +0200 Subject: [PATCH 62/68] reverting back lines that read rabbitmq.out --- base/workflow/R/start_model_runs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/workflow/R/start_model_runs.R b/base/workflow/R/start_model_runs.R index 9aabc8dd0e1..6350dbd5934 100644 --- a/base/workflow/R/start_model_runs.R +++ b/base/workflow/R/start_model_runs.R @@ -292,7 +292,7 @@ start_model_runs <- function(settings, write = TRUE, stop.on.error = TRUE) { job_finished <- FALSE if (is_rabbitmq) { job_finished <- - file.exists(file.path(settings$modeloutdir, run, "rabbitmq.out")) + file.exists(file.path(jobids[run], "rabbitmq.out")) } else if (is_qsub) { job_finished <- PEcAn.remote::qsub_run_finished( run = jobids[run], @@ -304,7 +304,7 @@ start_model_runs <- function(settings, write = TRUE, stop.on.error = TRUE) { # TODO check output log if (is_rabbitmq) { - data <- readLines(file.path(settings$modeloutdir, run, "rabbitmq.out")) + data <- readLines(file.path(jobids[run], "rabbitmq.out")) if (data[-1] == "ERROR") { msg <- paste("Run", run, "has an ERROR executing") if (stop.on.error) { From 2e77c80a5be2664860aa022f9d681b71e3f8d540 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Fri, 1 Dec 2023 16:06:13 -0800 Subject: [PATCH 63/68] Makefile: install with remotes::install_local instead of devtools::install devtools::install insists in checking for updates to all dependencies every call, even when `upgrade = FALSE`. Across all packages this adds up to a lot of repeated checks, and in combination with our separate dependency check it's enough to exceed GitHub APi request rate limits during CI builds. remotes::install_local appears to not check dependencies at all when dependencies=FALSE, which is what we want for this case. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a441991063c..cda4ad6f72b 100644 --- a/Makefile +++ b/Makefile @@ -74,7 +74,7 @@ depends_R_pkg = ./scripts/time.sh "depends ${1}" ./scripts/confirm_deps.R ${1} \ $(if $(findstring modules/benchmark,$(1)),NA,TRUE) install_R_pkg = ./scripts/time.sh "install ${1}" Rscript \ -e ${SETROPTIONS} \ - -e "devtools::install('$(strip $(1))', upgrade=FALSE)" + -e "remotes::install_local('$(strip $(1))', force=TRUE, dependencies=FALSE, upgrade=FALSE)" check_R_pkg = ./scripts/time.sh "check ${1}" Rscript scripts/check_with_errors.R $(strip $(1)) test_R_pkg = ./scripts/time.sh "test ${1}" Rscript \ -e "devtools::test('$(strip $(1))'," \ From f049dcaa03fefd5d353049a0d70672e443bbb567 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Sat, 9 Dec 2023 18:58:29 +0530 Subject: [PATCH 64/68] Update book.yml --- .github/workflows/book.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml index bd4df36be03..eb063443d36 100644 --- a/.github/workflows/book.yml +++ b/.github/workflows/book.yml @@ -52,7 +52,7 @@ jobs: path: book_source/_book/ # download documentation repo - name: Checkout documentation repo - if: github.event_name != 'pull_request' + if: github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/master' || github.ref == 'refs/heads/develop') uses: actions/checkout@v3 with: repository: ${{ github.repository_owner }}/pecan-documentation @@ -60,11 +60,11 @@ jobs: token: ${{ secrets.GH_PAT }} # upload new documentation - name: publish to github - if: github.event_name != 'pull_request' + if: github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/master' || github.ref == 'refs/heads/develop') run: | git config --global user.email "pecanproj@gmail.com" git config --global user.name "GitHub Documentation Robot" - export VERSION=${GITHUB_REF##*/} + export VERSION=$(echo $GITHUB_REF | sed 's,.*/,,' ) cd pecan-documentation mkdir -p $VERSION rsync -a --delete ../book_source/_book/ ${VERSION}/ From 5e101bd44dbe0ae749089d29dc30abcaf89a4b70 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Tue, 12 Dec 2023 00:36:02 +0530 Subject: [PATCH 65/68] Update .github/workflows/book.yml Co-authored-by: Rob Kooper --- .github/workflows/book.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml index eb063443d36..97309619060 100644 --- a/.github/workflows/book.yml +++ b/.github/workflows/book.yml @@ -52,7 +52,7 @@ jobs: path: book_source/_book/ # download documentation repo - name: Checkout documentation repo - if: github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/master' || github.ref == 'refs/heads/develop') + if: github.event_name == 'push' uses: actions/checkout@v3 with: repository: ${{ github.repository_owner }}/pecan-documentation From ad8dc5784fb2c004e5f9507345159fddc3569974 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Tue, 12 Dec 2023 20:57:31 +0530 Subject: [PATCH 66/68] Update .github/workflows/book.yml With ronkooper's suggestion to remove extra conditional statements. Co-authored-by: Chris Black --- .github/workflows/book.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml index 97309619060..90f998d1ae6 100644 --- a/.github/workflows/book.yml +++ b/.github/workflows/book.yml @@ -60,7 +60,7 @@ jobs: token: ${{ secrets.GH_PAT }} # upload new documentation - name: publish to github - if: github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/master' || github.ref == 'refs/heads/develop') + if: github.event_name == 'push' run: | git config --global user.email "pecanproj@gmail.com" git config --global user.name "GitHub Documentation Robot" From 86a81bf7706a6f6271d1a95778bd99b77e55ca59 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Wed, 13 Dec 2023 10:14:02 +0530 Subject: [PATCH 67/68] Typo Errors and Markdown issues in docs fixed --- CONTRIBUTING.md | 27 ++++--- .../scripts/benchmarking.documentation.md | 74 +++++++++---------- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 17c16a68581..3ca110f1f14 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,13 +1,13 @@ # How to contribute -Third-party contributions are highly encouraged for PEcAn and will grow the code as well as the understanding of PEcAn and its applications. The core development team can not add all models that exist to PEcAn or all possible scenarios and analysis that people want to conduct. Our goal is to keep it as easy as possible for you contribute changes that get things working in your environment. +Third-party contributions are highly encouraged for PEcAn and will grow the code as well as the understanding of PEcAn and its applications. The core development team can not add all models that exist to PEcAn or all possible scenarios and analysis that people want to conduct. Our goal is to keep it as easy as possible for you contribute changes that get things working in your environment. There are a few guidelines that we need contributors to follow so that we can have a chance of keeping on top of things. ## PEcAn CORE vs Models vs Modules New functionality is typically directed toward modules to provide a slimmer PEcAn Core, reducing the requirements to get PEcAn running on different platforms, especially HPC machines, and to allow greater freedom for modules and models. -Generally, new model should be added to the models folder and new modules should be added to the modules folder. +Generally, new model should be added to the models folder and new modules should be added to the modules folder. Exceptions include code that is reused in many models or modules and wrapper functions that call specific implementations in models; these can be placed in the core packages. If you are unsure of whether your contribution should be implemented as a model, module or part of PEcAn Core, you may visit [Chat Room](https://join.slack.com/t/pecanproject/shared_invite/enQtMzkyODUyMjQyNTgzLWEzOTM1ZjhmYWUxNzYwYzkxMWVlODAyZWQwYjliYzA0MDA0MjE4YmMyOTFhMjYyMjYzN2FjODE4N2Y4YWFhZmQ) or ask on the pecan-develop mailing list for advice. @@ -16,9 +16,9 @@ If you are unsure of whether your contribution should be implemented as a model, - Make sure you have a GitHub account. - Search GitHub and Google to see if your issue has already been reported - - Create an issue in GitHub, assuming one does not already exist. - - Clearly describe the issue including steps to reproduce when it is a bug. - - Make sure you fill in the earliest version that you know has the issue. + - Create an issue in GitHub, assuming one does not already exist. + - Clearly describe the issue including steps to reproduce when it is a bug. + - Make sure you fill in the earliest version that you know has the issue. - Ask @dlebauer, @mdietze or @robkooper to add you to the PEcAn project if you plan on fixing the issue. ## Getting Started @@ -33,17 +33,20 @@ At this point you will have a copy of PEcAn and you are almost ready to work on At this point you will have a copy of the pecan repo in your personal space. Next steps are to setup your local copy to work with the forked version. Introduce your self to GIT (if you have not done this yet), make sure you use an email associated with your GitHub account. + ```bash git config --global user.name "John Doe" git config --global user.email johndoe@example.com ``` Switch pecan to your fork + ```bash git remote set-url origin https://github.com//pecan.git ``` Setup pecan to be able to fetch from the master/develop + ```bash git remote add upstream https://github.com/PecanProject/pecan.git ``` @@ -66,30 +69,30 @@ Here is a simplified workflow on how add a new feature: Update your develop (both locally and on GitHub) -``` +```bash git fetch upstream git checkout develop git merge upstream/develop git push ``` -### Create a branch to do your work. +### Create a branch to do your work -A good practice is to call the branch in the form of GH- followed by the title of the issue. This makes it easier to find out the issue you are trying to solve and helps us to understand what is done in the branch. Calling a branch my-work is confusing. Names of branch can not have a space, and should be replaced with a hyphen. +A good practice is to call the branch in the form of `GH-` followed by the title of the issue. This makes it easier to find out the issue you are trying to solve and helps us to understand what is done in the branch. Calling a branch my-work is confusing. Names of branch can not have a space, and should be replaced with a hyphen. -``` +```bash git checkout -b GH-issuenumber-title-of-issue ``` ### Work and commit -Do you work, and commit as you see fit.Make your commit messages helpful. +Do you work, and commit as you see fit.Make your commit messages helpful. -### Push your changes up to GitHub. +### Push your changes up to GitHub If this is the first time pushing to GitHub you will need to extended command, other wise you can simply do a `git push`. -``` +```bash git push -u origin GH-issuenumber-title-of-issue ``` diff --git a/modules/benchmark/inst/scripts/benchmarking.documentation.md b/modules/benchmark/inst/scripts/benchmarking.documentation.md index 03bbb6700fe..958fc4ccf67 100644 --- a/modules/benchmark/inst/scripts/benchmarking.documentation.md +++ b/modules/benchmark/inst/scripts/benchmarking.documentation.md @@ -1,32 +1,33 @@ -# Benchmarking Documentation +# Benchmarking Documentation There are two ways to use `calc_benchmark` 1. With `workflow.R` to create new outputs 2. With `benchmark.workflow.R` to use exiting outputs -Both cases start with the same settings XML file, the only difference is that +Both cases start with the same settings XML file, the only difference is that `settings$new_run` will be `TRUE` or `FALSE` -This XML file should ultimately be generated through an online interface but for now can be -constructed by hand. +This XML file should ultimately be generated through an online interface but for now can be +constructed by hand. -## The bare minimum that the XML needs to contain: +## The bare minimum that the XML needs to contain - `` information - `` information (though in this case host doesn't matter because everything is happening locally) -- ``, ``, `` +- ``, ``, `` - `` settings: - - `ensemble_id`: the id of the ensemble that you will be using - either to determine the settings for a new ensemble or to provide the settings for the existing ensemble - - `input_id`: the id(s) of the benchmarking data - - `variable_id`: the id(s) of the variable(s) of interest within the data - - `metric_id`: the id(s) of the metric(s) to be calculated - - `new_run`: TRUE = create new run, FALSE = use existing run + - `ensemble_id`: the id of the ensemble that you will be using - either to determine the settings for a new ensemble or to provide the settings for the existing ensemble + - `input_id`: the id(s) of the benchmarking data + - `variable_id`: the id(s) of the variable(s) of interest within the data + - `metric_id`: the id(s) of the metric(s) to be calculated + - `new_run`: TRUE = create new run, FALSE = use existing run -Note: you don't need the benchmark reference run id. That will be retrieved/created later. +Note: you don't need the benchmark reference run id. That will be retrieved/created later. Example: -``` + +```xml 1000004796 1000008121 @@ -42,31 +43,32 @@ Example: ``` ## Example XML files + (I'll check them off when they are working) ## 1 variable, 1 metric, 1 site, 1 model - - settings.file: `modules/benchmark/inst/scripts/bm.1var.1metric.1site.1model.xml` - - [x] New Run - - [x] Existing Run +- settings.file: `modules/benchmark/inst/scripts/bm.1var.1metric.1site.1model.xml` +- [x] New Run +- [x] Existing Run ## 2 variables, 2 metric, 1 site, 1 model - - settings.file: `modules/benchmark/inst/scripts/bm.2var.2metric.1site.1model.xml` - - [x] New Run - - [x] Existing Run +- settings.file: `modules/benchmark/inst/scripts/bm.2var.2metric.1site.1model.xml` +- [x] New Run +- [x] Existing Run ## 2 variables, 2 metric, 1 site, 1 model - - settings.file: `modules/benchmark/inst/scripts/bm.2var.2metric.2site.1model.xml` - - settings will now be a multisite object - - [ ] New Run - - [ ] Existing Run - +- settings.file: `modules/benchmark/inst/scripts/bm.2var.2metric.2site.1model.xml` +- settings will now be a multisite object +- [ ] New Run +- [ ] Existing Run ------------------------------------------------------------------- + # Cheatsheet for looking up ids in BETY - + ## Sites - DUKE: 853 @@ -84,12 +86,12 @@ Example: - LeafLitter: 1000000046 - WoodyLitter: 1000000047 -## Metrics +## Metrics - timeseries.plot: 1000000001 - residual.plot: 1000000002 - MSE: 1000000003 -- MAE: 1000000004 +- MAE: 1000000004 - AME: 1000000005 - RAE: 1000000006 - PPMC: 1000000007 @@ -97,7 +99,7 @@ Example: ## Inputs: CDIAC -- DUKE +- DUKE - Ambient: 1000008119 - Elevated: 1000008120 - ORNL @@ -109,35 +111,31 @@ Example: ## Example Ensemble IDs -**1000004796** +#### 1000004796 - DALEC - ORNL -- Ambient (met = 1000000645) +- Ambient (met = 1000000645) - start date: 1998/01/01 - end date: 2008/12/31 - workflow id: 1000002773 - input id: 1000008121 - - -**1000004806** +#### 1000004806 - DALEC - DUKE -- Ambient (met = 1000000649) +- Ambient (met = 1000000649) - start date: 1996/01/01 - end date: 2007/12/31 - workflow id: 1000002775 - input id: 1000008119 - - -**1000473576** +#### 1000473576 - ED2_git - DUKE -- Ambient (met = 1000000649) +- Ambient (met = 1000000649) - start date: 1997/06/01 - end date: 2007/06/31 - workflow id: 1000003038 From b0ed2c22e0d42d70758e8ad3ba4de0b9234f6d7c Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Thu, 21 Dec 2023 22:21:08 +0530 Subject: [PATCH 68/68] Enhanced Documentations --- DEBUGING.md | 2 ++ DEV-INTRO.md | 72 ++++++++++++++++++++--------------------- README.md | 26 ++++++++------- contrib/README.md | 4 ++- documentation/README.md | 6 ++-- 5 files changed, 60 insertions(+), 50 deletions(-) diff --git a/DEBUGING.md b/DEBUGING.md index b62f71558d6..5c9c7c87aa6 100644 --- a/DEBUGING.md +++ b/DEBUGING.md @@ -1,3 +1,5 @@ +# DEBUGGING.MD + Adding the following to the workflow.R or to your .Rprofile will enable printing of a stacktrace in case something goes wrong. This will help with the development of PEcAn. diff --git a/DEV-INTRO.md b/DEV-INTRO.md index 4f88ef00d05..663927ebe33 100644 --- a/DEV-INTRO.md +++ b/DEV-INTRO.md @@ -34,10 +34,12 @@ The use of Docker in PEcAn is described in detail in the [PEcAn documentation](h ### Installing Docker To install Docker and docker-compose, see the docker documentation: + - Docker Desktop in [MacOS](https://docs.docker.com/docker-for-mac/install/) or [Windows](https://docs.docker.com/docker-for-windows/install/) - Docker (e.g. [Ubuntu](https://docs.docker.com/compose/install/)) and [docker-compose](https://docs.docker.com/compose/install/) on your linux operating system. _Note for Linux users:_ add your user to the docker group. This will prevent you from having to use `sudo` to start the docker containers, and makes sure that any file that is written to a mounted volume is owned by you. This can be done using + ```sh # for linux users sudo adduser ${USER} docker. @@ -51,13 +53,13 @@ By default docker-compose will use the files `docker-compose.yml` and `docker-co For Linux/MacOSX -``` +```sh cp docker-compose.dev.yml docker-compose.override.yml ``` For Windows -``` +```sh copy docker-compose.dev.yml docker-compose.override.yml ``` @@ -67,12 +69,12 @@ You can now use the command `docker-compose` to work with the containers setup f The steps in this section only need to be done the first time you start working with the stack in docker. After this is done you can skip these steps. You can find more detail about the docker commands in the [pecan documentation](https://pecanproject.github.io/pecan-documentation/master/docker-index.html). -* setup .env file -* create folders to hold the data -* load the postgresql database -* load some test data -* copy all R packages (optional but recommended) -* setup for web folder development (optional) +- setup .env file +- create folders to hold the data +- load the postgresql database +- load some test data +- copy all R packages (optional but recommended) +- setup for web folder development (optional) #### .env file @@ -86,18 +88,18 @@ cp docker/env.example .env For Windows -``` +```sh copy docker/env.example .env ``` -* `COMPOSE_PROJECT_NAME` set this to pecan, the prefix for all containers -* `PECAN_VERSION` set this to develop, the docker image we start with +- `COMPOSE_PROJECT_NAME` set this to pecan, the prefix for all containers +- `PECAN_VERSION` set this to develop, the docker image we start with Both of these variables should also be uncommented by removing the # preceding them. At the end you should see the following if you run the following command `egrep -v '^(#|$)' .env`. If you have a windows system, you will need to set the variable PWD as well, and for linux you will need to set UID and GID (for rstudio). For Linux -``` +```sh echo "COMPOSE_PROJECT_NAME=pecan" >> .env echo "PECAN_VERSION=develop" >> .env echo "UID=$(id -u)" >> .env @@ -106,14 +108,14 @@ echo "GID=$(id -g)" >> .env For MacOSX -``` +```sh echo "COMPOSE_PROJECT_NAME=pecan" >> .env echo "PECAN_VERSION=develop" >> .env ``` For Windows: -``` +```sh echo "COMPOSE_PROJECT_NAME=pecan" >> .env echo "PECAN_VERSION=develop" >> .env echo "PWD=%CD%" >> .env @@ -121,7 +123,7 @@ echo "PWD=%CD%" >> .env Once you have setup `docker-compose.override.yml` and the `.env` files, it is time to pull all docker images that will be used. Doing this will make sure you have the latest version of those images on your local system. -``` +```sh docker-compose pull ``` @@ -131,11 +133,10 @@ The goal of the development is to share the development folder with your contain If you have uncommented the volumes in `docker-compose.override.yml` you will need to create the folders. Assuming you have not modified the values, you can do this with: -``` +```sh mkdir -p $HOME/volumes/pecan/{lib,pecan,portainer,postgres,rabbitmq,traefik} ``` - The following volumes are specified: - **pecan_home** : is the checked out folder of PEcAn. This is shared with the executor and rstudio container allowing you to share and compile PEcAn. (defaults to current folder) @@ -153,7 +154,7 @@ These folders will hold all the persistent data for each of the respective conta First we bring up postgresql (we will start RabbitMQ as well since it takes some time to start): -``` +```sh docker-compose up -d postgres rabbitmq ``` @@ -161,15 +162,14 @@ This will start postgresql and rabbitmq. We need to wait for a few minutes (you Once the database has finished starting up we will initialize the database. Now you can load the database using the following commands. The first command will make sure we have the latest version of the image, the second command will actually load the information into the database. -``` +```sh docker pull pecan/db docker run --rm --network pecan_pecan pecan/db ``` - Once that is done we create two users for BETY, first user is the guest user that you can use to login in the BETY interface. The second user is a user with admin rights. -``` +```sh docker-compose run --rm bety user guestuser guestuser "Guest User" guestuser@example.com 4 4 docker-compose run --rm bety user carya illinois "Carya Demo User" carya@example.com 1 1 ``` @@ -178,7 +178,7 @@ docker-compose run --rm bety user carya illinois "Carya Demo User" carya@example Once the database is loaded we can add some example data, some of the example runs and runs for the ED model, assume some of this data is available. This can take some time, but all the data needed will be copied to the `/data` folder in the pecan containers. As with the database we first pull the latest version of the image, and then execute the image to copy all the data: -``` +```sh docker pull pecan/data:develop docker run -ti --rm --network pecan_pecan --volume pecan_pecan:/data --env FQDN=docker pecan/data:develop ``` @@ -206,26 +206,26 @@ docker run -ti --rm -v pecan_lib:/rlib pecan/base:develop cp -a /usr/local/lib/R If you want to use the web interface, you will need to: -1. Uncomment the web section from the `docker-compose.override.yml` file. This section includes three lines at the top of the file, just under the `services` section. Uncomment the lines that start `web:`, ` volumes:`, and `- pecan_web:`. +1. Uncomment the web section from the `docker-compose.override.yml` file. This section includes three lines at the top of the file, just under the `services` section. Uncomment the lines that start `web:`, `volumes:`, and `- pecan_web:`. 2. Then copy the config.php from the docker/web folder. You can do this using For Linux/MacOSX -``` +```sh cp docker/web/config.docker.php web/config.php ``` For Windows -``` +```sh copy docker\web\config.docker.php web\config.php ``` -### PEcAn Development +## PEcAn Development To begin development we first have to bring up the full PEcAn stack. This assumes you have done once the steps above. You don\'t need to stop any running containers, you can use the following command to start all containers. At this point you have PEcAn running in docker. -``` +```sh docker-compose up -d ``` @@ -241,7 +241,7 @@ To compile the PEcAn code you can use the make command in either the rstudio con You can submit your workflow either in the executor container or in rstudio container. For example to run the `docker.sipnet.xml` workflow located in the tests folder you can use: -``` +```sh docker-compose exec executor bash # inside the container cd /pecan/tests @@ -252,9 +252,9 @@ A better way of doing this is developed as part of GSOC, in which case you can l # PEcAn URLs -You can check the RabbitMQ server used by pecan using https://rabbitmq.pecan.localhost on the same server that the docker stack is running on. You can use rstudio either with http://server/rstudio or at http://rstudio.pecan.localhost. To check the traefik dashboard you can use http://traefik.pecan.localhost. +You can check the RabbitMQ server used by pecan using on the same server that the docker stack is running on. You can use rstudio either with or at . To check the traefik dashboard you can use . -If the stack is running on a remote machine, you can use ssh and port forwarding to connect to the server. For example `ssh -L 8000:localhost:80` will allow you to use http://rabbitmq.pecan.localhost:8000/ in your browser to connect to the remote PEcAn server RabbitMQ. +If the stack is running on a remote machine, you can use ssh and port forwarding to connect to the server. For example `ssh -L 8000:localhost:80` will allow you to use in your browser to connect to the remote PEcAn server RabbitMQ. # Directory Structure @@ -298,7 +298,7 @@ Small scripts that are used as part of the development and installation of PEcAn If you want to start from scratch and remove all old data, but keep your pecan checked out folder, you can remove the folders where you have written the data (see `folders` below). You will also need to remove any of the docker managed volumes. To see all volumes you can do `docker volume ls -q -f name=pecan`. If you are sure, you can either remove them one by one, or remove them all at once using the command below. **THIS DESTROYS ALL DATA IN DOCKER MANAGED VOLUMES.**. -``` +```sh docker volume rm $(docker volume ls -q -f name=pecan) ``` @@ -308,7 +308,7 @@ If you changed the docker-compose.override.yml file to point to a location on di If you want to reset the pecan lib folder that is mounted across all machines, for example when there is a new version of PEcAn or a a new version of R, you will need to delete the volume pecan_lib, and repopulate it. To delete the volume use the following command, and then look at "copy R packages" to copy the data again. -``` +```sh docker-compose down docker volume rm pecan_lib ``` @@ -323,19 +323,19 @@ This will leverage of NFS to mount the file system in your local docker image, c First install nfs server: -``` +```sh apt-get install nfs-kernel-server ``` Next export your home directory: -``` +```sh echo -e "$PWD\t127.0.0.1(rw,no_subtree_check,all_squash,anonuid=$(id -u),anongid=$(id -g))" | sudo tee -a /etc/exports ``` And export the filesystem. -``` +```sh sudo exportfs -va ``` @@ -343,7 +343,7 @@ At this point you have exported your home directory, only to your local machine. Finally we can modify the `docker-compose.override.yml` file to allow for writing files to your PEcAn folder as you: -``` +```sh volumes: pecan_home: driver_opts: diff --git a/README.md b/README.md index 1595e8ab1da..0374cf90341 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,15 @@ [![GitHub Actions CI](https://github.com/PecanProject/pecan/workflows/CI/badge.svg)](https://github.com/PecanProject/pecan/actions) -[![Slack](https://img.shields.io/badge/slack-login-green.svg)](https://pecanproject.slack.com/) +[![Slack](https://img.shields.io/badge/slack-login-green.svg)](https://pecanproject.slack.com/) [![Slack](https://img.shields.io/badge/slack-join_chat-green.svg)](https://join.slack.com/t/pecanproject/shared_invite/enQtMzkyODUyMjQyNTgzLWEzOTM1ZjhmYWUxNzYwYzkxMWVlODAyZWQwYjliYzA0MDA0MjE4YmMyOTFhMjYyMjYzN2FjODE4N2Y4YWFhZmQ) [![DOI](https://zenodo.org/badge/4469/PecanProject/pecan.svg)](https://zenodo.org/badge/latestdoi/4469/PecanProject/pecan) - - ## Our Vision + #### Ecosystem science, policy, and management informed by the best available data and models ## Our Mission -#### Develop and promote accessible tools for reproducible ecosystem modeling and forecasting +#### Develop and promote accessible tools for reproducible ecosystem modeling and forecasting ## What is PEcAn? @@ -31,18 +30,22 @@ See our ["Tutorials Page"](https://pecanproject.github.io/tutorials.html) that p ### Installation Complete instructions on how to install PEcAn can be found in the [documentation here](https://pecanproject.github.io/pecan-documentation/develop/pecan-manual-setup.html). To get PEcAn up and running you can use one of three methods: + 1. Run a [Virtual Machine](https://pecanproject.github.io/pecan-documentation/develop/install-vm.html#install-vm). This is recommended for students and new users, and provides a consistent, tested environment for each release. + 2. Use [Docker](https://pecanproject.github.io/pecan-documentation/develop/install-docker.html#install-docker). This is recommended, especially for development and production deployment. -3. Install all of the PEcAn R packages on your own Linux or MacOS computer or server. This can be done by [installing from r-universe](https://pecanproject.github.io/pecan-documentation/develop/r-universe.html): -``` r + +3. Install all of the PEcAn R packages on your own Linux or MacOS computer or server. This can be done by [installing from r-universe](https://pecanproject.github.io/pecan-documentation/develop/r-universe.html): + +```R # Enable repository from pecanproject options(repos = c( pecanproject = 'https://pecanproject.r-universe.dev', CRAN = 'https://cloud.r-project.org')) # Download and install PEcAn.all in R install.packages('PEcAn.all') +``` -``` This, however, may have limited functionality without also installing other components of PEcAn, in particular [BETYdb](https://pecanproject.github.io/pecan-documentation/develop/osinstall.html#install-bety). ### Website @@ -50,6 +53,7 @@ This, however, may have limited functionality without also installing other comp Visit our [webpage](https://pecanproject.github.io) to keep up with latest news, version, and information about the PEcAn Project #### Web Interface demo + The fastest way to begin modeling ecosystems is through the PEcAn web interface. We have a [demo website](http://pecan.ncsa.illinois.edu/pecan/01-introduction.php) that runs the current version of PEcAn. Using this instance you can perform a run using either ED or SIPNET at any of the predefined sites. @@ -82,12 +86,12 @@ University of Illinois/NCSA Open Source License Copyright (c) 2012, University of Illinois, NCSA. All rights reserved. PEcAn project -www.pecanproject.org + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. -- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. -- Neither the names of University of Illinois, NCSA, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. +* Neither the names of University of Illinois, NCSA, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. diff --git a/contrib/README.md b/contrib/README.md index c53bff1b9b5..00d54434d75 100644 --- a/contrib/README.md +++ b/contrib/README.md @@ -1 +1,3 @@ -This folder contains components that either build on, leverage, or add capabilties to PEcAn. +# README.md + + This folder contains components that either build on, leverage, or add capabilities to PEcAn \ No newline at end of file diff --git a/documentation/README.md b/documentation/README.md index f802adf0a73..b457094e574 100644 --- a/documentation/README.md +++ b/documentation/README.md @@ -1,3 +1,5 @@ -This folder contains published articles describing the development and application of PEcAn as well as tutorials. +# Readme.md -The full documentation can be found in the book_source directory, and is published at https://pecanproject.github.io/pecan-documentation/ with each new release. +This folder contains published articles describing the development and application of PEcAn as well as tutorials. + +The full documentation can be found in the book_source directory, and is published at with each new release.