diff --git a/base/db/DESCRIPTION b/base/db/DESCRIPTION index 76b55aed168..c2a94da9ac4 100644 --- a/base/db/DESCRIPTION +++ b/base/db/DESCRIPTION @@ -71,7 +71,8 @@ Suggests: rmarkdown (>= 2.19), testthat (>= 2.0.0), tidyverse, - withr + withr, + openssl License: BSD_3_clause + file LICENSE VignetteBuilder: knitr Copyright: Authors diff --git a/base/db/NAMESPACE b/base/db/NAMESPACE index 938b1cfe2d4..7379be2d1de 100644 --- a/base/db/NAMESPACE +++ b/base/db/NAMESPACE @@ -30,6 +30,7 @@ export(derive.traits) export(dplyr.count) export(fancy_scientific) export(get.id) +export(get.new.site) export(get.trait.data) export(get.trait.data.pft) export(get_postgres_envvars) diff --git a/base/db/R/get.new.site.R b/base/db/R/get.new.site.R new file mode 100644 index 00000000000..c44354aae1c --- /dev/null +++ b/base/db/R/get.new.site.R @@ -0,0 +1,123 @@ +##' Get new site info using provided site information +##' +##' @title Get New Site Info +##' @param site a dataframe with site information including id, lat, lon, and time_zone. +##' @param con Database connection object. +##' @param latlon Optional global latlon object which will store updated lat/lon. +##' @return a dataframe with new site information on lat, lon and time_zone +##' @export +##' @author Abhinav Pandey +##' +##' @examples +##' get.new.site(site=data.frame(id=1,lat=40.1,lon=-88.2,time_zone="UTC"),con=NULL,latlon=NULL) + +get.new.site <- function(site, con = NULL, latlon = NULL) { + if (is.null(con)) { + PEcAn.logger::logger.debug("DB connection is closed. Trying to generate a new site ID or use pre-existing one.") + # No DB connection present. Generate a new ID using one of below steps: + + if (is.null(site$id) | is.na(site$id)) { + if ((!is.null(site$lat) && !is.null(site$lon)) && + (!is.na(site$lat) && !is.na(site$lon)) + ) { + site.id <- paste0(lat, "_", lon) + new.site <- data.frame( + id = as.numeric(site.id), + lat = site$lat, + lon = site$lon + ) + str_ns <- paste0(new.site$lat, "_", new.site$lon) + } # lat/lon present but ID is absent + # Use Pre-existing normalised lat/lon + else { + PEcAn.logger::logger.warn("Site dataframe does not have an id column") + site.id <- generate_new_siteID() + PEcAn.logger::logger.info(paste0("Generated siteID:", site.id)) + # Optionally, create a new site data frame with the random ID + new.site <- data.frame( + id = site.id, + lat = NULL, + lon = NULL + ) + str_ns <- paste0(new.site$id %/% 1e+09, "_", new.site$id %% 1e+09) + # We used a different str_ns as identifier here due to absence of lat/lon + } + # ID as well as lat/lon absent. + # Return a WARN as we will be unable to identify such an instance due to lack of information. + # We'll try to Generate a new ID similar to previous ones. + } else { + if ((!is.null(site$lat) && !is.null(site$lon)) && + (!is.na(site$lat) && !is.na(site$lon)) + ) { + new.site <- data.frame( + id = as.numeric(site$id), + lat = site$lat, + lon = site$lon + ) + str_ns <- paste0(new.site$lat, "-", new.site$lon) + } # siteId as well as lat/lon present + else { + new.site <- data.frame( + id = site$id, + lat = NULL, + lon = NULL + ) + str_ns <- paste0(new.site$id %/% 1e+09, "-", new.site$id %% 1e+09) + } # siteId present but lat/lon absent + } + } else { + # Check if site dataframe has an id column + if (is.null(site$id) | is.na(site$id)) { + PEcAn.logger::logger.warn("Site dataframe does not have an id column. Generating a unique ID") + if ((!is.null(site$lat) && !is.null(site$lon)) && + (!is.na(site$lat) && !is.na(site$lon)) + ) { + PEcAn.logger::logger.info(paste0("Generated siteID using lat and lon:", site.id)) + site.id <- generate_new_siteID(site$lat, site$lon) + new.site <- data.frame( + id = as.numeric(site.id), + lat = site$lat, + lon = site$lon + ) + str_ns <- paste0(site$lat, "-", site$lon) + } else { + PEcAn.logger::logger.severe("Missing site-id, site lat & site-lon. Stopping the process due to lack of information") + } + } else { + # setup site database number, lat, lon and name and copy for format.vars if new input + if ((!is.null(site$lat) && !is.null(site$lon)) | + (!is.na(site$lat) && !is.na(site$lon)) + ) { + new.site <- data.frame( + id = as.numeric(site$id), + lat = site$lat, + lon = site$lon + ) + str_ns <- paste0(site$lat, "_", site$lon) + } else { + latlon <- query.site(site$id, con = con)[c("lat", "lon")] + new.site <- data.frame( + id = as.numeric(site$id), + lat = latlon$lat, + lon = latlon$lon + ) + str_ns <- paste0(new.site$lat, "_", new.site$lon) + } + } + } + + site.info <- list(new.site = new.site, str_ns = str_ns) + + return(site.info) +} + + +# Function to generate a new siteID using hashing (db-less runs ONLY) +generate_new_siteID <- function(lat, lon) { + latlon_str <- paste0(round(lat, 8), round(lon, 8)) + hash <- openssl::sha256(latlon_str) + + # Extracting first 10 characters of hash as a UID + uid <- substr(as.character(hash), 1, 10) + return(uid) +} diff --git a/base/db/man/get.new.site.Rd b/base/db/man/get.new.site.Rd new file mode 100644 index 00000000000..f00c6103513 --- /dev/null +++ b/base/db/man/get.new.site.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.new.site.R +\name{get.new.site} +\alias{get.new.site} +\title{Get New Site Info} +\usage{ +get.new.site(site, con = NULL, latlon = NULL) +} +\arguments{ +\item{site}{a dataframe with site information including id, lat, lon, and time_zone.} + +\item{con}{Database connection object.} + +\item{latlon}{Optional global latlon object which will store updated lat/lon.} +} +\value{ +a dataframe with new site information on lat, lon and time_zone +} +\description{ +Get new site info using provided site information +} +\examples{ +get.new.site(site=data.frame(id=1,lat=40.1,lon=-88.2,time_zone="UTC"),con=NULL,latlon=NULL) +} +\author{ +Abhinav Pandey +} diff --git a/base/workflow/R/do_conversions.R b/base/workflow/R/do_conversions.R index 50fd71e812e..f69311f5c84 100644 --- a/base/workflow/R/do_conversions.R +++ b/base/workflow/R/do_conversions.R @@ -9,6 +9,7 @@ ##' @author Ryan Kelly, Rob Kooper, Betsy Cowdery, Istem Fer do_conversions <- function(settings, overwrite.met = FALSE, overwrite.fia = FALSE, overwrite.ic = FALSE) { + if (PEcAn.settings::is.MultiSettings(settings)) { return(PEcAn.settings::papply(settings, do_conversions)) } diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv index 42e3077f261..944b62b4db8 100644 --- a/docker/depends/pecan_package_dependencies.csv +++ b/docker/depends/pecan_package_dependencies.csv @@ -241,6 +241,7 @@ "neonUtilities","*","modules/data.land","Imports",FALSE "nimble","*","modules/assim.sequential","Imports",FALSE "nneo","*","modules/data.atmosphere","Imports",FALSE +"openssl","*","base/db","Suggests",FALSE "optparse","*","base/settings","Imports",FALSE "parallel","*","modules/assim.batch","Imports",FALSE "parallel","*","modules/data.atmosphere","Suggests",FALSE diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R index 1380e324744..3f9b5e371bd 100644 --- a/modules/data.atmosphere/R/met.process.R +++ b/modules/data.atmosphere/R/met.process.R @@ -133,11 +133,12 @@ met.process <- function(site, input_met, start_date, end_date, model, # setup site database number, lat, lon and name and copy for format.vars if new input - latlon <- PEcAn.DB::query.site(site$id, con = con)[c("lat", "lon")] - new.site <- data.frame(id = as.numeric(site$id), - lat = latlon$lat, - lon = latlon$lon) - str_ns <- paste0(new.site$id %/% 1e+09, "-", new.site$id %% 1e+09) + latlon <- NULL + site.info <- PEcAn.DB::get.new.site(site, con=con, latlon = latlon) + + # extract new.site and str_ns from site.info + new.site <- site.info$new.site + str_ns <- site.info$str_ns if (is.null(format.vars$lat)) { format.vars$lat <- new.site$lat diff --git a/modules/data.atmosphere/tests/testthat/test.met.process.R b/modules/data.atmosphere/tests/testthat/test.met.process.R index 01e819c0744..b39e6198737 100644 --- a/modules/data.atmosphere/tests/testthat/test.met.process.R +++ b/modules/data.atmosphere/tests/testthat/test.met.process.R @@ -1,4 +1,4 @@ -test_that("`met.process` able to call .download.raw.met.module based on met process stage params", { +test_that("'met.process' able to call .download.raw.met.module based on met process stage params", { input_met <- list(source = 'CRUNCEP', id = '1') mockery::stub(met.process, 'PEcAn.DB::db.open', 1) @@ -8,7 +8,7 @@ test_that("`met.process` able to call .download.raw.met.module based on met proc mockery::stub(met.process, 'PEcAn.DB::query.format.vars', list()) mockery::stub(met.process, 'PEcAn.DB::dbfile.check', list(id = 1)) mockery::stub(met.process, 'assign', 1) - mockery::stub(met.process, 'PEcAn.DB::query.site', list(lat = 0, lon = 0)) + mockery::stub(met.process, 'PEcAn.DB::get.new.site', list(new.site = data.frame(id = 1, lat = 0, lon = 0), str_ns = "0-0")) mockery::stub(met.process, 'met.process.stage', list(download.raw = TRUE, met2cf = FALSE, standardize = FALSE, met2model = FALSE)) mocked_res <- mockery::mock(1) diff --git a/modules/data.land/R/ic_process.R b/modules/data.land/R/ic_process.R index 2c402692835..5e799dde61b 100644 --- a/modules/data.land/R/ic_process.R +++ b/modules/data.land/R/ic_process.R @@ -6,6 +6,7 @@ ##' @param input Taken from settings$run$inputs. This should include id, path, and source ##' @param dir settings$database$dbfiles ##' @param overwrite Default = FALSE. whether to force ic_process to proceed +##' @param site Current site information ##' ##' @author Istem Fer, Hamze Dokoohaki ic_process <- function(settings, input, dir, overwrite = FALSE){ @@ -13,7 +14,7 @@ ic_process <- function(settings, input, dir, overwrite = FALSE){ #--------------------------------------------------------------------------------------------------# # Extract info from settings and setup - site <- settings$run$site + site <- settings$run$site model <- list() model$type <- settings$model$type model$id <- settings$model$id @@ -49,15 +50,25 @@ ic_process <- function(settings, input, dir, overwrite = FALSE){ con <- PEcAn.DB::db.open(dbparms$bety) on.exit(PEcAn.DB::db.close(con), add = TRUE) - #grab site lat and lon info - latlon <- PEcAn.DB::query.site(site$id, con = con)[c("lat", "lon")] # setup site database number, lat, lon and name and copy for format.vars if new input - new.site <- data.frame(id = as.numeric(site$id), - lat = latlon$lat, - lon = latlon$lon) + latlon <- NULL + if(is.null(site$lat) | is.null(site$lon)) { + site.info <- PEcAn.DB::get.new.site(site, con=con, latlon = latlon) - new.site$name <- settings$run$site$name + # extract new.site and str_ns from site.info + new.site <- site.info$new.site + str_ns <- site.info$str_ns + } else { + latlon <- list(lon = site$lat, lon=site$lon) + new.site <- data.frame( + id = as.numeric(site$id), + lat = site$lat, + lon = site$lon + ) + str_ns <- paste0(site$lat, "-", site$lon) + } + new.site$name <- settings$run$site$name str_ns <- paste0(new.site$id %/% 1e+09, "-", new.site$id %% 1e+09) diff --git a/modules/data.land/R/soil_process.R b/modules/data.land/R/soil_process.R index 6ee57347b21..988aecfe1db 100644 --- a/modules/data.land/R/soil_process.R +++ b/modules/data.land/R/soil_process.R @@ -30,6 +30,7 @@ soil_process <- function(settings, input, dbfiles, overwrite = FALSE,run.local=T # set up bety connection con <- PEcAn.DB::db.open(dbparms$bety) on.exit(PEcAn.DB::db.close(con), add = TRUE) + # get site info latlon <- PEcAn.DB::query.site(site$id, con = con)[c("lat", "lon")] new.site <- data.frame(id = as.numeric(site$id), diff --git a/modules/data.land/man/ic_process.Rd b/modules/data.land/man/ic_process.Rd index dcf245e41f8..524ffc3f95f 100644 --- a/modules/data.land/man/ic_process.Rd +++ b/modules/data.land/man/ic_process.Rd @@ -14,6 +14,8 @@ ic_process(settings, input, dir, overwrite = FALSE) \item{dir}{settings$database$dbfiles} \item{overwrite}{Default = FALSE. whether to force ic_process to proceed} + +\item{site}{Current site information} } \description{ ic_process