diff --git a/CHANGELOG.md b/CHANGELOG.md index bbeba7cc98..16dd1fa233 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha - Added new feature of preparing initial conditions for MODIS LAI, AGB, ISCN SOC, and soil moisture across NA anchor sites. - Added GEDI AGB preparation workflow. - Added new feature of downloading datasets from the NASA DAAC ORNL database. +- Extended downscale function and created 'downscale_hrly' so that it handles more frequent data - Added 'aggregate' as a new feature for downscaled data ### Fixed diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index bb8aa415da..1ec9253385 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -18,6 +18,7 @@ export(Prep_OBS_SDA) export(Remote_Sync_launcher) export(SDA_OBS_Assembler) export(SDA_control) +export(SDA_downscale_hrly) export(SDA_remote_launcher) export(SDA_timeseries_plot) export(adj.ens) @@ -60,6 +61,7 @@ export(tobit_model_censored) export(y_star_create) import(furrr) import(lubridate) +import(ncdf4) import(nimble) importFrom(dplyr,"%>%") importFrom(lubridate,"%m+%") diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R new file mode 100644 index 0000000000..25da4c6215 --- /dev/null +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -0,0 +1,103 @@ +#' SDA Downscale Function for Hourly Data +#' +#' This function uses the randomForest model to downscale forecast data (hourly) to unmodeled locations using covariates and site locations +#' +#' @author Harunobu Ishii +#' @param nc_file In quotes, file path for .nc containing ensemble data. +#' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat". +#' @param yyyy In string, format is yyyy(year of interest) +#' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder +#' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. +#' @export + +SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ + + # Read the input data and site coordinates + nc_data <- ncdf4::nc_open(nc_file) + on.exit(ncdf4::nc_close(nc_data)) + input_data <- ncdf4::ncvar_get(nc_data, "NEE") + covariate_names <- names(covariates) + + + # Extract time and units + time <- nc_data$dim$time$vals + time_units <- nc_data$dim$time$units + time_origin_str <- substr(time_units, 12, 31) + + # Check if timezone is specified in the time units string + if (grepl("UTC|GMT", time_units)) { + time_origin <- lubridate::ymd_hm(time_origin_str, tz = "UTC") + } else if (grepl("EST", time_units)) { + time_origin <- lubridate::ymd_hm(time_origin_str, tz = "EST") + } else { + time_origin <- lubridate::ymd_hm(time_origin_str, tz = "UTC") # Default to UTC if not specified + } + + # Timereadable + if (grepl("hours", time_units)) { + time_readable <- time_origin + lubridate::dhours(time) + } else if (grepl("seconds", time_units)) { + time_readable <- time_origin + lubridate::dseconds(time) + } else { + stop("Unsupported time units") + } + + # Extract predictors from covariates raster using site coordinates + site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326") + predictors <- as.data.frame(terra::extract(covariates, site_coordinates,ID = FALSE)) + + downscale_output<- list() + + # Train & Test split + sample <- sample(1:nrow(predictors), size = round(0.75*nrow(predictors))) + + # Predict for each time stamp of the year selected + time_indices <- which(year(time_readable) == yyyy) + for (index in time_indices) { + data <- input_data[index, , ] + carbon_data <- as.data.frame(data) + names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data))) + + # Combine carbon data and covariates/predictors and split into training/test + full_data <- cbind(carbon_data, predictors) + train_data <- full_data[sample, ] + test_data <- full_data[-sample, ] + + # Combine each ensemble member with all predictors + models <- list() + maps <- list() + predictions <- list() + ensembles <- list() + for (i in seq_along(carbon_data)) { + ensemble_col <- paste0("ensemble", i) + formula <- stats::as.formula(paste(ensemble_col, "~", paste(covariate_names, collapse = " + "))) + models[[i]] <- randomForest::randomForest(formula, + data = train_data, + ntree = 1000, + na.action = stats::na.omit, + keep.forest = TRUE, + importance = TRUE) + + maps[[i]] <- terra::predict(covariates, model = models[[i]], na.rm = TRUE) + predictions[[i]] <- stats::predict(models[[i]], test_data) + } + + # Organize the results into a single output list + curr_downscaled <- list( data = list(training = train_data, testing = test_data), + models = models, + maps = maps, + predictions = predictions + ) + + # Rename each element of the output list with appropriate ensemble numbers + for (i in 1:length(curr_downscaled$data)) { + names(curr_downscaled$data[[i]]) <- paste0("ensemble", seq(1:ncol(carbon_data))) + } + names(curr_downscaled$models) <- paste0("ensemble", seq(1:ncol(carbon_data))) + names(curr_downscaled$maps) <- paste0("ensemble", seq(1:ncol(carbon_data))) + names(curr_downscaled$predictions) <- paste0("ensemble", seq(1:ncol(carbon_data))) + + downscale_output[[as.character(time_readable[index])]]<-curr_downscaled + } + return(downscale_output) +} diff --git a/modules/assim.sequential/man/SDA_downscale_hrly.Rd b/modules/assim.sequential/man/SDA_downscale_hrly.Rd new file mode 100644 index 0000000000..6cfe7eba90 --- /dev/null +++ b/modules/assim.sequential/man/SDA_downscale_hrly.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/downscale_function_hrly.R +\name{SDA_downscale_hrly} +\alias{SDA_downscale_hrly} +\title{SDA Downscale Function for Hourly Data} +\usage{ +SDA_downscale_hrly(nc_file, coords, yyyy, covariates) +} +\arguments{ +\item{nc_file}{In quotes, file path for .nc containing ensemble data.} + +\item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".} + +\item{yyyy}{In string, format is yyyy(year of interest)} + +\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder} +} +\value{ +It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. +} +\description{ +This function uses the randomForest model to downscale forecast data (hourly) to unmodeled locations using covariates and site locations +} +\author{ +Harunobu Ishii +}