Commit 4682732
Merge pull request #92 from lawinslow/master
Code for dataset RC1
Luke Winslow authored Jul 21, 2016
2 parents 630bebe + 271c0aa commit 4682732
Showing 18 changed files with 1,176 additions and 251 deletions.
7 changes: 5 additions & 2 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: mda.lakes
Type: Package
Title: Tools for combining models, data, and processing for lakes
Version: 4.1.2
Version: 4.2.1
Date: 2015-12-03
Author: Luke Winslow, Jordan Read
Maintainer: Luke Winslow <[email protected]>
@@ -16,7 +16,10 @@ Imports:
rLakeAnalyzer (>= 1.5),
insol,
plyr,
accelerometry
accelerometry,
lubridate,
dplyr,
tidyr
Enhances: geoknife
Description: More about what it does (maybe more than one line)
License: LICENSE file
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -43,11 +43,13 @@ export(populate_base_lake_nml)
export(prep_run_glm_kd)
export(sens_seasonal_site)
export(set_driver_url)
export(summarize_notaro)
import(GLMr)
import(glmtools)
import(lakeattributes)
import(lubridate)
import(rLakeAnalyzer)
import(tidyr)
importFrom(accelerometry,rle2)
importFrom(insol,JD)
importFrom(insol,insolation)
13 changes: 11 additions & 2 deletions R/get_driver_nhd.R
@@ -27,6 +27,11 @@ get_driver_nhd = function(id, driver_name, loc_cache, timestep){

if(substr(driver_url, 0,7) == 'file://'){
dest = sub('file://', '', driver_url)
#if driver url is a zip file, pull out of zip
}else if(substr(pkg_info$dvr_url, nchar(pkg_info$dvr_url)-3,nchar(pkg_info$dvr_url)) == '.zip'){

unzip(pkg_info$dvr_url, files = paste0('drivers_GLM_', driver_name, '/', fname), exdir=dirname(dest), junkpaths=TRUE)

}else{
if(!download_helper(driver_url, dest)){
stop('failure downloading ', fname, '\n')
@@ -99,8 +104,12 @@ get_driver_index = function(driver_name, loc_cache=TRUE){
return(read.table(dest, sep='\t', header=TRUE, as.is=TRUE))
}

if(!download_helper(index_url, dest)){
stop('driver_index.tsv: unable to download for driver data:', driver_name)
if(substr(pkg_info$dvr_url, nchar(pkg_info$dvr_url)-3,nchar(pkg_info$dvr_url)) == '.zip'){
unzip(pkg_info$dvr_url, files = paste0('drivers_GLM_', driver_name, '/driver_index.tsv'), exdir=dirname(dest), junkpaths=TRUE)
}else{
if(!download_helper(index_url, dest)){
stop('driver_index.tsv: unable to download for driver data:', driver_name)
}
}

return(read.table(dest, sep='\t', header=TRUE, as.is=TRUE))
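Note: with this change, both get_driver_nhd() and get_driver_index() recognize three driver sources: a local file:// path, a zipped driver bundle (a pkg_info$dvr_url ending in .zip), and a plain remote download. A minimal sketch of the dispatch, purely illustrative and not a package function, assuming dest, fname, and download_helper() behave as in the diff above:

#illustrative sketch of the three-way driver source dispatch
resolve_driver = function(driver_url, dest, driver_name, fname){
  if(startsWith(driver_url, 'file://')){
    #driver file is already on local disk
    dest = sub('file://', '', driver_url)
  }else if(grepl('\\.zip$', driver_url)){
    #pull just the one driver file out of the zip bundle
    unzip(driver_url, files = paste0('drivers_GLM_', driver_name, '/', fname),
          exdir = dirname(dest), junkpaths = TRUE)
  }else{
    #plain remote fetch
    if(!download_helper(driver_url, dest)){
      stop('failure downloading ', fname)
    }
  }
  return(dest)
}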
2 changes: 1 addition & 1 deletion R/necsc_thermal_metrics_core.R
@@ -20,7 +20,7 @@ necsc_thermal_metrics_core = function(run.path, site_id){
nml.file = file.path(run.path, 'glm2.nml')

all_wtr = get_temp(nc.file, reference='surface')
bottom_wtr = get_temp(nc.file, reference='bottom', z_out=0.1) #0.1m from bottom
bottom_wtr = get_temp(nc.file, reference='bottom', z_out=0) #bottom temperature
surface_wtr = get_temp(nc.file, reference='surface', z_out=0) #surface temps
all_ice = get_ice(nc.file)
all_raw_wtr = get_raw(nc.file, 'temp')
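Note: the bottom temperature is now read at the bottommost layer rather than 0.1 m above it. In glmtools terms, for some run output (file name assumed):

nc.file = 'output.nc'                                          #assumed GLM run output
old_bottom = get_temp(nc.file, reference='bottom', z_out=0.1)  #0.1 m above the bottom
new_bottom = get_temp(nc.file, reference='bottom', z_out=0)    #at the bottom itself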
5 changes: 3 additions & 2 deletions R/populate_base_lake_nml.R
@@ -21,7 +21,9 @@ populate_base_lake_nml = function(site_id, kd=get_kd_avg(site_id)$kd_avg, nml_te
#get default template
nml_obj = read_nml(nml_template)

initZ = c(0,0.2, zmax);
#make last depth slightly less than total depth to avoid
# precision-based GLM errors
initZ = c(0,0.2, (zmax-0.1));
initT = c(3,4,4);
initS = c(0,0,0);

@@ -52,7 +54,6 @@
'ch'=0.0014,
'cd'=0.0013,
'rain_sw'=FALSE,
'snow_sw'=TRUE,
'num_inflows'=0,
'outl_elvs'=1,
'outflow_fl'='outflow.csv',
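Note: the initial profile now stops 0.1 m short of zmax so GLM's layer arithmetic does not hit floating-point trouble at the bottom boundary. For an illustrative 12 m lake:

zmax = 12                        #illustrative lake depth (m)
initZ = c(0, 0.2, zmax - 0.1)    #profile depths: surface, 0.2 m, just above bottom
initT = c(3, 4, 4)               #matching initial temperatures (degC)
initS = c(0, 0, 0)               #matching initial salinities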
4 changes: 2 additions & 2 deletions R/prep_run_glm_kd.R
@@ -30,7 +30,7 @@
#'@export
prep_run_glm_kd <- function(site_id, path, years,
kd = getClarity(site_id, default.if.null=TRUE),
nml_args=NULL, sed_heat=FALSE){
nml_args=NULL, sed_heat=FALSE, verbose=FALSE){


nml_obj = populate_base_lake_nml(site_id, kd=kd)
@@ -54,7 +54,7 @@ prep_run_glm_kd <- function(site_id, path, years,

nml_out_path = file.path(path, "glm2.nml")
write_nml(nml_obj, nml_out_path)
out_val = run_glm(path)
out_val = run_glm(path, verbose=verbose)


return(out_val)
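Note: the new verbose argument is simply forwarded to run_glm(), so GLM console output can be switched on per call. A hedged usage sketch (site id and path are placeholders):

prep_run_glm_kd('nhd_13393567', path = tempdir(), years = 1980:1985)                  #quiet default
prep_run_glm_kd('nhd_13393567', path = tempdir(), years = 1980:1985, verbose = TRUE)  #echo GLM output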
68 changes: 68 additions & 0 deletions R/summarize_var_notaro.R
@@ -0,0 +1,68 @@
#' @title Summary stats for Notaro
#'
#' @param nc.file path to nc file
#'
#' @return data.frame with summary statistics
#'
#'
#' @import tidyr
#' @import lubridate
#'
#' @export
summarize_notaro = function(nc.file){
library(glmtools)
exclude.vars <- c("extc_coef", "salt", "precip", "wind")
var.names <- as.character(sim_vars(nc.file)$name)

lake.data = lapply(var.names[!var.names %in% exclude.vars], function(x) summarize_var_notaro(nc.file, x))
df.data <- do.call('rbind', lake.data)
return(df.data)
}


# for the 1D vars, get depths and summarize, for annual scale
summarize_var_notaro <- function(nc.file, var.name){
library(dplyr)
library(tidyr)
library(lubridate)

unit <- sim_var_units(nc.file, var.name)
is.1D <- glmtools:::.is_heatmap(nc.file, var.name)
value.name <- sprintf('%s%s', var.name, ifelse(unit=='', '',paste0(' (',unit,')')))
if (is.1D){
rename_depths <- function(depths){
unlist(lapply(strsplit(depths, '[_]'), function(x) sprintf('%s_%s', round(eval(parse(text=head(tail(x,2),1))), digits = 2), tail(x,1))))
}
get_depth <- function(names){
as.numeric(unname(unlist(sapply(names, function(x) strsplit(x,'[_]')[[1]][1]))))
}
get_stat <- function(names){
unname(unlist(sapply(names, function(x) strsplit(x,'[_]')[[1]][2])))
}
var <- get_var(nc.file, var.name, reference='surface') %>%
mutate(base.date=as.POSIXct(paste0(lubridate::year(DateTime),'-01-01')), tz='UTC') %>%
mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>%
select(-DateTime, -tz, -base.date) %>% select(doy, everything()) %>% group_by(doy) %>%
summarize_each(c('mean','sd')) %>%
setNames(c('doy',rename_depths(names(.)[-1L]))) %>% gather(key = doy) %>%
setNames(c('doy','depth_stat','value')) %>%
mutate(depth=get_depth(depth_stat), statistic=get_stat(depth_stat), variable=value.name) %>%
select(doy, depth, statistic, value, variable) %>%
filter(doy < 366)
} else {
var <- get_var(nc.file, var.name)%>%
mutate(base.date=as.POSIXct(paste0(lubridate::year(DateTime),'-01-01')), tz='UTC') %>%
mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select_('doy', var.name) %>% group_by(doy) %>%
summarize_each(c('mean','sd')) %>% gather(key = doy) %>%
setNames(c('doy','statistic','value')) %>%
mutate(depth=NA, variable=value.name) %>%
select(doy, depth, statistic, value, variable) %>%
filter(doy < 366)
}

return(var)
}
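
Note: calling the exported wrapper on a finished run returns one long data.frame of day-of-year mean/sd summaries per variable (and per depth for the 1D variables). A short usage sketch, assuming 'output.nc' is a finished GLM run in the working directory:

stats = summarize_notaro('output.nc')
head(stats)   #columns: doy, depth, statistic, value, variable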

124 changes: 22 additions & 102 deletions demo/cluster_cont_calibrate_bias.R
@@ -11,7 +11,7 @@ driver_url = paste0('http://', (Sys.info()["nodename"]),':4041/')
library(parallel)

#lets try 50 to start
c1 = makePSOCKcluster(paste0('licon', 1:50), manual=TRUE, port=4046)
c1 = makePSOCKcluster(paste0('licon', 1:60), manual=TRUE, port=4045)

clusterExport(c1, varlist = 'local_url')
clusterExport(c1, varlist = 'driver_url')
@@ -27,7 +27,7 @@ clusterCall(c1, function(){library(devtools)})
#glmr_install = clusterCall(c1, function(){install.packages('glmtools', repos=c('http://owi.usgs.gov/R', 'http://cran.rstudio.com'))})
glmr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/GLMr_3.1.10.tar.gz'))})
glmtools_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/glmtools_0.13.0.tar.gz'))})
lakeattr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/lakeattributes_0.8.4.tar.gz'))})
lakeattr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/lakeattributes_0.8.6.tar.gz'))})
mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_3.0.4.tar.gz'))})

library(lakeattributes)
@@ -36,142 +36,62 @@ library(dplyr)
library(glmtools)

## setup run function

run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nml_args=list(), datasource=NA){


library(lakeattributes)
library(mda.lakes)
library(dplyr)
library(glmtools)

fastdir = tempdir()
if(file.exists('/mnt/ramdisk')){
fastdir = '/mnt/ramdisk'
}

secchi_conv = 1.7
tryCatch({


run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1)))
cat(run_dir, '\n')
dir.create(run_dir)

#rename for dplyr
nhd_id = site_id

data(wtemp)
obs = filter(wtemp, site_id == nhd_id) %>%
transmute(DateTime=date, Depth=depth, temp=wtemp)

#having a weird issue with resample_to_field, make unique
obs = obs[!duplicated(obs[,1:2]), ]

write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)

#get driver data
driver_path = driver_function(site_id)
driver_path = gsub('\\\\', '/', driver_path)


kds = get_kd_best(site_id, years=years, datasource = datasource)

kd_avg = secchi_conv/mean(kds$secchi_avg, na.rm=TRUE)

prep_run_glm_kd(site_id=site_id,
path=run_dir,
years=years,
kd=kd_avg,
nml_args=c(list(
dt=3600, subdaily=FALSE, nsave=24,
timezone=-6,
csv_point_nlevs=0,
snow_albedo_factor=1.1,
meteo_fl=driver_path,
cd=getCD(site_id, method='Hondzo')),
nml_args))

cal.data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
cal.data$site_id = site_id

unlink(run_dir, recursive=TRUE)

return(cal.data)

}, error=function(e){
unlink(run_dir, recursive=TRUE)
return(paste(e$call, e$message, Sys.info()["nodename"]))
})
}
source('demo/common_running_functions.R')


driver_fun = function(site_id){
nldas = read.csv(get_driver_path(site_id, driver_name = 'NLDAS', timestep = 'daily'), header=TRUE)
drivers = driver_nldas_wind_debias(nldas)
drivers = driver_add_burnin_years(drivers, nyears=2)
drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off
#drivers$time = drivers$time - as.difftime(2, units='days')
driver_save(drivers)
}


#we want only ramdisk enabled nodes
#ramdisk = clusterCall(c1, function(){file.exists('/mnt/ramdisk')})
#c1 = c1[unlist(ramdisk)]
data(wtemp)
to_run = unique(intersect(intersect(wtemp$site_id, zmax$site_id), secchi$site_id))
#to_run = to_run[to_run %in% read.table('~/managed_lakes_wilma_nhd.tsv', sep='\t', header=TRUE, as.is=TRUE)$id]

run_name = '2016-04-06_cal_NLDAS'
run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014"
run_name = '2016-04-12_cal_2014stop_NLDAS'
run_comment = "standard, all Secchi 1980-2014 run to compare to GCMs"

clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})
#clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})

out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA)

#out = clusterApplyLB(c1, to_run[1:50], run_cal)

sprintf('%i lakes ran\n', sum(unlist(lapply(out, inherits, what='data.frame'))))
out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(),
years=1980:2014, secchi_function=function(site_id){secchi_target_month(site_id, target_month=5:6)})

outl = split_run_output(out)
out_err = outl[['out_err']]
nmls = outl[['nmls']]
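Note: split_run_output() comes from demo/common_running_functions.R, which is sourced above but is not part of this diff. Judging only from its use here, it separates the clusterApplyLB results into successful data.frames, error messages, and nml objects; a guessed reconstruction, hypothetical in every detail:

#hypothetical sketch of the helper in demo/common_running_functions.R
split_run_output = function(out){
  ok = unlist(lapply(out, inherits, what='data.frame'))
  list(out_df = do.call('rbind', out[ok]),       #successful runs, stacked
       out_err = out[!ok],                       #error strings from failed runs
       nmls = lapply(out[ok], attr, which='nml'))#nml objects, if attached by run_cal
}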

##results
out_df = do.call('rbind', out[unlist(lapply(out, inherits, what='data.frame'))])
write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE)
write.table(outl[['out_df']], paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE)

out_err = out[!unlist(lapply(out, inherits, what='data.frame'))]
save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata'))

rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'),
save(nmls, file=paste0('c:/WiLMA/results/', run_name, 'nml.Rdata'))

rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'),
output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment))


sqrt(mean((out_df$Observed_temp - out_df$Modeled_temp)^2, na.rm=TRUE))
mean(out_df$Observed_temp - out_df$Modeled_temp, na.rm=TRUE)
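
Note: these two closing expressions are the run-level skill metrics, spelled out below for clarity (same out_df as above):

err = out_df$Observed_temp - out_df$Modeled_temp
sqrt(mean(err^2, na.rm=TRUE))  #RMSE (degC)
mean(err, na.rm=TRUE)          #mean bias; positive means the model runs cold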

run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014"

clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})

out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA)
parent = '570e8688e4b0ef3b7ca24d8e'
files = c(Sys.glob(paste0('c:/WiLMA/results/', run_name, '.*')), Sys.glob(paste0('c:/WiLMA/results/', run_name, 'nml.*')))

#out = clusterApplyLB(c1, to_run[1:50], run_cal)

sprintf('%i lakes ran\n', sum(unlist(lapply(out, inherits, what='data.frame'))))
library(sbtools)
authenticate_sb('[email protected]', pass)
itm = item_create(parent, title=run_name)

item_append_files(itm, files)

##results
out_df = do.call('rbind', out[unlist(lapply(out, inherits, what='data.frame'))])
write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE)

out_err = out[!unlist(lapply(out, inherits, what='data.frame'))]
save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata'))

rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'),
output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment))


sqrt(mean((out_df$Observed_temp - out_df$Modeled_temp)^2, na.rm=TRUE))
mean(out_df$Observed_temp - out_df$Modeled_temp, na.rm=TRUE)

################################################################################
## ACCESS
(diff truncated; the remaining 10 changed files are not shown)
