From 7902db9590eb8cba20468649b8f4b1fe88296562 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 11 Apr 2016 14:21:20 -0500 Subject: [PATCH] updates to run documentation #70 --- demo/cluster_cont_calibrate_bias.R | 73 ++++++---- ...document_calibration_overview_template.Rmd | 128 +++++++++++++++++- 2 files changed, 171 insertions(+), 30 deletions(-) diff --git a/demo/cluster_cont_calibrate_bias.R b/demo/cluster_cont_calibrate_bias.R index a3ca6f3..59c852b 100644 --- a/demo/cluster_cont_calibrate_bias.R +++ b/demo/cluster_cont_calibrate_bias.R @@ -28,7 +28,7 @@ clusterCall(c1, function(){library(devtools)}) glmr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/GLMr_3.1.10.tar.gz'))}) glmtools_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/glmtools_0.13.0.tar.gz'))}) lakeattr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/lakeattributes_0.8.4.tar.gz'))}) -mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_3.0.3.tar.gz'))}) +mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_3.0.4.tar.gz'))}) library(lakeattributes) library(mda.lakes) @@ -38,7 +38,7 @@ library(glmtools) ## setup run function run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nml_args=list(), datasource=NA){ - + library(lakeattributes) library(mda.lakes) @@ -61,9 +61,10 @@ run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nm #rename for dplyr nhd_id = site_id + data(wtemp) obs = filter(wtemp, site_id == nhd_id) %>% - transmute(DateTime=date, Depth=depth, temp=wtemp) - + transmute(DateTime=date, Depth=depth, temp=wtemp) + #having a weird issue with resample_to_field, make unique obs = obs[!duplicated(obs[,1:2]), ] @@ -79,16 +80,17 @@ run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nm kd_avg = secchi_conv/mean(kds$secchi_avg, na.rm=TRUE) prep_run_glm_kd(site_id=site_id, - path=run_dir, - years=years, - kd=kd_avg, - nml_args=c(list( - dt=3600, subdaily=FALSE, nsave=24, - timezone=-6, - csv_point_nlevs=0, - snow_albedo_factor=1.1, - meteo_fl=driver_path), - nml_args)) + path=run_dir, + years=years, + kd=kd_avg, + nml_args=c(list( + dt=3600, subdaily=FALSE, nsave=24, + timezone=-6, + csv_point_nlevs=0, + snow_albedo_factor=1.1, + meteo_fl=driver_path, + cd=getCD(site_id, method='Hondzo')), + nml_args)) cal.data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv')) cal.data$site_id = site_id @@ -96,11 +98,11 @@ run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nm unlink(run_dir, recursive=TRUE) return(cal.data) - + }, error=function(e){ - unlink(run_dir, recursive=TRUE) - return(paste(e$call, e$message, Sys.info()["nodename"])) - }) + unlink(run_dir, recursive=TRUE) + return(paste(e$call, e$message, Sys.info()["nodename"])) + }) } @@ -116,16 +118,16 @@ driver_fun = function(site_id){ #we want only ramdisk enabled nodes #ramdisk = clusterCall(c1, function(){file.exists('/mnt/ramdisk')}) #c1 = c1[unlist(ramdisk)] - +data(wtemp) to_run = unique(intersect(intersect(wtemp$site_id, zmax$site_id), secchi$site_id)) #to_run = to_run[to_run %in% read.table('~/managed_lakes_wilma_nhd.tsv', sep='\t', header=TRUE, as.is=TRUE)$id] -run_name = '2016-03-31_cal_NLDAS' -run_comment = "Full calibration run with all available wtemp, secchi and depth" +run_name = '2016-04-06_cal_NLDAS' +run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014" clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)}) -out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:1999, datasource=NA) +out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA) #out = clusterApplyLB(c1, to_run[1:50], run_cal) @@ -139,7 +141,32 @@ write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row out_err = out[!unlist(lapply(out, inherits, what='data.frame'))] save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata')) -rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.pdf'), +rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'), + output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment)) + + +sqrt(mean((out_df$Observed_temp - out_df$Modeled_temp)^2, na.rm=TRUE)) +mean(out_df$Observed_temp - out_df$Modeled_temp, na.rm=TRUE) + +run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014" + +clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)}) + +out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA) + +#out = clusterApplyLB(c1, to_run[1:50], run_cal) + +sprintf('%i lakes ran\n', sum(unlist(lapply(out, inherits, what='data.frame')))) + + +##results +out_df = do.call('rbind', out[unlist(lapply(out, inherits, what='data.frame'))]) +write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE) + +out_err = out[!unlist(lapply(out, inherits, what='data.frame'))] +save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata')) + +rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'), output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment)) diff --git a/demo/document_calibration_overview_template.Rmd b/demo/document_calibration_overview_template.Rmd index 91ae02f..1ccd55a 100644 --- a/demo/document_calibration_overview_template.Rmd +++ b/demo/document_calibration_overview_template.Rmd @@ -2,7 +2,7 @@ title: "Cal/Val Summary" author: "Luke Winslow" date: "March 22, 2016" -output: pdf_document +output: html_document params: out_df: !r data.frame() run_message: !r "default message" @@ -13,10 +13,11 @@ Run description: `r params$run_message` -```{r, echo=FALSE, message=FALSE} +```{r, echo=FALSE, message=FALSE, warning=FALSE} library(lubridate) library(glmtools) library(mda.lakes) +library(plyr) out_df = params$out_df @@ -28,6 +29,20 @@ out_df$month = month(out_df$DateTime) out_df$modLessObs = out_df$Modeled_temp - out_df$Observed_temp out_df$Depth_1 = floor(out_df$Depth/1)*1 +epi = calc_mod_obs_metric(out_df, 'epi.temperature') + +hypo = calc_mod_obs_metric(out_df, 'hypo.temperature') + +metric_calc_bylake = function(df){ + data.frame(rmse=sqrt(mean((df$mod - df$obs)^2, na.rm=TRUE)), bias=mean(df$mod - df$obs, na.rm=TRUE), ndates=nrow(df)) +} + + +epi_bylake = ddply(epi, 'site_id', metric_calc_bylake) +hypo_bylake = ddply(hypo, 'site_id', metric_calc_bylake) + +epi_bylake = subset(epi_bylake, ndates >=10) +hypo_bylake = subset(hypo_bylake, ndates >=10) ``` @@ -51,11 +66,24 @@ n lakes | `r length(unique(out_df$site_id)) ` RMSE | `r sqrt(mean((out_df$modLessObs)^2, na.rm=TRUE)) ` Mean error| `r mean(out_df$modLessObs, na.rm=TRUE)` +# Epi and Hypo lake-specific RMSE +These are RMSE calculated for each lake, then the 50th and 95th percentile of those per-lake values +are presented. +Layer | 50% | 95% +------|-----|----- +Epi | `r quantile(epi_bylake$rmse, probs=0.5)` | `r quantile(epi_bylake$rmse, probs=0.95)` +Hypo RMSE | `r quantile(hypo_bylake$rmse, probs=0.5)` | `r quantile(hypo_bylake$rmse, probs=0.95)` + +#Epi and Hypo lake-specific Bias stats + +Layer | 2.5% | 50% | 97.5 +------|-----|-----|------- +Epi Bias | `r quantile(epi_bylake$bias, probs=0.025)` | `r quantile(epi_bylake$bias, probs=0.5)` | `r quantile(epi_bylake$bias, probs=0.975)` +Hypo Bias | `r quantile(hypo_bylake$bias, probs=0.025)` | `r quantile(hypo_bylake$bias, probs=0.5)` | `r quantile(hypo_bylake$bias, probs=0.975)` -\newpage #Overall observed vs model: ```{r, echo=FALSE, fig.width=5, fig.height=7, warning=FALSE} @@ -65,12 +93,11 @@ plot(out_df$Observed_temp, out_df$Modeled_temp, col=rgb(0, 0, 0, 0.1), pch=16, x abline(0, 1) -epi = calc_mod_obs_metric(out_df, 'epi.temperature') plot(epi$obs, epi$mod, col=rgb(0, 0, 0, 0.1), pch=16, xlim=c(0,30), ylim=c(0,30), ylab='Modeled Epi', xaxt='n') abline(0, 1) -hypo = calc_mod_obs_metric(out_df, 'hypo.temperature') + plot(hypo$obs, hypo$mod, col=rgb(0, 0, 0, 0.1), pch=16, xlim=c(0,30), ylim=c(0,30), ylab='Modeled Hypo', xlab='Observed') abline(0, 1) @@ -83,7 +110,9 @@ Epi RMSE | `r sqrt(mean((epi$mod - epi$obs)^2, na.rm=TRUE))` | `r mean(epi$mod Hypo RMSE | `r sqrt(mean((hypo$mod - hypo$obs)^2, na.rm=TRUE))` | `r mean(hypo$mod - hypo$obs, na.rm=TRUE)` -\newpage + + + #Residuals across depth: ```{r, echo=FALSE} @@ -91,7 +120,7 @@ boxplot(modLessObs~Depth_1, out_df, ylab="Model - Observation", xlab='Depth (m)' abline(h=0) ``` -\newpage + #Seasonality of residuals: ```{r, echo=FALSE} @@ -101,4 +130,89 @@ abline(h=0) ``` +```{r, echo=FALSE} + +library(leaflet) +library(plyr) +library(lakeattributes) + +lake_stats = ddply(out_df, 'site_id', function(df){ + + rmse = sqrt(mean((df$Observed_temp - df$Modeled_temp)^2, na.rm=TRUE)) + bias = mean(df$Modeled_temp - df$Observed_temp, na.rm=TRUE) + n = nrow(df) + data.frame(rmse, bias, n) + +}) + +data(canopy) + +lake_stats = merge(lake_stats, lakeattributes::location, by='site_id') +lake_stats = merge(lake_stats, lakeattributes::area, by='site_id') +lake_stats = merge(lake_stats, lakeattributes::zmax, by='site_id') +lake_stats$secchi = 1.7/get_kd_avg(lake_stats$site_id)$kd_avg +lake_stats = merge(lake_stats, canopy, by='site_id') + +``` + +# +```{r, echo=FALSE} + +par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0)) +plot((lake_stats$area_m2), lake_stats$bias, xlab='Lake area m^2', ylab='by-obs Bias', xaxt='n', log='x') +plot((lake_stats$area_m2), lake_stats$rmse, xlab='Lake area m^2', ylab='by-obs RMSE', log='x') +mtext(text='Log10(lake area m^2)', side=1, line=2) +``` + +```{r, echo=FALSE} +par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0)) +plot(lake_stats$secchi, lake_stats$bias, xlab='Secchi(m)', ylab='by-obs Bias', xaxt='n') +plot(lake_stats$secchi, lake_stats$rmse, xlab='Secchi(m)', ylab='by-obs RMSE') +mtext(text='Secchi(m)', side=1, line=2) +``` + +```{r, echo=FALSE} +par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0)) +plot(lake_stats$zmax_m, lake_stats$bias, log='x', xlab='Log(zmax)', ylab='by-obs Bias', xaxt='n') +plot(lake_stats$zmax_m, lake_stats$rmse, log='x', xlab='Log(zmax)', ylab='by-obs RMSE') +mtext(text='zmax', side=1, line=2) +# +# par(mfrow=c(2,1)) +# boxplot(bias~canopy_m, lake_stats) +# abline(h=0) +# height = as.numeric(names(tmp)) +# barplot(height~tmp) + + +``` + +## Map of Bias + +```{r, echo=FALSE} + +m = leaflet() %>% addTiles() %>% + addCircleMarkers(lng=lake_stats$lon, lat=lake_stats$lat, radius=4, stroke=TRUE, weight=1, + fillColor=colorNumeric(c('Blue', 'White', "Red"), (lake_stats$bias))((lake_stats$bias)), + fillOpacity=1, popup=paste0('Bias: ', lake_stats$bias, '
RMSE:', lake_stats$rmse, '
N:', + lake_stats$n, '
ID:', lake_stats$site_id)) + +m + + +``` + +## Map of RMSE + +```{r, echo=FALSE} + +m = leaflet() %>% addTiles() %>% + addCircleMarkers(lng=lake_stats$lon, lat=lake_stats$lat, radius=4, stroke=TRUE, weight=1, + fillColor=colorNumeric(c('White', "Red"), (lake_stats$rmse))((lake_stats$rmse)), + fillOpacity=1, popup=paste0('Bias: ', lake_stats$bias, '
RMSE:', lake_stats$rmse, '
N:', + lake_stats$n, '
ID:', lake_stats$site_id)) + +m + + +```