From 7902db9590eb8cba20468649b8f4b1fe88296562 Mon Sep 17 00:00:00 2001
From: Luke Winslow <lwinslow@usgs.gov>
Date: Mon, 11 Apr 2016 14:21:20 -0500
Subject: [PATCH] updates to run documentation #70

---
 demo/cluster_cont_calibrate_bias.R            |  73 ++++++----
 ...document_calibration_overview_template.Rmd | 128 +++++++++++++++++-
 2 files changed, 171 insertions(+), 30 deletions(-)

diff --git a/demo/cluster_cont_calibrate_bias.R b/demo/cluster_cont_calibrate_bias.R
index a3ca6f3..59c852b 100644
--- a/demo/cluster_cont_calibrate_bias.R
+++ b/demo/cluster_cont_calibrate_bias.R
@@ -28,7 +28,7 @@ clusterCall(c1, function(){library(devtools)})
 glmr_install     = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/GLMr_3.1.10.tar.gz'))})
 glmtools_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/glmtools_0.13.0.tar.gz'))})
 lakeattr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/lakeattributes_0.8.4.tar.gz'))})
-mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_3.0.3.tar.gz'))})
+mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_3.0.4.tar.gz'))})
 
 library(lakeattributes)
 library(mda.lakes)
@@ -38,7 +38,7 @@ library(glmtools)
 ## setup run function
 
 run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nml_args=list(), datasource=NA){
-
+  
   
   library(lakeattributes)
   library(mda.lakes)
@@ -61,9 +61,10 @@ run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nm
     #rename for dplyr
     nhd_id = site_id
     
+    data(wtemp)
     obs = filter(wtemp, site_id == nhd_id) %>%
-            transmute(DateTime=date, Depth=depth, temp=wtemp)
-      
+      transmute(DateTime=date, Depth=depth, temp=wtemp)
+    
     #having a weird issue with resample_to_field, make unique
     obs = obs[!duplicated(obs[,1:2]), ]
     
@@ -79,16 +80,17 @@ run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nm
     kd_avg = secchi_conv/mean(kds$secchi_avg, na.rm=TRUE)
     
     prep_run_glm_kd(site_id=site_id, 
-                            path=run_dir, 
-                            years=years,
-                            kd=kd_avg, 
-                            nml_args=c(list(
-                              dt=3600, subdaily=FALSE, nsave=24, 
-                              timezone=-6,
-                              csv_point_nlevs=0, 
-                              snow_albedo_factor=1.1, 
-                              meteo_fl=driver_path), 
-                              nml_args))
+                    path=run_dir, 
+                    years=years,
+                    kd=kd_avg, 
+                    nml_args=c(list(
+                      dt=3600, subdaily=FALSE, nsave=24, 
+                      timezone=-6,
+                      csv_point_nlevs=0, 
+                      snow_albedo_factor=1.1, 
+                      meteo_fl=driver_path, 
+                      cd=getCD(site_id, method='Hondzo')), 
+                      nml_args))
     
     cal.data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
     cal.data$site_id = site_id
@@ -96,11 +98,11 @@ run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nm
     unlink(run_dir, recursive=TRUE)
     
     return(cal.data)
-  
+    
   }, error=function(e){
-      unlink(run_dir, recursive=TRUE)
-      return(paste(e$call, e$message, Sys.info()["nodename"]))
-    })
+    unlink(run_dir, recursive=TRUE)
+    return(paste(e$call, e$message, Sys.info()["nodename"]))
+  })
 }
 
 
@@ -116,16 +118,16 @@ driver_fun = function(site_id){
 #we want only ramdisk enabled nodes
 #ramdisk = clusterCall(c1, function(){file.exists('/mnt/ramdisk')})
 #c1 = c1[unlist(ramdisk)]
-
+data(wtemp)
 to_run = unique(intersect(intersect(wtemp$site_id, zmax$site_id), secchi$site_id))
 #to_run = to_run[to_run %in% read.table('~/managed_lakes_wilma_nhd.tsv', sep='\t', header=TRUE, as.is=TRUE)$id]
 
-run_name = '2016-03-31_cal_NLDAS'
-run_comment = "Full calibration run with all available wtemp, secchi and depth"
+run_name = '2016-04-06_cal_NLDAS'
+run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014"
 
 clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})
 
-out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:1999, datasource=NA)
+out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA)
 
 #out = clusterApplyLB(c1, to_run[1:50], run_cal)
 
@@ -139,7 +141,32 @@ write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row
 out_err = out[!unlist(lapply(out, inherits, what='data.frame'))]
 save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata'))
 
-rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.pdf'), 
+rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'), 
+                  output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment))
+
+
+sqrt(mean((out_df$Observed_temp - out_df$Modeled_temp)^2, na.rm=TRUE))
+mean(out_df$Observed_temp - out_df$Modeled_temp, na.rm=TRUE)
+
+run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014"
+
+clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})
+
+out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA)
+
+#out = clusterApplyLB(c1, to_run[1:50], run_cal)
+
+sprintf('%i lakes ran\n', sum(unlist(lapply(out, inherits, what='data.frame'))))
+
+
+##results
+out_df = do.call('rbind', out[unlist(lapply(out, inherits, what='data.frame'))])
+write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE)
+
+out_err = out[!unlist(lapply(out, inherits, what='data.frame'))]
+save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata'))
+
+rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'), 
                   output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment))
 
 
diff --git a/demo/document_calibration_overview_template.Rmd b/demo/document_calibration_overview_template.Rmd
index 91ae02f..1ccd55a 100644
--- a/demo/document_calibration_overview_template.Rmd
+++ b/demo/document_calibration_overview_template.Rmd
@@ -2,7 +2,7 @@
 title: "Cal/Val Summary"
 author: "Luke Winslow"
 date: "March 22, 2016"
-output: pdf_document
+output: html_document
 params:
   out_df: !r data.frame()
   run_message: !r "default message"
@@ -13,10 +13,11 @@ Run description: `r params$run_message`
 
 
 
-```{r, echo=FALSE, message=FALSE}
+```{r, echo=FALSE, message=FALSE, warning=FALSE}
 library(lubridate)
 library(glmtools)
 library(mda.lakes)
+library(plyr)
 
 out_df = params$out_df
 
@@ -28,6 +29,20 @@ out_df$month      = month(out_df$DateTime)
 out_df$modLessObs = out_df$Modeled_temp - out_df$Observed_temp
 out_df$Depth_1   = floor(out_df$Depth/1)*1
 
+epi = calc_mod_obs_metric(out_df, 'epi.temperature')
+
+hypo = calc_mod_obs_metric(out_df, 'hypo.temperature')
+
+metric_calc_bylake = function(df){
+  data.frame(rmse=sqrt(mean((df$mod - df$obs)^2, na.rm=TRUE)), bias=mean(df$mod - df$obs, na.rm=TRUE), ndates=nrow(df))
+}
+
+
+epi_bylake = ddply(epi, 'site_id', metric_calc_bylake)
+hypo_bylake = ddply(hypo, 'site_id', metric_calc_bylake)
+
+epi_bylake = subset(epi_bylake, ndates >=10)
+hypo_bylake = subset(hypo_bylake, ndates >=10)
 
 ```
 
@@ -51,11 +66,24 @@ n lakes   | `r length(unique(out_df$site_id)) `
 RMSE      | `r sqrt(mean((out_df$modLessObs)^2, na.rm=TRUE)) `
 Mean error| `r mean(out_df$modLessObs, na.rm=TRUE)`
 
+# Epi and Hypo lake-specific RMSE
 
+These are RMSE calculated for each lake, then the 50th and 95th percentile of those per-lake values 
+are presented. 
 
+Layer | 50% | 95% 
+------|-----|-----
+Epi | `r quantile(epi_bylake$rmse, probs=0.5)` | `r quantile(epi_bylake$rmse, probs=0.95)`
+Hypo RMSE | `r quantile(hypo_bylake$rmse, probs=0.5)` | `r quantile(hypo_bylake$rmse, probs=0.95)` 
+
+#Epi and Hypo lake-specific Bias stats
+
+Layer | 2.5% | 50% | 97.5 
+------|-----|-----|-------
+Epi Bias  | `r quantile(epi_bylake$bias, probs=0.025)` | `r quantile(epi_bylake$bias, probs=0.5)` | `r quantile(epi_bylake$bias, probs=0.975)` 
+Hypo Bias | `r quantile(hypo_bylake$bias, probs=0.025)` | `r quantile(hypo_bylake$bias, probs=0.5)` | `r quantile(hypo_bylake$bias, probs=0.975)` 
 
 
-\newpage
 #Overall observed vs model:
 ```{r, echo=FALSE, fig.width=5, fig.height=7, warning=FALSE}
 
@@ -65,12 +93,11 @@ plot(out_df$Observed_temp, out_df$Modeled_temp, col=rgb(0, 0, 0, 0.1), pch=16, x
 
 abline(0, 1)
 
-epi = calc_mod_obs_metric(out_df, 'epi.temperature')
 
 plot(epi$obs, epi$mod, col=rgb(0, 0, 0, 0.1), pch=16, xlim=c(0,30), ylim=c(0,30), ylab='Modeled Epi', xaxt='n')
 abline(0, 1)
 
-hypo = calc_mod_obs_metric(out_df, 'hypo.temperature')
+
 plot(hypo$obs, hypo$mod, col=rgb(0, 0, 0, 0.1), pch=16, xlim=c(0,30), ylim=c(0,30), ylab='Modeled Hypo', xlab='Observed')
 abline(0, 1)
 
@@ -83,7 +110,9 @@ Epi RMSE  | `r sqrt(mean((epi$mod - epi$obs)^2, na.rm=TRUE))` | `r mean(epi$mod
 Hypo RMSE | `r sqrt(mean((hypo$mod - hypo$obs)^2, na.rm=TRUE))` | `r mean(hypo$mod - hypo$obs, na.rm=TRUE)`
 
 
-\newpage
+
+
+
 #Residuals across depth:
 
 ```{r, echo=FALSE}
@@ -91,7 +120,7 @@ boxplot(modLessObs~Depth_1, out_df, ylab="Model - Observation", xlab='Depth (m)'
 abline(h=0)
 ```
 
-\newpage
+
 #Seasonality of residuals:
 
 ```{r, echo=FALSE}
@@ -101,4 +130,89 @@ abline(h=0)
 ```
 
 
+```{r, echo=FALSE}
+
+library(leaflet)
+library(plyr)
+library(lakeattributes)
+
+lake_stats = ddply(out_df, 'site_id', function(df){
+  
+  rmse = sqrt(mean((df$Observed_temp - df$Modeled_temp)^2, na.rm=TRUE))
+  bias = mean(df$Modeled_temp - df$Observed_temp, na.rm=TRUE)
+  n = nrow(df)
+  data.frame(rmse, bias, n)
+  
+})
+
+data(canopy)
+
+lake_stats = merge(lake_stats, lakeattributes::location, by='site_id')
+lake_stats = merge(lake_stats, lakeattributes::area, by='site_id')
+lake_stats = merge(lake_stats, lakeattributes::zmax, by='site_id')
+lake_stats$secchi = 1.7/get_kd_avg(lake_stats$site_id)$kd_avg
+lake_stats = merge(lake_stats, canopy, by='site_id')
+
+```
+
+#
+```{r, echo=FALSE}
+
+par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0))
+plot((lake_stats$area_m2), lake_stats$bias, xlab='Lake area m^2', ylab='by-obs Bias', xaxt='n', log='x')
+plot((lake_stats$area_m2), lake_stats$rmse, xlab='Lake area m^2', ylab='by-obs RMSE', log='x')
+mtext(text='Log10(lake area m^2)', side=1, line=2)
 
+```
+
+```{r, echo=FALSE}
+par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0))
+plot(lake_stats$secchi, lake_stats$bias, xlab='Secchi(m)', ylab='by-obs Bias', xaxt='n')
+plot(lake_stats$secchi, lake_stats$rmse, xlab='Secchi(m)', ylab='by-obs RMSE')
+mtext(text='Secchi(m)', side=1, line=2)
+```
+
+```{r, echo=FALSE}
+par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0))
+plot(lake_stats$zmax_m, lake_stats$bias, log='x', xlab='Log(zmax)', ylab='by-obs Bias', xaxt='n')
+plot(lake_stats$zmax_m, lake_stats$rmse, log='x', xlab='Log(zmax)', ylab='by-obs RMSE')
+mtext(text='zmax', side=1, line=2)
+# 
+# par(mfrow=c(2,1))
+# boxplot(bias~canopy_m, lake_stats)
+# abline(h=0)
+# height = as.numeric(names(tmp))
+# barplot(height~tmp)
+
+
+```
+
+## Map of Bias
+
+```{r, echo=FALSE}
+
+m = leaflet() %>% addTiles() %>% 
+  addCircleMarkers(lng=lake_stats$lon, lat=lake_stats$lat, radius=4, stroke=TRUE, weight=1, 
+                   fillColor=colorNumeric(c('Blue', 'White', "Red"), (lake_stats$bias))((lake_stats$bias)),
+                   fillOpacity=1, popup=paste0('Bias: ', lake_stats$bias, '<br>RMSE:', lake_stats$rmse, '<br>N:', 
+                                               lake_stats$n, '<br>ID:', lake_stats$site_id))
+
+m
+
+
+```
+
+## Map of RMSE
+
+```{r, echo=FALSE}
+
+m = leaflet() %>% addTiles() %>% 
+  addCircleMarkers(lng=lake_stats$lon, lat=lake_stats$lat, radius=4, stroke=TRUE, weight=1, 
+                   fillColor=colorNumeric(c('White', "Red"), (lake_stats$rmse))((lake_stats$rmse)),
+                   fillOpacity=1, popup=paste0('Bias: ', lake_stats$bias, '<br>RMSE:', lake_stats$rmse, '<br>N:', 
+                                               lake_stats$n, '<br>ID:', lake_stats$site_id))
+
+m
+
+
+```