updates to run documentation #70
Luke Winslow committed Apr 11, 2016
1 parent 9903979 commit 7902db9
Showing 2 changed files with 171 additions and 30 deletions.
73 changes: 50 additions & 23 deletions demo/cluster_cont_calibrate_bias.R
@@ -28,7 +28,7 @@ clusterCall(c1, function(){library(devtools)})
glmr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/GLMr_3.1.10.tar.gz'))})
glmtools_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/glmtools_0.13.0.tar.gz'))})
lakeattr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/lakeattributes_0.8.4.tar.gz'))})
mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_3.0.3.tar.gz'))})
mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_3.0.4.tar.gz'))})

library(lakeattributes)
library(mda.lakes)
@@ -38,7 +38,7 @@ library(glmtools)
## setup run function

run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nml_args=list(), datasource=NA){


library(lakeattributes)
library(mda.lakes)
@@ -61,9 +61,10 @@ run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nm
#rename for dplyr
nhd_id = site_id

data(wtemp)
obs = filter(wtemp, site_id == nhd_id) %>%
transmute(DateTime=date, Depth=depth, temp=wtemp)
transmute(DateTime=date, Depth=depth, temp=wtemp)

#having a weird issue with resample_to_field, make unique
obs = obs[!duplicated(obs[,1:2]), ]

@@ -79,28 +80,29 @@ run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nm
kd_avg = secchi_conv/mean(kds$secchi_avg, na.rm=TRUE)

prep_run_glm_kd(site_id=site_id,
path=run_dir,
years=years,
kd=kd_avg,
nml_args=c(list(
dt=3600, subdaily=FALSE, nsave=24,
timezone=-6,
csv_point_nlevs=0,
snow_albedo_factor=1.1,
meteo_fl=driver_path),
nml_args))
path=run_dir,
years=years,
kd=kd_avg,
nml_args=c(list(
dt=3600, subdaily=FALSE, nsave=24,
timezone=-6,
csv_point_nlevs=0,
snow_albedo_factor=1.1,
meteo_fl=driver_path,
cd=getCD(site_id, method='Hondzo')),
nml_args))
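## Editor's note (hedged, not part of the commit): the cd argument added above appears to pass a
## wind-sheltering-adjusted drag coefficient from getCD(site_id, method='Hondzo'), consistent with
## the 'Markfort sheltering model' mentioned in run_comment further down.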

cal.data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
cal.data$site_id = site_id

unlink(run_dir, recursive=TRUE)

return(cal.data)

}, error=function(e){
unlink(run_dir, recursive=TRUE)
return(paste(e$call, e$message, Sys.info()["nodename"]))
})
unlink(run_dir, recursive=TRUE)
return(paste(e$call, e$message, Sys.info()["nodename"]))
})
}
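
## Editor's sketch (not part of the commit): run_cal() can also be exercised for a single
## lake outside the cluster, assuming the packages installed above are available locally and
## 'nhd_13344210' is replaced with a real NHD site ID that has driver data.
# one_lake = run_cal('nhd_13344210', years = 1980:1981,
#                    driver_function = get_driver_path, nml_args = list())
# if (inherits(one_lake, 'data.frame')) head(one_lake)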


@@ -116,16 +118,16 @@ driver_fun = function(site_id){
#we want only ramdisk enabled nodes
#ramdisk = clusterCall(c1, function(){file.exists('/mnt/ramdisk')})
#c1 = c1[unlist(ramdisk)]

data(wtemp)
to_run = unique(intersect(intersect(wtemp$site_id, zmax$site_id), secchi$site_id))
#to_run = to_run[to_run %in% read.table('~/managed_lakes_wilma_nhd.tsv', sep='\t', header=TRUE, as.is=TRUE)$id]

run_name = '2016-03-31_cal_NLDAS'
run_comment = "Full calibration run with all available wtemp, secchi and depth"
run_name = '2016-04-06_cal_NLDAS'
run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014"

clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})
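## Editor's note (not in the commit): clusterApplyLB() below load-balances the per-lake jobs,
## handing the next site_id in to_run to whichever worker finishes first.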

out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:1999, datasource=NA)
out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA)

#out = clusterApplyLB(c1, to_run[1:50], run_cal)

@@ -139,7 +141,32 @@ write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row
out_err = out[!unlist(lapply(out, inherits, what='data.frame'))]
save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata'))

rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.pdf'),
rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'),
output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment))


sqrt(mean((out_df$Observed_temp - out_df$Modeled_temp)^2, na.rm=TRUE))
mean(out_df$Observed_temp - out_df$Modeled_temp, na.rm=TRUE)
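
## Editor's sketch (not in the commit): the two lines above are the overall RMSE and mean
## error (observed minus modeled); small helpers make the intent explicit.
# rmse = function(obs, mod) sqrt(mean((obs - mod)^2, na.rm = TRUE))
# mean_err = function(obs, mod) mean(obs - mod, na.rm = TRUE)
# rmse(out_df$Observed_temp, out_df$Modeled_temp)
# mean_err(out_df$Observed_temp, out_df$Modeled_temp)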

run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014"

clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})

out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA)

#out = clusterApplyLB(c1, to_run[1:50], run_cal)

sprintf('%i lakes ran\n', sum(unlist(lapply(out, inherits, what='data.frame'))))


##results
out_df = do.call('rbind', out[unlist(lapply(out, inherits, what='data.frame'))])
write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE)

out_err = out[!unlist(lapply(out, inherits, what='data.frame'))]
save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata'))

rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'),
output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment))


128 changes: 121 additions & 7 deletions demo/document_calibration_overview_template.Rmd
@@ -2,7 +2,7 @@
title: "Cal/Val Summary"
author: "Luke Winslow"
date: "March 22, 2016"
output: pdf_document
output: html_document
params:
out_df: !r data.frame()
run_message: !r "default message"
@@ -13,10 +13,11 @@ Run description: `r params$run_message`



```{r, echo=FALSE, message=FALSE}
```{r, echo=FALSE, message=FALSE, warning=FALSE}
library(lubridate)
library(glmtools)
library(mda.lakes)
library(plyr)
out_df = params$out_df
@@ -28,6 +29,20 @@ out_df$month = month(out_df$DateTime)
out_df$modLessObs = out_df$Modeled_temp - out_df$Observed_temp
out_df$Depth_1 = floor(out_df$Depth/1)*1
epi = calc_mod_obs_metric(out_df, 'epi.temperature')
hypo = calc_mod_obs_metric(out_df, 'hypo.temperature')
metric_calc_bylake = function(df){
data.frame(rmse=sqrt(mean((df$mod - df$obs)^2, na.rm=TRUE)), bias=mean(df$mod - df$obs, na.rm=TRUE), ndates=nrow(df))
}
epi_bylake = ddply(epi, 'site_id', metric_calc_bylake)
hypo_bylake = ddply(hypo, 'site_id', metric_calc_bylake)
epi_bylake = subset(epi_bylake, ndates >=10)
hypo_bylake = subset(hypo_bylake, ndates >=10)
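# (editor note) lakes with fewer than 10 matched sample dates are dropped above so the
# per-lake percentiles reported below are not driven by sparsely observed lakes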
```

@@ -51,11 +66,24 @@ n lakes | `r length(unique(out_df$site_id)) `
RMSE | `r sqrt(mean((out_df$modLessObs)^2, na.rm=TRUE)) `
Mean error| `r mean(out_df$modLessObs, na.rm=TRUE)`

# Epi and Hypo lake-specific RMSE

These are RMSE values calculated for each lake; the 50th and 95th percentiles of those per-lake
values are presented in the table below.

Layer | 50% | 95%
------|-----|-----
Epi | `r quantile(epi_bylake$rmse, probs=0.5)` | `r quantile(epi_bylake$rmse, probs=0.95)`
Hypo | `r quantile(hypo_bylake$rmse, probs=0.5)` | `r quantile(hypo_bylake$rmse, probs=0.95)`
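
As an editor's sketch (not part of the committed template), the rollup behind this table can be
reproduced on toy data: per-lake RMSE first, then percentiles taken across lakes.

```{r, eval=FALSE}
# toy illustration of the per-lake rollup (random data, three fake lakes)
library(plyr)
toy = data.frame(site_id = rep(c('a', 'b', 'c'), each = 5),
                 mod = rnorm(15, 20, 2), obs = rnorm(15, 20, 2))
toy_rmse = ddply(toy, 'site_id', function(df){
  data.frame(rmse = sqrt(mean((df$mod - df$obs)^2, na.rm = TRUE)))
})
quantile(toy_rmse$rmse, probs = c(0.5, 0.95))
```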

#Epi and Hypo lake-specific Bias stats

Layer | 2.5% | 50% | 97.5%
------|-----|-----|-------
Epi Bias | `r quantile(epi_bylake$bias, probs=0.025)` | `r quantile(epi_bylake$bias, probs=0.5)` | `r quantile(epi_bylake$bias, probs=0.975)`
Hypo Bias | `r quantile(hypo_bylake$bias, probs=0.025)` | `r quantile(hypo_bylake$bias, probs=0.5)` | `r quantile(hypo_bylake$bias, probs=0.975)`


\newpage
#Overall observed vs model:
```{r, echo=FALSE, fig.width=5, fig.height=7, warning=FALSE}
@@ -65,12 +93,11 @@ plot(out_df$Observed_temp, out_df$Modeled_temp, col=rgb(0, 0, 0, 0.1), pch=16, x
abline(0, 1)
epi = calc_mod_obs_metric(out_df, 'epi.temperature')
plot(epi$obs, epi$mod, col=rgb(0, 0, 0, 0.1), pch=16, xlim=c(0,30), ylim=c(0,30), ylab='Modeled Epi', xaxt='n')
abline(0, 1)
hypo = calc_mod_obs_metric(out_df, 'hypo.temperature')
plot(hypo$obs, hypo$mod, col=rgb(0, 0, 0, 0.1), pch=16, xlim=c(0,30), ylim=c(0,30), ylab='Modeled Hypo', xlab='Observed')
abline(0, 1)
@@ -83,15 +110,17 @@ Epi RMSE | `r sqrt(mean((epi$mod - epi$obs)^2, na.rm=TRUE))` | `r mean(epi$mod
Hypo RMSE | `r sqrt(mean((hypo$mod - hypo$obs)^2, na.rm=TRUE))` | `r mean(hypo$mod - hypo$obs, na.rm=TRUE)`


\newpage



#Residuals across depth:

```{r, echo=FALSE}
boxplot(modLessObs~Depth_1, out_df, ylab="Model - Observation", xlab='Depth (m)', ylim=c(-10, 10))
abline(h=0)
```

\newpage

#Seasonality of residuals:

```{r, echo=FALSE}
@@ -101,4 +130,89 @@ abline(h=0)
```


```{r, echo=FALSE}
library(leaflet)
library(plyr)
library(lakeattributes)
lake_stats = ddply(out_df, 'site_id', function(df){
rmse = sqrt(mean((df$Observed_temp - df$Modeled_temp)^2, na.rm=TRUE))
bias = mean(df$Modeled_temp - df$Observed_temp, na.rm=TRUE)
n = nrow(df)
data.frame(rmse, bias, n)
})
data(canopy)
lake_stats = merge(lake_stats, lakeattributes::location, by='site_id')
lake_stats = merge(lake_stats, lakeattributes::area, by='site_id')
lake_stats = merge(lake_stats, lakeattributes::zmax, by='site_id')
lake_stats$secchi = 1.7/get_kd_avg(lake_stats$site_id)$kd_avg
lake_stats = merge(lake_stats, canopy, by='site_id')
```
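
A hedged note (editor addition): the `secchi` column above appears to use a Poole–Atkins style
light-extinction approximation, with $z_{SD}$ the Secchi depth in metres:

$$k_d \approx \frac{1.7}{z_{SD}} \quad\Longleftrightarrow\quad z_{SD} \approx \frac{1.7}{k_d}$$

The exact constant used inside `get_kd_avg()` may differ.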

# Bias and RMSE vs. lake characteristics
```{r, echo=FALSE}
par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0))
plot((lake_stats$area_m2), lake_stats$bias, xlab='Lake area m^2', ylab='by-obs Bias', xaxt='n', log='x')
plot((lake_stats$area_m2), lake_stats$rmse, xlab='Lake area m^2', ylab='by-obs RMSE', log='x')
mtext(text='Log10(lake area m^2)', side=1, line=2)
```

```{r, echo=FALSE}
par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0))
plot(lake_stats$secchi, lake_stats$bias, xlab='Secchi(m)', ylab='by-obs Bias', xaxt='n')
plot(lake_stats$secchi, lake_stats$rmse, xlab='Secchi(m)', ylab='by-obs RMSE')
mtext(text='Secchi(m)', side=1, line=2)
```

```{r, echo=FALSE}
par(mfrow=c(2,1), mar=c(0,4,1,1), oma=c(4,0,0,0))
plot(lake_stats$zmax_m, lake_stats$bias, log='x', xlab='Log(zmax)', ylab='by-obs Bias', xaxt='n')
plot(lake_stats$zmax_m, lake_stats$rmse, log='x', xlab='Log(zmax)', ylab='by-obs RMSE')
mtext(text='zmax', side=1, line=2)
#
# par(mfrow=c(2,1))
# boxplot(bias~canopy_m, lake_stats)
# abline(h=0)
# height = as.numeric(names(tmp))
# barplot(height~tmp)
```

## Map of Bias

```{r, echo=FALSE}
m = leaflet() %>% addTiles() %>%
addCircleMarkers(lng=lake_stats$lon, lat=lake_stats$lat, radius=4, stroke=TRUE, weight=1,
fillColor=colorNumeric(c('Blue', 'White', "Red"), (lake_stats$bias))((lake_stats$bias)),
fillOpacity=1, popup=paste0('Bias: ', lake_stats$bias, '<br>RMSE:', lake_stats$rmse, '<br>N:',
lake_stats$n, '<br>ID:', lake_stats$site_id))
m
```
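
If a color scale centered on zero bias is preferred, one hedged alternative (editor sketch, not in
the commit) is to force a symmetric palette domain:

```{r, eval=FALSE}
# sketch: center the diverging palette on zero bias via a symmetric domain
library(leaflet)
max_abs = max(abs(lake_stats$bias), na.rm = TRUE)
bias_pal = colorNumeric(c('Blue', 'White', 'Red'), domain = c(-max_abs, max_abs))
bias_pal(lake_stats$bias)
```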

## Map of RMSE

```{r, echo=FALSE}
m = leaflet() %>% addTiles() %>%
addCircleMarkers(lng=lake_stats$lon, lat=lake_stats$lat, radius=4, stroke=TRUE, weight=1,
fillColor=colorNumeric(c('White', "Red"), (lake_stats$rmse))((lake_stats$rmse)),
fillOpacity=1, popup=paste0('Bias: ', lake_stats$bias, '<br>RMSE:', lake_stats$rmse, '<br>N:',
lake_stats$n, '<br>ID:', lake_stats$site_id))
m
```
