Skip to content

Commit b596d1b

Browse files
authored
Add RMSE to output diagnostics (#73)
1 parent d5e31d2 commit b596d1b

File tree

4 files changed

+19
-11
lines changed

4 files changed

+19
-11
lines changed

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ importFrom(lubridate,ymd_hms)
3131
importFrom(stats,approx)
3232
importFrom(stats,lm)
3333
importFrom(stats,na.omit)
34+
importFrom(stats,predict)
3435
importFrom(utils,browseURL)
3536
importFrom(utils,head)
3637
importFrom(utils,read.csv)

R/models.R

+15-9
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
#' diffusion theory; see Hutchinson and Mosier (1981) and Nakano et al. (2004).
1515
#'
1616
#' For each model type, the following columns are returned:
17-
#' * Model statistics \code{AIC}, \code{r.squared}, \code{sigma},
18-
#' and \code{p.value};
17+
#' * Model statistics \code{AIC}, \code{r.squared}, \code{RMSE},
18+
#' and \code{p.value};
1919
#' * Flux (slope) statistics \code{flux.estimate} and \code{flux.std.error};
2020
#' * Intercept statistics \code{int.estimate} and \code{int.std.error};
2121
#' * For the robust linear regression model only,
@@ -38,7 +38,7 @@
3838
#' 1981. \url{http://dx.doi.org/10.2136/sssaj1981.03615995004500020017x}
3939
#' @importFrom broom glance tidy
4040
#' @importFrom MASS rlm
41-
#' @importFrom stats lm
41+
#' @importFrom stats lm predict
4242
#' @export
4343
#' @examples
4444
#' # Toy data - linear
@@ -64,9 +64,10 @@ ffi_fit_models <- function(time, conc, area, volume) {
6464
# Linear model overall metrics. 'glance' produces 12 different ones;
6565
# we keep the first 5 (adjR2, R2, sigma, statistic, p-value)
6666
lin_model_stats <- glance(mod)[c("r.squared", "sigma", "p.value", "AIC")]
67+
6768
names(lin_model_stats) <- paste0("lin_", names(lin_model_stats))
6869

69-
# Slope and intercept statistics
70+
# Slope and intercept statistics
7071
tmod <- tidy(mod)
7172
lin_slope_stats <- tmod[2, c("estimate", "std.error")]
7273
names(lin_slope_stats) <- paste0("lin_flux.", names(lin_slope_stats))
@@ -84,11 +85,13 @@ ffi_fit_models <- function(time, conc, area, volume) {
8485
rob_model_stats <- glance(robust)[c("sigma", "converged", "AIC")]
8586
tmod <- tidy(robust)
8687
rob_slope_stats <- tmod[2, c("estimate", "std.error")]
88+
8789
# rob_int_stats <- tmod[1, c("estimate", "std.error")]
8890
},
8991
error = function(e) {
9092
warning("Could not fit robust linear model")
91-
rob_model_stats <- data.frame(sigma = NA_real_, converged = FALSE, AIC = NA_real_)
93+
rob_model_stats <- data.frame(sigma = NA_real_, converged = FALSE,
94+
AIC = NA_real_, RMSE = NA_real_)
9295
rob_slope_stats <- data.frame(estimate = NA_real_, std.error = NA_real_)
9396
# rob_int_stats <- data.frame(estimate = NA_real_, std.error = NA_real_)
9497
})
@@ -101,11 +104,11 @@ ffi_fit_models <- function(time, conc, area, volume) {
101104
# int_stats <- cbind(int_stats, rob_int_stats)
102105

103106
# Add polynomial regression as a QA/QC check
104-
poly_model_stats <- data.frame(r.squared = NA_real_, AIC = NA_real_)
107+
poly_model_stats <- data.frame(r.squared = NA_real_, AIC = NA_real_, RMSE = NA_real_)
105108
if(length(time) > 3) {
106109
try({
107110
poly <- lm(conc ~ poly(time, 3))
108-
poly_model_stats <- glance(poly)[c("r.squared", "AIC")]
111+
poly_model_stats <- glance(poly)[c("r.squared", "sigma", "AIC")]
109112
})
110113
}
111114

@@ -117,11 +120,11 @@ ffi_fit_models <- function(time, conc, area, volume) {
117120

118121
# The HM1981 approach is based on an exponential model, so derive fit
119122
# statistics by log-transforming the data
120-
if(!is.na(slope_stats$HM81_flux.estimate)) {
121-
ffi_message("NOTE: HM81_flux.estimate is not NA, implying nonlinear data")
123+
if(is.na(slope_stats$HM81_flux.estimate)) {
122124
hm81_model_stats <- data.frame(r.squared = NA_real_, sigma = NA_real_,
123125
p.value = NA_real_, AIC = NA_real_)
124126
} else {
127+
ffi_message("NOTE: HM81_flux.estimate is not NA, implying nonlinear data")
125128
# The time values are probably normalized, i.e. starting at zero
126129
# Add a (presumably) tiny offset so we don't get log(0) errors
127130
mod <- lm(conc ~ log(time + 0.01))
@@ -131,6 +134,9 @@ ffi_fit_models <- function(time, conc, area, volume) {
131134
names(hm81_model_stats) <- paste0("HM81_", names(hm81_model_stats))
132135
model_stats <- cbind(model_stats, hm81_model_stats)
133136

137+
# Change "sigma" to "RMSE"; more intuitive for most users
138+
names(model_stats) <- gsub("sigma", "RMSE", names(model_stats))
139+
134140
# Combine, sort columns, and return
135141
out <- cbind(model_stats, slope_stats, int_stats)
136142
return(out[sort(names(out))])

man/ffi_fit_models.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vignettes/intro-to-fluxfinder.Rmd

+2-1
Original file line numberDiff line numberDiff line change
@@ -275,8 +275,9 @@ x <- round(x, 3)
275275
From the diagnostics returned by `ffi_compute_fluxes`:
276276

277277
* `HM81_flux.estimate` is not `NA`, which only occurs with saturating behavior;
278-
* The `lin_AIC` (`r x$lin_AIC`) and `rob_AIC` (`r x$rob_AIC`) values are similar, so no indication of influential outliers;
278+
* The `lin_AIC` (`r x$lin_AIC`) and `rob_AIC` (`r x$rob_AIC`) [Akaike information criterion](https://en.wikipedia.org/wiki/Akaike_information_criterion) values are similar, so no indication of influential outliers;
279279
* The `lin_r.squared` (`r x$lin_r.squared`) and `poly_r.squared` (`r x$poly_r.squared`) values are _very_ different, suggesting a failure of the linear model;
280+
* The [root mean square error](https://en.wikipedia.org/wiki/Root_mean_square_deviation) (RMSE) of the linear model is much higher than the other models' values;
280281
* The `HM81_r.squared` (`r x$HM81_r.squared`) and `HM81_AIC` (`r x$HM81_AIC`) are considerably higher and lower, respectively, than the linear model.
281282

282283
All of these metrics point to a common conclusion: a linear model is _not_

0 commit comments

Comments
 (0)