-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathToxKin_gnls_run.R
114 lines (97 loc) · 9.52 KB
/
ToxKin_gnls_run.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Fitting of a generalized nonlinear least squares (gnls) models to the data
# variables and parameters are as for the nls model fit
# In addition, the weights argument determines the residual variance function. NULL indicates homoscedastic residual variance,
# whereas the varIdent(), varFixed(), varExp() and varPower() functions are the considered heteroscedastic variance assumptions
# The estimated parameters returned from the nls() model are used as initial values for the gnls() model fits
toxicant.gnls <- gnls(toxicant~one_compartment(C_0,k_1,k_2,C_exp,Time), start=as.list(coef(toxicant.nls)),
weights=NULL,
data=toxkin_dat); summary(toxicant.gnls) # only one sigma^2 is estimated
toxicant.gnls_Id <- gnls(toxicant~one_compartment(C_0,k_1,k_2,C_exp,Time), start=as.list(coef(toxicant.nls)),
weights=varIdent(form = ~ 1|phase),
data=toxkin_dat); summary(toxicant.gnls_Id) # two sigma^2 are estimated
toxicant.gnls_Fix <- gnls(toxicant~one_compartment(C_0,k_1,k_2,C_exp,Time), start=as.list(coef(toxicant.nls)),
weights=varFixed(~ toxicant),
data=toxkin_dat); summary(toxicant.gnls_Fix) # sigma^2 scaled to y_i
toxicant.gnls_Exp <- gnls(toxicant~one_compartment(C_0,k_1,k_2,C_exp,Time), start=as.list(coef(toxicant.nls)),
weights=varExp(form=~toxicant),
data=toxkin_dat); summary(toxicant.gnls_Exp)
toxicant.gnls_Pow <- gnls(toxicant~one_compartment(C_0,k_1,k_2,C_exp,Time), start=as.list(coef(toxicant.nls)),
weights=varPower(form=~toxicant),
data=toxkin_dat); summary(toxicant.gnls_Pow) # Doesn't work for Cu01
# The anova() function performs an analysis of variance (ANOVA) to compare the model fits of the considered gnls() models to the data
# The anova() function returns a table showing statistics such as degrees of freedom, log likelihood, goodness of fit relative to
# model complexity (AIC and BIC), and the likelihood ratio and the associated P-value of nested models
# These statistics help to determine if more complex models (with more parameters) provide a significantly better fit than simpler models.
anova(toxicant.gnls,toxicant.gnls_Fix,toxicant.gnls_Id,toxicant.gnls_Exp,toxicant.gnls_Pow)
p_model <- anova(toxicant.gnls,toxicant.gnls_Id,toxicant.gnls_Fix,toxicant.gnls_Exp,toxicant.gnls_Pow)
write.csv(p_model,paste0(dataset,"_AIC_BIC_logLik.csv")) # Write output of anova() function for the considered models to file
# Select the model with the lowest BIC (best goodness of relative to number of model parameters) and take summary of that model
summ_toxic.gnls <- summary(list(toxicant.gnls,toxicant.gnls_Id,toxicant.gnls_Fix,toxicant.gnls_Exp,toxicant.gnls_Pow)[[which.min(p_model$BIC)]])
write.csv(summary(toxicant.gnls)$tTable, paste0(dataset,"_gnls.csv")) # Write output homoscedastic model to file
sapply(list(compute_BAF(summary(toxicant.gnls)), c(sigma=toxicant.gnls$sigma)),
function(x) write.table(x, paste0(dataset,"_gnls.csv"), append=T, sep=",", col.names=F)) # Write BAF+sigma homoscedastic model to file
write.table(summ_toxic.gnls$tTable, paste0(dataset,"_gnls.csv"), append=T, sep=",") # Write output heteroscedastic model with lowest BIC to file
sapply(list(compute_BAF(summ_toxic.gnls), c(sigma=summ_toxic.gnls$sigma, delta=coef(summ_toxic.gnls$modelStruct$varStruct, allCoef=TRUE))),
function(x) write.table(x, paste0(dataset,"_gnls.csv"), append=T, sep=",", col.names=F)) # Write BAF+sigma heteroscedastic model with lowest BIC to file
write.table("This file contains the gnls model output for the homoscedastic model and the best heteroscedastic model",
paste0(dataset,"_gnls.csv"), append=T, sep=",", col.names=F, row.names=F) # Describe the model output that the data in the file describes
### Generate residuals plots (standardized residuals vs fitted values) for all 5 gnls() models
# A range is set for the y-axis (residuals) and the x-axis (fitted values) using the values of the toxicant
y_range <- c(-3.1,3.1); x_range <- range(toxkin_dat$toxicant)
# The plot() function is used for generating residual plots, with the first argument being the gnls model fit.
# main determines the title of the plot, ylim and xlim the ranges of the y- and x-axes, respectively
# pch determines the symbol for the data points, ylab="" and xlab="" omit axis labels
p01 <- plot(toxicant.gnls, main="Homoscedastic", xlab="", ylim=y_range, xlim=x_range, pch=20)
p02 <- plot(toxicant.gnls_Fix, main="varFixed", xlab="", ylab="", ylim=y_range, xlim=x_range, pch=20)
p03 <- plot(toxicant.gnls_Id, main="varIdent", xlab="", ylab="", ylim=y_range, xlim=x_range, pch=20)
p04 <- plot(toxicant.gnls_Exp, main="varExp", xlab="", ylim=y_range, xlim=x_range, pch=20)
p05 <- plot(toxicant.gnls_Pow, main="varPower", ylab="", ylim=y_range, xlim=x_range, pch=20)
# The residuals are saved as a .pdf file. The pdf() function takes the file name and then the height and width of the pdf in inches
# The grid.arrange() function takes the five residual plots as figure panels and orders due to the numbers of columns given to ncol
# bottom="" omits any outer label at the x-axis
# Running dev.off() closes the pdf file and ensures that no more plots are saved in the pdf file
pdf(paste0(dataset,"_ModRes_scedastic.pdf"), height=6.5, width=10)
grid.arrange(p01, p02, p03, p04, p05,
ncol=3, bottom="")
dev.off()
### Show fitted models using ggplot
# seq() generates a sequence of numbers that represent the time values in the data set
# The sequence starts at the minimum value, with the floor() function rounding it down to the nearest integer
# The sequence ends at the maximum value, with the ceiling() function rounding it up to the nearest integer
t_range <- seq(floor(min(toxkin_dat$Time)), ceiling(max(toxkin_dat$Time)), 1)
# Create a data.frame named y_vs_t for the 5 different models, toxicant values are generated using the one_compartment() model function and
# the estimated gnls() model coefficients are inputs along with the generated sequence of numbers and C_expsr (medium exposure concentration)
y_vs_t <- data.frame(var_t=t_range,
homosk = one_compartment(coef(toxicant.gnls)[1],coef(toxicant.gnls)[2],coef(toxicant.gnls)[3],
C_expsr,t_range),
var_Id = one_compartment(coef(toxicant.gnls_Id)[1],coef(toxicant.gnls_Id)[2],coef(toxicant.gnls_Id)[3],
C_expsr,t_range),
var_Fix = one_compartment(coef(toxicant.gnls_Fix)[1],coef(toxicant.gnls_Fix)[2],coef(toxicant.gnls_Fix)[3],
C_expsr,t_range),
var_Exp = one_compartment(coef(toxicant.gnls_Exp)[1],coef(toxicant.gnls_Exp)[2],coef(toxicant.gnls_Exp)[3],
C_expsr,t_range),
var_Pow = one_compartment(coef(toxicant.gnls_Pow)[1],coef(toxicant.gnls_Pow)[2],coef(toxicant.gnls_Pow)[3],
C_expsr,t_range))
# Generate a plot with multiple lines, each representing the predictions from the 5 different models assigned to object p1.
# The ggplot() function uses the y_vs_t data.frame and sets the aesthetic mapping for the x-axis to t_range using aes(x = t_range).
# Then, to plot a line for each model, predicted values are taken from columns of the y_vs_t data.frame, the line color
# is set to the variance functions, and the linewidth argument sets the line width
# theme(.) customizes the theme using: legend.position = c(0.775, 0.9) to places the legend at the specified coordinates within the plot area,
# legend.text = element_text(size=11) sets the font size of the legend text to 11, legend.title = element_text("",size=0) sets the
# legend title to an empty string with a size of 0 (effectively removing it), legend.margin=margin(0.1,0.1,0.1,0.1,'cm') sets the margin around the legend.
# ggtitle(paste0(toxic, " exposure")) sets the title of the plot to the string concatenated from the toxic variable and " exposure",
# ylab(expression("Content in" ~ paste(mu, g, "/g"))) sets the y-axis label to "Content in µg/g" using the expression function to properly format the Greek letter mu (and the unit "g/g".
p1 <- ggplot(data=y_vs_t, aes(x=t_range)) +
geom_line(aes(y = homosk, colour = "Homoscedastic"), linewidth=1) +
geom_line(aes(y = var_Id, colour = "var Id"), linewidth=1) +
geom_line(aes(y = var_Fix, colour = "var Fixed"), linewidth=1) +
geom_line(aes(y = var_Exp, colour = "var Exp"), linewidth=1) +
geom_line(aes(y = var_Pow, colour = "var Pow"), linewidth=1) +
theme(legend.position=c(0.775,0.9), legend.text=element_text(size=11), legend.title=element_text("",size=0), legend.margin=margin(0.1,0.1,0.1,0.1,'cm')) +
ggtitle(paste0(toxic," exposure")) + ylab(expression("Content in"~paste(mu,g,"/g"))) + xlab("") #
# Generate a pdf file named according to the dataset variable with the suffix "_gnls.pdf". The pdf contains the plot p1 with additional black
# points representing data from the toxkin_dat data.frame, where the toxicant variable is plotted against the Time variable.
# The height and widht of the pdf file are set to 6 and 7 inches, respectively. The dev.off() function closes and saves the pdf file.
pdf(paste0(dataset,"_gnls.pdf"), height=6, width=7)
p1 + geom_point(data=toxkin_dat, aes(y=toxicant,x=Time), color='black')
dev.off()