-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsim.ts.R
282 lines (252 loc) · 13.1 KB
/
sim.ts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
library(dlnm)
library(data.table)
library(dlmtree)
library(mgcv)
library(coda)
##### Sim setup #####
set.seed(sim.num)
nburn <- 2000
niter <- 5000
ntrees <- 20
nthin <- 10
lags <- 20
eval.erc.grid <- 3:30
sim <- list(sim.num = sim.num, n = n, lags = lags, erc = erc, trc = trc,
mod = list(), summary = list(), metrics <- list())
##### Create data #####
data("chicagoNMMAPS")
setDT(chicagoNMMAPS)
dat <- chicagoNMMAPS[month > 4 & month < 10, .(year, month, dow, temp)]
dat[, paste0("temp.", 0:lags) := shift(temp, 0:lags, type = "lag")]
dat <- dat[sample(which(complete.cases(dat)), n)]
dlnm <- ts.dlnm(erc, trc)
cols <- paste0("temp.", 0:lags)
f <- colSums(sapply(1:n, function(i) dlnm(as.numeric(dat[i, ..cols]), 0:lags)))
truth <- sapply(0:lags, function(t) dlnm(eval.erc.grid, t))
sim$truth <- truth
dat$y <- f + rnorm(n, 0, error * sd(f))
cor(dat$y, f)
sim[['cor']] <- cor(dat$y, f)
cols <- paste0("temp.", 0:lags)
temp_dat <- as.matrix(dat[, ..cols])
temp_splits <- seq(min(temp_dat), max(temp_dat), length.out = 12)
se <- sd(temp_dat) / 2
cen = 20
out_trans <- function(x) x
##### Monotone-TDLNM #####
m_name <- "m-tdlnm"
mlist <- list()
s0 <- diag(lags + 1) * 0.561^2
for (i in 1:restarts) {
# cat(".")
set.seed(i)
mlist[[i]] <- tdlnm(y ~ 1,
data = dat,
exposure.data = temp_dat,
exposure.splits = temp_splits,
exposure.se = se,
n.trees = ntrees, n.burn = nburn, n.iter = niter, n.thin = nthin,
monotone.sigma = s0, altmcmc = 1,
monotone = T, verbose = F)
}
sim$mod[[m_name]] <- combine.models(mlist)
sim$summary[[m_name]] <- summary(sim$mod[[m_name]], cenval = cen,
pred.at = eval.erc.grid, verbose = F,
conf.level = 0.95)
sim$summary[[m_name]]$matfit <- out_trans(sim$summary[[m_name]]$matfit)
sim$summary[[m_name]]$cilower <- out_trans(sim$summary[[m_name]]$cilower)
sim$summary[[m_name]]$ciupper <- out_trans(sim$summary[[m_name]]$ciupper)
s <- lapply(mlist, summary, pred.at = eval.erc.grid, verbose = F, mcmc = T)
s_mcmc <- do.call(
mcmc.list,
lapply(1:restarts, function(i) {
d <- as.data.table(as.data.frame.table(s[[i]]$dlm_mcmc))
mcmc(dcast(d, Var3 ~ Var1 + Var2, value.var = "Freq")[, Var3 := NULL][])
})
)
sim$summary[[m_name]]$gelman_rubin <- gelman.diag(s_mcmc, autoburnin = F, multivariate = F)
sim$metrics[[m_name]] <- list(
bias = sim$summary[[m_name]]$matfit - truth,
mse = (sim$summary[[m_name]]$matfit - truth)^2,
cov = mean((sim$summary[[m_name]]$cilower - 0.05 <= truth) & (sim$summary[[m_name]]$ciupper + 0.05 >= truth)),
tp = sum((colSums(truth > 0) > 0) & (sim$summary[[m_name]]$splitProb >= 0.95)) / sum(colSums(truth > 0) > 0),
tn = sum((colSums(truth > 0) == 0) & (sim$summary[[m_name]]$splitProb < 0.95)) / sum(colSums(truth > 0) == 0),
fp = sum((colSums(truth > 0) == 0) & (sim$summary[[m_name]]$splitProb) >= 0.95) / sum(colSums(truth > 0) == 0),
fn = sum((colSums(truth > 0) > 0) & (sim$summary[[m_name]]$splitProb < 0.95)) / sum(colSums(truth > 0) > 0),
convergence_gr = median(sim$summary[[m_name]]$gelman_rubin$psrf[, 1], na.rm = T))
##### Monotone-TDLNM informative priors #####
m_name <- "m-tdlnm_ip"
truth_lags <- which(colSums(truth > 0) > 0)
g0 <- rep(0, lags + 1) # prior prob 0.005-0.995
g0[truth_lags] <- 4.119 # prior prob 0.95-0.995
s0 <- diag(lags + 1) * 2.701^2
for (s in truth_lags)
s0[s, s] <- 0.599^2
ts0 <- rep(1, lags)
ts0[truth_lags] <- 10
ts0 <- ts0 / sum(ts0)
mlist <- list()
for (i in 1:restarts) {
# cat(".")
set.seed(i)
mlist[[i]] <- tdlnm(y ~ 1,
data = dat,
exposure.data = temp_dat,
exposure.splits = temp_splits,
exposure.se = se,
n.trees = ntrees, n.burn = nburn, n.iter = niter, n.thin = nthin,
monotone.gamma0 = g0, monotone.sigma = s0,
time.split.prob = ts0, altmcmc = 1,
monotone = T, verbose = F)
}
sim$mod[[m_name]] <- combine.models(mlist)
sim$summary[[m_name]] <- summary(sim$mod[[m_name]], cenval = cen,
pred.at = eval.erc.grid, verbose = F,
conf.level = 0.95)
sim$summary[[m_name]]$matfit <- out_trans(sim$summary[[m_name]]$matfit)
sim$summary[[m_name]]$cilower <- out_trans(sim$summary[[m_name]]$cilower)
sim$summary[[m_name]]$ciupper <- out_trans(sim$summary[[m_name]]$ciupper)
s <- lapply(mlist, summary, pred.at = eval.erc.grid, verbose = F, mcmc = T)
s_mcmc <- do.call(
mcmc.list,
lapply(1:restarts, function(i) {
d <- as.data.table(as.data.frame.table(s[[i]]$dlm_mcmc))
mcmc(dcast(d, Var3 ~ Var1 + Var2, value.var = "Freq")[, Var3 := NULL][])
})
)
sim$summary[[m_name]]$gelman_rubin <- gelman.diag(s_mcmc, autoburnin = F, multivariate = F)
sim$metrics[[m_name]] <- list(
bias = sim$summary[[m_name]]$matfit - truth,
mse = (sim$summary[[m_name]]$matfit - truth)^2,
cov = mean((sim$summary[[m_name]]$cilower - 0.05 <= truth) & (sim$summary[[m_name]]$ciupper + 0.05 >= truth)),
tp = sum((colSums(truth > 0) > 0) & (sim$summary[[m_name]]$splitProb >= 0.95)) / sum(colSums(truth > 0) > 0),
tn = sum((colSums(truth > 0) == 0) & (sim$summary[[m_name]]$splitProb < 0.95)) / sum(colSums(truth > 0) == 0),
fp = sum((colSums(truth > 0) == 0) & (sim$summary[[m_name]]$splitProb) >= 0.95) / sum(colSums(truth > 0) == 0),
fn = sum((colSums(truth > 0) > 0) & (sim$summary[[m_name]]$splitProb < 0.95)) / sum(colSums(truth > 0) > 0),
convergence_gr = median(sim$summary[[m_name]]$gelman_rubin$psrf[, 1], na.rm = T))
#### TDLNM #####
m_name <- "tdlnm"
mlist <- list()
for (i in 1:restarts) {
# cat(".")
set.seed(i * 100)
mlist[[i]] <- tdlnm(y ~ 1,
data = dat,
exposure.data = temp_dat,
exposure.splits = temp_splits,
exposure.se = se,
n.trees = 20, n.burn = nburn, n.iter = niter, n.thin = nthin,
verbose = F)
}
sim$mod[[m_name]] <- combine.models(mlist)
sim$summary[[m_name]] <- summary(sim$mod[[m_name]], cenval = cen,
pred.at = eval.erc.grid, verbose = F)
sim$summary[[m_name]]$matfit <- out_trans(sim$summary[[m_name]]$matfit)
sim$summary[[m_name]]$cilower <- out_trans(sim$summary[[m_name]]$cilower)
sim$summary[[m_name]]$ciupper <- out_trans(sim$summary[[m_name]]$ciupper)
s <- lapply(mlist, summary, pred.at = eval.erc.grid, verbose = F, mcmc = T)
s_mcmc <- do.call(
mcmc.list,
lapply(1:restarts, function(i) {
d <- as.data.table(as.data.frame.table(s[[i]]$dlm_mcmc))
mcmc(dcast(d, Var3 ~ Var1 + Var2, value.var = "Freq")[, Var3 := NULL][])
})
)
sim$summary[[m_name]]$gelman_rubin <- gelman.diag(s_mcmc, autoburnin = F, multivariate = F)
sim$metrics[[m_name]] <- list(
bias = (sim$summary[[m_name]]$matfit - truth),
mse = ((sim$summary[[m_name]]$matfit - truth)^2),
cov = mean((sim$summary[[m_name]]$cilower <= truth) & (sim$summary[[m_name]]$ciupper >= truth)),
tp = sum((colSums(truth > 0) > 0) & (colSums(sim$summary[[m_name]]$cilower > 0 | sim$summary[[m_name]]$ciupper < 0) > 0)) / sum(colSums(truth > 0) > 0),
tn = sum((colSums(truth > 0) == 0) & (colSums(sim$summary[[m_name]]$cilower > 0 | sim$summary[[m_name]]$ciupper < 0) == 0)) / sum(colSums(truth > 0) == 0),
fp = sum((colSums(truth > 0) == 0) & (colSums(sim$summary[[m_name]]$cilower > 0 | sim$summary[[m_name]]$ciupper < 0) > 0)) / sum(colSums(truth > 0) == 0),
fn = sum((colSums(truth > 0) > 0) & (colSums(sim$summary[[m_name]]$cilower > 0 | sim$summary[[m_name]]$ciupper < 0) == 0)) / sum(colSums(truth > 0) > 0),
convergence_gr = median(sim$summary[[m_name]]$gelman_rubin$psrf[, 1], na.rm = T))
#
#
# ##### TDLNM informative prior #####
m_name <- "tdlnm_ip"
truth_lags <- which(colSums(truth > 0) > 0)
ts0 <- rep(1, lags)
ts0[truth_lags] <- 10
ts0 <- ts0 / sum(ts0)
mlist <- list()
for (i in 1:restarts) {
# cat(".")
set.seed(i * 100)
mlist[[i]] <- tdlnm(y ~ 1,
data = dat,
exposure.data = temp_dat,
exposure.splits = temp_splits,
exposure.se = se,
n.trees = 20, n.burn = nburn, n.iter = niter, n.thin = nthin,
time.split.prob = ts0,
verbose = F)
}
sim$mod[[m_name]] <- combine.models(mlist)
sim$summary[[m_name]] <- summary(sim$mod[[m_name]], cenval = cen,
pred.at = eval.erc.grid, verbose = F)
sim$summary[[m_name]]$matfit <- out_trans(sim$summary[[m_name]]$matfit)
sim$summary[[m_name]]$cilower <- out_trans(sim$summary[[m_name]]$cilower)
sim$summary[[m_name]]$ciupper <- out_trans(sim$summary[[m_name]]$ciupper)
s <- lapply(mlist, summary, pred.at = eval.erc.grid, verbose = F, mcmc = T)
s_mcmc <- do.call(
mcmc.list,
lapply(1:restarts, function(i) {
d <- as.data.table(as.data.frame.table(s[[i]]$dlm_mcmc))
mcmc(dcast(d, Var3 ~ Var1 + Var2, value.var = "Freq")[, Var3 := NULL][])
})
)
sim$summary[[m_name]]$gelman_rubin <- gelman.diag(s_mcmc, autoburnin = F, multivariate = F)
sim$metrics[[m_name]] <- list(
bias = (sim$summary[[m_name]]$matfit - truth),
mse = ((sim$summary[[m_name]]$matfit - truth)^2),
cov = mean((sim$summary[[m_name]]$cilower <= truth) & (sim$summary[[m_name]]$ciupper >= truth)),
tp = sum((colSums(truth > 0) > 0) & (colSums(sim$summary[[m_name]]$cilower > 0 | sim$summary[[m_name]]$ciupper < 0) > 0)) / sum(colSums(truth > 0) > 0),
tn = sum((colSums(truth > 0) == 0) & (colSums(sim$summary[[m_name]]$cilower > 0 | sim$summary[[m_name]]$ciupper < 0) == 0)) / sum(colSums(truth > 0) == 0),
fp = sum((colSums(truth > 0) == 0) & (colSums(sim$summary[[m_name]]$cilower > 0 | sim$summary[[m_name]]$ciupper < 0) > 0)) / sum(colSums(truth > 0) == 0),
fn = sum((colSums(truth > 0) > 0) & (colSums(sim$summary[[m_name]]$cilower > 0 | sim$summary[[m_name]]$ciupper < 0) == 0)) / sum(colSums(truth > 0) > 0),
convergence_gr = median(sim$summary[[m_name]]$gelman_rubin$psrf[, 1], na.rm = T))
##### GAM #####
m_name <- "gam"
cb <- crossbasis(temp_dat, c(0, lags),
arglag = list(fun = "ps", df = 10, intercept = T),
argvar = list(fun = "ps", df = 10))
pen <- cbPen(cb)
sim$mod[[m_name]] <- gam(y ~ cb,
data = dat, paraPen = list(cb = pen))
sim$summary[[m_name]] <- crosspred(cb, sim$mod[[m_name]], at = eval.erc.grid, bylag = 1, cen = cen)
sim$summary[[m_name]]$matfit <- out_trans(sim$summary[[m_name]]$matfit)
sim$summary[[m_name]]$matlow <- out_trans(sim$summary[[m_name]]$matlow)
sim$summary[[m_name]]$mathigh <- out_trans(sim$summary[[m_name]]$mathigh)
sim$metrics[[m_name]] <- list(
bias = (sim$summary[[m_name]]$matfit - truth),
mse = ((sim$summary[[m_name]]$matfit - truth)^2),
cov = mean((sim$summary[[m_name]]$matlow <= truth) & (sim$summary[[m_name]]$mathigh >= truth)),
tp = sum((colSums(truth > 0) > 0) & (colSums(sim$summary[[m_name]]$matlow > 0 | sim$summary[[m_name]]$mathigh < 0) > 0)) / sum(colSums(truth > 0) > 0),
tn = sum((colSums(truth > 0) == 0) & (colSums(sim$summary[[m_name]]$matlow > 0 | sim$summary[[m_name]]$mathigh < 0) == 0)) / sum(colSums(truth > 0) == 0),
fp = sum((colSums(truth > 0) == 0) & (colSums(sim$summary[[m_name]]$matlow > 0 | sim$summary[[m_name]]$mathigh < 0) > 0)) / sum(colSums(truth > 0) == 0),
fn = sum((colSums(truth > 0) > 0) & (colSums(sim$summary[[m_name]]$matlow > 0 | sim$summary[[m_name]]$mathigh < 0) == 0)) / sum(colSums(truth > 0) > 0))
##### GAM informative prior #####
m_name <- "gam_ip"
cb <- crossbasis(temp_dat, c(0, lags),
arglag = list(fun = "ps", df = 10, intercept = T),
argvar = list(fun = "ps", df = 10))
pen <- cbPen(cb, addSlag = rep(0:1, c(4, 6)))
sim$mod[[m_name]] <- gam(y ~ cb,
data = dat, paraPen = list(cb = pen))
sim$summary[[m_name]] <- crosspred(cb, sim$mod[[m_name]], at = eval.erc.grid, bylag = 1, cen = cen)
sim$summary[[m_name]]$matfit <- out_trans(sim$summary[[m_name]]$matfit)
sim$summary[[m_name]]$matlow <- out_trans(sim$summary[[m_name]]$matlow)
sim$summary[[m_name]]$mathigh <- out_trans(sim$summary[[m_name]]$mathigh)
sim$metrics[[m_name]] <- list(
bias = (sim$summary[[m_name]]$matfit - truth),
mse = ((sim$summary[[m_name]]$matfit - truth)^2),
cov = mean((sim$summary[[m_name]]$matlow <= truth) & (sim$summary[[m_name]]$mathigh >= truth)),
tp = sum((colSums(truth > 0) > 0) & (colSums(sim$summary[[m_name]]$matlow > 0 | sim$summary[[m_name]]$mathigh < 0) > 0)) / sum(colSums(truth > 0) > 0),
tn = sum((colSums(truth > 0) == 0) & (colSums(sim$summary[[m_name]]$matlow > 0 | sim$summary[[m_name]]$mathigh < 0) == 0)) / sum(colSums(truth > 0) == 0),
fp = sum((colSums(truth > 0) == 0) & (colSums(sim$summary[[m_name]]$matlow > 0 | sim$summary[[m_name]]$mathigh < 0) > 0)) / sum(colSums(truth > 0) == 0),
fn = sum((colSums(truth > 0) > 0) & (colSums(sim$summary[[m_name]]$matlow > 0 | sim$summary[[m_name]]$mathigh < 0) == 0)) / sum(colSums(truth > 0) > 0))
sim$mod <- NULL
save(sim, file = paste0("sim.ts.out.newMCMC/", erc, ".", trc, ".", n, ".", error,
".", sim.num, ".rda"))