NaN output in mif2 algorithm #172
Replies: 5 comments
-
@theresasophia, I believe I see what is happening. You ask two questions. In the following, I address the second one first.

Computational complexity of particle filter and iterated filtering computations

Why does execution take longer than you would like? Notice that the latent state process is simulated with an Euler step of delta.t = 1/10 week, and that t0 is set six years before the first observation, so each particle must be advanced through several thousand Euler steps before the first data point is even reached. The particle filter repeats this for every particle, and mif2 repeats the particle filter at every iteration, so the cost multiplies quickly. Perhaps the best way of speeding things up is to increase the Euler step size (see the timing sketch at the end of this reply).

On the importance of allowing for overdispersion

Why is the mif2 output NaN? When, at some observation time, dmeasure assigns negligible likelihood to every particle, the filtering distribution is undefined; this is a filtering failure, and the resulting NaN values propagate through the rest of the computation. [Note that this behavior is changed as of version 1.8.9.1, which, in these circumstances, returns an unweighted mean of the particles, thus avoiding the NaN.]

So the question becomes, why does the model fit the data so poorly? A few exploratory simulations make this clear. First, here is your model, essentially as you set it up:

library(pomp)
library(magrittr)
library(plyr)
library(reshape2)
library(ggplot2)
library(scales)
library(foreach)
library(doParallel)
registerDoParallel()
stopifnot(packageVersion("pomp") >= "1.7")
# observation model: observations are Poisson
rmeas <- Csnippet("
cases1 = rpois(H1);
cases2 = rpois(H2);
cases3 = rpois(H3);
")
#allow for the possibility of NAs in the data
dmeas <- Csnippet("
if (ISNA(cases1)) {
lik = (give_log) ? 0 : 1;
} else {
lik = dpois(cases1, H1, 1) +
dpois(cases2, H2, 1) +
dpois(cases3, H3, 1);
lik = (give_log) ? lik : exp(lik);
}")
# process model is Markovian SIRS with 3 age classes
sir.step <- Csnippet("
double rate[19];
double dN[19];
double Beta1;
double Beta2;
double Beta3;
double I;
I= I1+I2+I3;
Beta1 = beta1*(1 + beta11 * cos(M_2PI/52*t + phi));
Beta2 = beta2*(1 + beta11 * cos(M_2PI/52*t + phi));
Beta3 = beta3*(1 + beta11 * cos(M_2PI/52*t + phi));
rate[0] = alpha*N;    // births into S1
rate[1] = Beta1*I/N;  // force of infection on S1
rate[2] = delta1;     // aging S1 -> S2
rate[3] = Beta2*I/N;  // force of infection on S2
rate[4] = delta2;     // aging S2 -> S3
rate[5] = Beta3*I/N;  // force of infection on S3
rate[6] = mu;         // death from S3
rate[7] = gamma;      // recovery I1 -> R1
rate[8] = delta1;     // aging I1 -> I2
rate[9] = gamma;      // recovery I2 -> R2
rate[10] = delta2;    // aging I2 -> I3
rate[11] = gamma;     // recovery I3 -> R3
rate[12] = mu;        // death from I3
rate[13] = delta1;    // aging R1 -> R2
rate[14] = omega;     // waning immunity R1 -> S1
rate[15] = delta2;    // aging R2 -> R3
rate[16] = omega;     // waning immunity R2 -> S2
rate[17] = mu;        // death from R3
rate[18] = omega;     // waning immunity R3 -> S3
dN[0] = rpois(rate[0]*dt); // births are Poisson
reulermultinom(2, S1, &rate[1], dt, &dN[1]);
reulermultinom(2, S2, &rate[3], dt, &dN[3]);
reulermultinom(2, S3, &rate[5], dt, &dN[5]);
reulermultinom(2, I1, &rate[7], dt, &dN[7]);
reulermultinom(2, I2, &rate[9], dt, &dN[9]);
reulermultinom(2, I3, &rate[11], dt, &dN[11]);
reulermultinom(2, R1, &rate[13], dt, &dN[13]);
reulermultinom(2, R2, &rate[15], dt, &dN[15]);
reulermultinom(2, R3, &rate[17], dt, &dN[17]);
S1 += dN[0] - dN[1] - dN[2] + dN[14];
S2 += dN[2] - dN[3] - dN[4] + dN[16];
S3 += dN[4] - dN[5] - dN[6] + dN[18];
I1 += dN[1] - dN[7] - dN[8];
I2 += dN[3] + dN[8] - dN[9] - dN[10];
I3 += dN[5] + dN[10] - dN[11] - dN[12];
R1 += dN[7] - dN[13] - dN[14];
R2 += dN[9] + dN[13] - dN[15] - dN[16];
R3 += dN[11] + dN[15] - dN[17] - dN[18];
H1 += dN[1];
H2 += dN[3];
H3 += dN[5];
")
# to and from estimation scale
toEst <- Csnippet("Tbeta1 = log(beta1);
Tbeta2 = log(beta2);
Tbeta3 = log(beta3);
Tbeta11 = logit(beta11);
")
fromEst <- Csnippet("Tbeta1 = exp(beta1);
Tbeta2 = exp(beta2);
Tbeta3 = exp(beta3);
Tbeta11 = expit(beta11);
")
# initializer
sir_initializer <- Csnippet("S1= 4872721; I1= 2871; R1= 124408; H1=0;
S2= 54942298; I2= 639; R2= 57063; H2=0;
S3= 19990218; I3= 174; R3= 9608; H3=0;
")
# define parameters (rates are per week; time is measured in weeks)
params <- c(beta1=1.287395e+01, beta2=2.409750e-01, beta3=4.034036e-01,
beta11=1.509189e-01, phi= -1.706477e-03, gamma=1, delta1=1/(5*52),
delta2=1/(55*52), alpha=1/(80*52), mu=1/(20*52), N=80000000, omega=1/(1*52))
# read in the data
# add a row of NAs at t=0 to avoid problems with the accumulator variables,
# since t0 is much less than t[1]
read.table("data.txt") %>%
rbind(data.frame(time=0,cases1=NA,cases2=NA,cases3=NA)) %>%
arrange(time) -> dat
# define the pomp object
pomp(data = dat,
times="time",
t0=1-6*52,
dmeasure = dmeas,
rmeasure = rmeas,
rprocess = euler.sim(step.fun = sir.step, delta.t = 1/10),
statenames = c("S1", "I1", "R1", "H1", "S2", "I2", "R2", "H2","S3","I3", "R3", "H3"),
paramnames = names(params),
zeronames=c("H1", "H2", "H3"),
initializer=sir_initializer,
toEstimationScale=toEst,
fromEstimationScale=fromEst,
params = params
) -> sir

The following plots a few simulations along with the data:

simulate(sir,nsim=5,as.data.frame=TRUE,include.data=TRUE) %>%
subset(time>0,select=c(time,sim,cases1,cases2,cases3)) %>%
melt(id=c("time","sim")) %>%
ggplot(aes(x=time,y=value,color=ifelse(sim=="data","data","simulation"),
alpha=(sim=="data"),
group=sim))+
scale_alpha_discrete(breaks=c(`TRUE`=1,`FALSE`=0.8))+
labs(color="")+
guides(alpha=FALSE)+
geom_line()+
facet_wrap(~variable,ncol=1,scales="free_y")+theme_bw()

The Poisson variability in the measurement model is not sufficient to explain the data at these parameter values. One way to add overdispersion is to replace the Poisson measurement model with a negative binomial model:

pomp(sir,
dmeasure = Csnippet("
if (ISNA(cases1)) {
lik = (give_log) ? 0 : 1;
} else {
lik = dnbinom_mu(cases1, 1/od, H1, 1) +
dnbinom_mu(cases2, 1/od, H2, 1) +
dnbinom_mu(cases3, 1/od, H3, 1);
lik = (give_log) ? lik : exp(lik);}"),
rmeasure = Csnippet("
cases1 = rnbinom_mu(1/od,H1);
cases2 = rnbinom_mu(1/od,H2);
cases3 = rnbinom_mu(1/od,H3);"),
toEstimationScale=Csnippet("
Tbeta1 = log(beta1);
Tbeta2 = log(beta2);
Tbeta3 = log(beta3);
Tbeta11 = logit(beta11);
Tod = log(od);"),
fromEstimationScale=Csnippet("
Tbeta1 = exp(beta1);
Tbeta2 = exp(beta2);
Tbeta3 = exp(beta3);
Tbeta11 = expit(beta11);
Tod = exp(od);"),
statenames = c("H1", "H2","H3"),
paramnames = c(names(params),"od")
) -> sir
coef(sir,"od") <- 1
simulate(sir,nsim=5,as.data.frame=TRUE,include.data=TRUE) %>%
subset(time>0,select=c(time,sim,cases1,cases2,cases3)) %>%
melt(id=c("time","sim")) %>%
ggplot(aes(x=time,y=value,color=ifelse(sim=="data","data","simulation"),
alpha=(sim=="data"),
group=sim))+
scale_alpha_discrete(breaks=c(`TRUE`=1,`FALSE`=0.8))+
labs(color="")+
guides(alpha=FALSE)+
geom_line()+
facet_wrap(~variable,ncol=1,scales="free_y")+theme_bw()

Note that we've added a lot of overdispersion. But the model is now capable of generating data that look like the observations. Let's check the likelihood at these parameter values with the particle filter:

stew(file="pfilter.rda",seed=1320290398,kind="L'Ecuyer",{
system.time(
foreach (i=1:10,.packages='pomp',.options.multicore=list(set.seed=TRUE)) %dopar%
try(pfilter(sir,Np=1000)) -> pf
) -> etime
})
logmeanexp(sapply(pf,logLik),se=TRUE)
## se
## -1.100925e+04 5.838841e-01
etime
## user system elapsed
## 260.208 1.876 29.593

Already we see a substantial improvement in the log likelihood relative to the Poisson model. Let's also look at the effective sample sizes:

pf %>% sapply(eff.sample.size) %>% melt() %>%
ggplot(aes(x=Var1,group=Var2,y=value))+geom_line()+
scale_y_continuous(trans=log1p_trans())+
labs(x="time",y="ESS")+
theme_bw()

[The same plot, applied to the Poisson model, would show an effective sample size near zero at many of the observation times.]

Now let's move on to trying to maximize the likelihood. I started by following your lead and running a local search from your parameter guesses, but with smaller random-walk intensities:

stew(file="local_search.rda",seed=900242057,kind="L'Ecuyer",{
system.time({
foreach (i=1:10,.packages='pomp',.combine=c,.options.multicore=list(set.seed=TRUE)) %dopar% {
mif2(
sir,
Np=1000,
Nmif=20,
cooling.type="geometric",
cooling.fraction.50=0.5,
transform=TRUE,
rw.sd=rw.sd(beta1=0.002,beta2=0.002,beta3=0.002,beta11=0.002,phi=0.002,od=0.01)
)
} -> mifs
}) -> etime
})
etime
## user system elapsed
## 5185.296 4.104 575.688
sapply(mifs,coef)
## [,1] [,2] [,3] [,4] [,5]
## beta1 1.206207e+01 1.202399e+01 1.153665e+01 1.164324e+01 1.179013e+01
## beta2 2.570209e-01 2.717509e-01 2.862369e-01 2.770161e-01 2.740267e-01
## beta3 4.041913e-01 3.865958e-01 4.173082e-01 4.256647e-01 4.062243e-01
## beta11 1.389955e-01 1.466958e-01 1.462300e-01 1.434030e-01 1.408440e-01
## phi 1.437375e-02 7.642879e-02 1.006428e-01 1.977255e-02 6.100431e-02
## gamma 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## delta1 3.846154e-03 3.846154e-03 3.846154e-03 3.846154e-03 3.846154e-03
## delta2 3.496503e-04 3.496503e-04 3.496503e-04 3.496503e-04 3.496503e-04
## alpha 2.403846e-04 2.403846e-04 2.403846e-04 2.403846e-04 2.403846e-04
## mu 9.615385e-04 9.615385e-04 9.615385e-04 9.615385e-04 9.615385e-04
## N 8.000000e+07 8.000000e+07 8.000000e+07 8.000000e+07 8.000000e+07
## omega 1.923077e-02 1.923077e-02 1.923077e-02 1.923077e-02 1.923077e-02
## od 1.390146e-01 1.427957e-01 1.328448e-01 1.246410e-01 1.447729e-01
## [,6] [,7] [,8] [,9] [,10]
## beta1 1.127621e+01 1.198252e+01 1.154654e+01 1.111633e+01 1.140463e+01
## beta2 2.918447e-01 2.694642e-01 2.749054e-01 3.032488e-01 2.950158e-01
## beta3 4.495577e-01 3.888155e-01 4.356756e-01 4.503078e-01 4.223831e-01
## beta11 1.401555e-01 1.411478e-01 1.325803e-01 1.374305e-01 1.421792e-01
## phi 5.232921e-02 6.402672e-02 6.531577e-02 8.128937e-02 2.992768e-02
## gamma 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## delta1 3.846154e-03 3.846154e-03 3.846154e-03 3.846154e-03 3.846154e-03
## delta2 3.496503e-04 3.496503e-04 3.496503e-04 3.496503e-04 3.496503e-04
## alpha 2.403846e-04 2.403846e-04 2.403846e-04 2.403846e-04 2.403846e-04
## mu 9.615385e-04 9.615385e-04 9.615385e-04 9.615385e-04 9.615385e-04
## N 8.000000e+07 8.000000e+07 8.000000e+07 8.000000e+07 8.000000e+07
## omega 1.923077e-02 1.923077e-02 1.923077e-02 1.923077e-02 1.923077e-02
## od 1.314755e-01 1.727626e-01 1.561911e-01 1.362144e-01 1.457950e-01

Note that the parameter estimates are all now finite. Let's look at some convergence diagnostics:

mifs %>%
conv.rec(c("loglik","nfail","beta1","beta2","beta3","beta11","phi","od")) %>%
melt() %>%
ggplot(aes(x=iteration,y=value,color=variable,group=L1))+
geom_line()+
guides(color=FALSE)+
labs(x="MIF2 Iteration",y="")+
facet_wrap(~variable,scales="free_y",ncol=2)+
theme_bw()
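As an aside (a sketch, not part of the original reply): one can also pull a log-likelihood estimate directly from each mif2 object, but these values are computed with perturbed parameters, so they should be treated only as rough indicators; that is why the likelihoods are re-evaluated below with independent pfilter runs at the final parameter estimates.

# rough log-likelihood estimates reported by the mif2 runs themselves;
# these are based on perturbed parameters, so re-evaluate with pfilter
# before drawing conclusions
sapply(mifs, logLik)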
The likelihood climbs quite rapidly and, encouragingly, all filtering failures disappear after the first few iterations. What are the likelihoods at the end of these mif2 runs? We evaluate them with independent pfilter replicates:

stew(file="loglik_local.rda",seed=900242057,kind="L'Ecuyer",{
foreach (mf=mifs,.packages='pomp',.combine=rbind) %dopar% {
evals <- replicate(10,logLik(pfilter(mf,Np=1000)))
logmeanexp(evals,se=TRUE)
} -> liks
})
liks %>% as.data.frame() %>% rename(c(V1="loglik")) %>% arrange(-loglik)
## loglik se
## 1 -10304.25 6.642149
## 2 -10320.71 8.179374
## 3 -10335.08 7.467220
## 4 -10338.77 4.218007
## 5 -10352.40 4.029680
## 6 -10354.17 1.912765
## 7 -10372.13 1.800901
## 8 -10388.87 3.757840
## 9 -10408.12 2.601812
## 10 -10436.88 5.944798

Although we haven't used sufficiently many particles here to get precise estimates, it is clear that the likelihood has improved by several hundred log units. The following plots simulations from the best of these fits:

best <- which.max(liks[,1])
simulate(mifs[[best]],nsim=5,as.data.frame=TRUE,include.data=TRUE) %>%
subset(time>0,select=c(time,sim,cases1,cases2,cases3)) %>%
melt(id=c("time","sim")) %>%
ggplot(aes(x=time,y=value,color=ifelse(sim=="data","data","simulation"),
alpha=(sim=="data"),
group=sim))+
scale_alpha_discrete(breaks=c(`TRUE`=1,`FALSE`=0.8))+
labs(color="")+
guides(alpha=FALSE)+
geom_line()+
facet_wrap(~variable,ncol=1,scales="free_y")+theme_bw()
Although the data look far more plausible as a realization of this model, there is clearly still room for improvement.
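Returning to your question about running times: the following is the kind of comparison I have in mind when I suggest increasing the Euler step size (a rough sketch, not part of my original timing runs; the coarser delta.t = 1/2 is purely illustrative). A larger step changes the approximation to the continuous-time process, so one should check that the likelihood and parameter estimates are not overly sensitive to this choice.

# compare the cost of one particle-filter evaluation at the original
# Euler step (delta.t = 1/10) and at a coarser, illustrative step (delta.t = 1/2)
sir_coarse <- pomp(sir,
                   rprocess = euler.sim(step.fun = sir.step, delta.t = 1/2),
                   statenames = c("S1","I1","R1","H1","S2","I2","R2","H2",
                                  "S3","I3","R3","H3"),
                   paramnames = names(params))
system.time(pfilter(sir, Np = 1000))         # delta.t = 1/10
system.time(pfilter(sir_coarse, Np = 1000))  # delta.t = 1/2: roughly 5x fewer Euler steps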
-
Thank you very much for this great and detailed answer! This was very helpful for me. I was surprised how much the likelihood improved after changing the random-walk intensities; however, as I understand it, there are no precise rules for how to choose them. Since I am planning to include other parameters in the estimation, I wonder if you could share your experience of how to find the 'right' intensities? Does this depend on how 'unsure' I am about my initial guess of the parameter?
-
You are right that I know of no precise rules for choosing the intensity of the random-walk perturbations. My first step was to diagnose the need for overdispersion in your model. Having implemented the negative-binomial model as above, I ran some short mif2 computations with modest perturbation intensities, simply to see how the algorithm behaved.

Looking at the convergence plots, as above, we see the following. Note that the log likelihood is not trending upward noticeably, while the parameter traces continue to wander: the perturbations are large enough that the parameters are being jostled about rather than settling down near a maximum.

By this logic, if we reduce the perturbation sizes, we will slow down the movement of the parameters but allow the algorithm to home in more precisely on the maximum of the likelihood. These observations suggest the next steps: continue to apply mif2, starting from the current estimates and gradually reducing the perturbation intensities (see the sketch below).
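For concreteness, a minimal sketch of what "continue with smaller perturbations" might look like in code (the halved rw.sd values and the smaller cooling fraction are illustrative assumptions, not recommendations from the original reply):

# continue one of the mif2 searches above from its endpoint, with smaller
# random-walk intensities and more aggressive cooling; numbers are illustrative
mf <- mifs[[1]]
mf2 <- mif2(mf,
            Nmif = 50,
            cooling.fraction.50 = 0.3,
            rw.sd = rw.sd(beta1 = 0.001, beta2 = 0.001, beta3 = 0.001,
                          beta11 = 0.001, phi = 0.001, od = 0.005))
logLik(pfilter(mf2, Np = 1000))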
-
Thank you very much for this really helpful answer. I very much appreciate the detail you provide, and I am highly motivated to continue working on this.
-
On this topic, as of version 1.8.9.1, a filtering failure in mif2 no longer produces NaN parameter estimates: when every particle has negligible likelihood, an unweighted mean of the particles is returned instead.
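Whichever version is in use, it is worth checking the filtering-failure diagnostics directly; a small sketch, assuming a fitted mif2 object such as mifs[[1]] from the answer above:

# number of filtering failures recorded at each mif2 iteration;
# persistent nonzero counts indicate that the model, at these parameters,
# cannot explain some of the observations
mf <- mifs[[1]]
conv.rec(mf, "nfail")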
-
The problem I am facing is that, after running a local search with the mif2 algorithm, the object mifs_local contains several entries in which all fitted parameters are NaN (e.g. mifs_local[[2]] in the code below), whereas for others I get sensible output. Surprisingly, this does not produce an error (why?). However, I do see an error when trying to evaluate the likelihood with pfilter, stating that dmeasure returns a non-finite value.
Another big issue is that, even with a very low number of particles and mif2 iterations, I am facing extremely long running times, although the code is parallelized and I have access to a cluster. I am wondering whether this is normal or whether I can change something to speed up my calculations.
Here is my model:
The output of the particle filter is
Now running the mif2 algorithm
Here I obtain for example
BUT
which gives the following error when evaluating pfilter