-
Notifications
You must be signed in to change notification settings - Fork 14
/
Stan-Ydich-XnomSsubj-MbinomBetaOmegaKappa.R
206 lines (198 loc) · 9.35 KB
/
Stan-Ydich-XnomSsubj-MbinomBetaOmegaKappa.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# Stan-Ydich-XnomSsubj-MbernBetaOmegaKappa.R
# Accompanies the book:
# Kruschke, J. K. (2014). Doing Bayesian Data Analysis:
# A Tutorial with R, JAGS, and Stan. 2nd Edition. Academic Press / Elsevier.
# Adapted for Stan by Joe Houpt
source("DBDA2E-utilities.R")
#===============================================================================
genMCMC = function( data , sName="s" , yName="y" ,
numSavedSteps=50000 , saveName=NULL , thinSteps=1 ) {
require(rstan)
#-----------------------------------------------------------------------------
# THE DATA.
# N.B.: This function expects the data to be a data frame,
# with one component named y being a vector of integer 0,1 values,
# and one component named s being a factor of subject identifiers.
y = as.numeric(data[,yName])
s = as.numeric(data[,sName]) # ensures consecutive integer levels
if ( any( y!=0 & y!=1 ) ) { stop("All y values must be 0 or 1.") }
z = aggregate( y , by=list(s) , FUN=sum )$x
N = aggregate( rep(1,length(y)) , by=list(s) , FUN=sum )$x
Nsubj = length(unique(s))
# Specify the data in a list, for later shipment to JAGS:
dataList = list(
z = z ,
N = N ,
n_subj = Nsubj
)
#-----------------------------------------------------------------------------
# INTIALIZE THE CHAINS.
# Initial values of MCMC chains based on data:
initsList = function() {
thetaInit = rep(0,Nsubj)
for ( sIdx in 1:Nsubj ) { # for each subject
includeRows = ( s == sIdx ) # identify rows of this subject
yThisSubj = y[includeRows] # extract data of this subject
resampledY = sample( yThisSubj , replace=TRUE ) # resample
thetaInit[sIdx] = sum(resampledY)/length(resampledY)
}
thetaInit = 0.001+0.998*thetaInit # keep away from 0,1
meanThetaInit = mean( thetaInit )
kappaInit = 100 # lazy, start high and let burn-in find better value
return( list( theta=thetaInit , omega=meanThetaInit ,
kappaMinusTwo=kappaInit-2 ) )
}
#-----------------------------------------------------------------------------
# RUN THE CHAINS
parameters = c( "theta","omega","kappa") # The parameters to be monitored
burnInSteps = 500 # Number of steps to burn-in the chains
nChains = 4 # nChains should be 2 or more for diagnostics
# Translate to C++ and compile to DSO:
stanDso <- stan_model( file="Ydich-XnomSsubj-MbinomBetaOmegaKappa.stan" )
# Get MC sample of posterior:
stanFit <- sampling( object=stanDso ,
data = dataList ,
#pars = parameters , # optional
chains = nChains ,
iter = ( ceiling(numSavedSteps/nChains)*thinSteps
+burnInSteps ) ,
warmup = burnInSteps ,
thin = thinSteps ,
init = initsList ) # optional
# Or, accomplish above in one "stan" command; note stanDso is not separate.
# For consistency with JAGS-oriented functions in DBDA2E collection,
# convert stan format to coda format:
codaSamples = mcmc.list( lapply( 1:ncol(stanFit) ,
function(x) { mcmc(as.array(stanFit)[,x,]) } ) )
# resulting codaSamples object has these indices:
# codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
if ( !is.null(saveName) ) {
save( codaSamples , file=paste(saveName,"Mcmc.Rdata",sep="") )
save( stanFit , file=paste(saveName,"StanFit.Rdata",sep="") )
save( stanDso , file=paste(saveName,"StanDso.Rdata",sep="") )
}
return( codaSamples )
} # end function
#===============================================================================
smryMCMC = function( codaSamples , compVal=0.5 , rope=NULL ,
diffIdVec=NULL , compValDiff=0.0 , ropeDiff=NULL ,
saveName=NULL ) {
mcmcMat = as.matrix(codaSamples,chains=TRUE)
Ntheta = length(grep("theta",colnames(mcmcMat)))
summaryInfo = NULL
rowIdx = 0
# overall omega:
summaryInfo = rbind( summaryInfo ,
summarizePost( mcmcMat[,"omega"] ,
compVal=compVal , ROPE=rope ) )
rowIdx = rowIdx+1
rownames(summaryInfo)[rowIdx] = "omega"
# kappa:
summaryInfo = rbind( summaryInfo ,
summarizePost( mcmcMat[,"kappa"] ,
compVal=NULL , ROPE=NULL ) )
rowIdx = rowIdx+1
rownames(summaryInfo)[rowIdx] = "kappa"
# individual theta's:
for ( tIdx in 1:Ntheta ) {
parName = paste0("theta[",tIdx,"]")
summaryInfo = rbind( summaryInfo ,
summarizePost( mcmcMat[,parName] , compVal=compVal , ROPE=rope ) )
rowIdx = rowIdx+1
rownames(summaryInfo)[rowIdx] = parName
}
# differences of individual theta's:
if ( !is.null(diffIdVec) ) {
Nidx = length(diffIdVec)
for ( t1Idx in 1:(Nidx-1) ) {
for ( t2Idx in (t1Idx+1):Nidx ) {
parName1 = paste0("theta[",diffIdVec[t1Idx],"]")
parName2 = paste0("theta[",diffIdVec[t2Idx],"]")
summaryInfo = rbind( summaryInfo ,
summarizePost( mcmcMat[,parName1]-mcmcMat[,parName2] ,
compVal=compValDiff , ROPE=ropeDiff ) )
rowIdx = rowIdx+1
rownames(summaryInfo)[rowIdx] = paste0(parName1,"-",parName2)
}
}
}
# save:
if ( !is.null(saveName) ) {
write.csv( summaryInfo , file=paste(saveName,"SummaryInfo.csv",sep="") )
}
show( summaryInfo )
return( summaryInfo )
}
#===============================================================================
plotMCMC = function( codaSamples , data , sName="s" , yName="y" ,
compVal=0.5 , rope=NULL ,
diffIdVec=NULL , compValDiff=0.0 , ropeDiff=NULL ,
saveName=NULL , saveType="jpg" ) {
#-----------------------------------------------------------------------------
# N.B.: This function expects the data to be a data frame,
# with one component named y being a vector of integer 0,1 values,
# and one component named s being a factor of subject identifiers.
y = data[,yName]
s = as.numeric(data[,sName]) # ensures consecutive integer levels
# Now plot the posterior:
mcmcMat = as.matrix(codaSamples,chains=TRUE)
chainLength = NROW( mcmcMat )
# Plot omega, kappa:
openGraph(width=3.5*2,height=3.0)
par( mfrow=c(1,2) )
par( mar=c(3.5,3,1,1) , mgp=c(2.0,0.7,0) )
postInfo = plotPost( mcmcMat[,"kappa"] , compVal=NULL , ROPE=NULL ,
xlab=bquote(kappa) , main="" ,
xlim=c( min(mcmcMat[,"kappa"]),
quantile(mcmcMat[,"kappa"],probs=c(0.990)) ) )
postInfo = plotPost( mcmcMat[,"omega"] , compVal=compVal , ROPE=rope ,
xlab=bquote(omega) , main="Group Mode" ,
xlim=quantile(mcmcMat[,"omega"],probs=c(0.005,0.995)) )
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"PostOmega",sep=""), type=saveType)
}
# Plot individual theta's and differences:
if ( !is.null(diffIdVec) ) {
Nidx = length(diffIdVec)
openGraph(width=2.5*Nidx,height=2.0*Nidx)
par( mfrow=c(Nidx,Nidx) )
for ( t1Idx in 1:Nidx ) {
for ( t2Idx in 1:Nidx ) {
parName1 = paste0("theta[",diffIdVec[t1Idx],"]")
parName2 = paste0("theta[",diffIdVec[t2Idx],"]")
if ( t1Idx > t2Idx) {
# plot.new() # empty plot, advance to next
par( mar=c(3.5,3.5,1,1) , mgp=c(2.0,0.7,0) , pty="s" )
nToPlot = 700
ptIdx = round(seq(1,chainLength,length=nToPlot))
plot ( mcmcMat[ptIdx,parName2] , mcmcMat[ptIdx,parName1] , cex.lab=1.75 ,
xlab=parName2 , ylab=parName1 , col="skyblue" )
abline(0,1,lty="dotted")
} else if ( t1Idx == t2Idx ) {
par( mar=c(3.5,1,1,1) , mgp=c(2.0,0.7,0) , pty="m" )
postInfo = plotPost( mcmcMat[,parName1] , cex.lab = 1.75 ,
compVal=compVal , ROPE=rope , cex.main=1.5 ,
xlab=parName1 , main="" )
includeRows = ( s == diffIdVec[t1Idx] ) # rows of this subject in data
dataPropor = sum(y[includeRows])/sum(includeRows)
points( dataPropor , 0 , pch="+" , col="red" , cex=3 )
} else if ( t1Idx < t2Idx ) {
par( mar=c(3.5,1,1,1) , mgp=c(2.0,0.7,0) , pty="m" )
postInfo = plotPost( mcmcMat[,parName1]-mcmcMat[,parName2] ,
compVal=compValDiff , ROPE=ropeDiff , cex.main=1.5 ,
xlab=paste0(parName1,"-",parName2) , main="" ,
cex.lab=1.75 )
includeRows1 = ( s == diffIdVec[t1Idx] ) # rows of this subject in data
dataPropor1 = sum(y[includeRows1])/sum(includeRows1)
includeRows2 = ( s == diffIdVec[t2Idx] ) # rows of this subject in data
dataPropor2 = sum(y[includeRows2])/sum(includeRows2)
points( dataPropor1-dataPropor2 , 0 , pch="+" , col="red" , cex=3 )
}
}
}
}
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"PostTheta",sep=""), type=saveType)
}
}
#===============================================================================