template stan

karchjd · Jan 24, 2018 · 7e8a7bf · 7e8a7bf
1 parent 07e2238
commit 7e8a7bf
Show file tree

Hide file tree

Showing 15 changed files with 246 additions and 109 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@
 inst/doc
 README.html
 .DS_Store
+NAMESPACE
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,8 +3,10 @@
 export(generateAR)
 export(generateLGCM)
 export(gppFit)
-export(gppModel)
+export(gppModelOld)
 export(gppSetStart)
+export(parseModel)
 export(predictGPPM)
 export(simulateData)
 import(OpenMx)
+import(rstan)
diff --git a/R/GPPM.R b/R/GPPM.R
@@ -0,0 +1,46 @@
+new_GPPM <- function(mFormula,kFormula,myData){
+  stopifnot(is.character(mFormula))
+  stopifnot(is.character(kFormula))
+  stopifnot(is.data.frame(myData))
+
+  structure(list(
+    mFormula=mFormula, #formula for the mean
+    kFormula=kFormula, #formula for the covariance
+    data=myData,       #data must be a data frame
+    parsedModel=NA,    #model in a parsed format
+    dataForStan=NA,    #data as used for stan
+    stanCode=NA,       #generated stan code
+    stanOut =NA,       #stan output
+    fitRes=NA          #all the fitting results
+  ),class='GPPM')
+}
+
+
+
+parseModel <- function()
+
+
+convertData <- function()
+#' @export
+#' @import rstan
+gppModel <- function(meanFunction,covFunction,myData){
+  theModel <- new_GPPModel(meanFunction,covFunction,myData)
+
+  theModel$dataForStan <-
+
+  theModel$parsedModel <- parseModel(theModel$meanFunction,theModel$covFunction,theModel$data)
+
+  stuffForStan <- toStan(theModel$parsedModel,theModel$data)
+
+  theModel$stanCode <- stuffForStan$stanCode
+
+
+  theModel$stanOut <- fitStan(theModel$stanCode,theModel$dataForStan)
+
+  theModel$fitRes <- fromStan(theModel$stanOut)
+
+  return(theModel)
+}
+
+
+
diff --git a/R/ParsedModel.R b/R/ParsedModel.R
diff --git a/R/extractors.R b/R/extractors.R
diff --git a/R/gpStanModel.R b/R/gpStanModel.R
diff --git a/R/gppFit.R b/R/gppFit.R
@@ -1,6 +1,6 @@
 #' @export
 gppFit <- function(gpModel,...){
-  stopifnot(class(gpModel)=='GPPM')
+  stopifnot(class(gpModel)=='GPPMOld')
   #fit
   fittedModel <- gpModel
   fittedModel$omx <- mxModel(fittedModel$omx,mxCI(names(c(omxGetParameters(fittedModel$omx)))))

diff --git a/R/gppModel.R → R/gppModelOld.R b/R/gppModel.R → R/gppModelOld.R
@@ -107,7 +107,7 @@ generateCov <- function(sIndex,tIndex,covFunction,covVars){
 
 #' @export
 #' @import OpenMx
-gppModel <- function(meanFunction,covFunction,myData){
+gppModelOld <- function(meanFunction,covFunction,myData){
   ##create empty open mx model
   yColsOrig <- names(myData)[grep("Y[[:digit:]]+",names(myData))]
   yCols <- gsub('Y','',yColsOrig)
@@ -189,7 +189,7 @@ gppModel <- function(meanFunction,covFunction,myData){
   names(startParas) <- c(parsedModel$meanPars,parsedModel$covPars)
   gpModel <- list(omx=model,mf=meanFunction,cf=covFunction,data=myData,startParas=startParas,mlParas=startParas,mll=NULL,parsedModel=parsedModel,data=myData)
   gpModel$mlParas[1:length(gpModel$mlParas)] <-NA
-  class(gpModel) <- 'GPPM'
+  class(gpModel) <- 'GPPMOld'
   return(gpModel)
 }
 

diff --git a/R/stanInferface.R b/R/stanInferface.R
@@ -0,0 +1,11 @@
+toStan <- function(){
+
+}
+
+fitStan <- function(){
+
+}
+
+fromStan <- function(){
+
+}
diff --git a/R/stanTemplate.stan b/R/stanTemplate.stan
@@ -0,0 +1,33 @@
+data{
+  int<lower=1> nPer;
+  int<lower=1> nTime[nPer];
+  int<lower=1> maxTime;
+  int<lower=1> nPreds;
+  matrix[maxTime,nPreds] X[nPer];
+  matrix[nPer,maxTime] Y;
+}
+
+parameters{
+  real paras;
+}
+
+transformed parameters{
+  matrix[nPer,maxTime] mu;
+  matrix[maxTime,maxTime] Sigma[nPer];
+  matrix[maxTime,maxTime] cholSigma[nPer];
+  for (i in 1:nPer){
+    mu[i] = <meanfunction>;
+    for(j in 1:nTime[i]){
+      for(k in 1:nTime[i]){
+        Sigma[i,j,k] = <covfunction>;
+      }
+    }
+    L[i,1:nTime[i],1:nTime[i]] = cholesky_decompose(Sigma[i,1:nTime[i],1:nTime[i]]);
+  }
+}
+
+model{
+  for (i in 1:nPer){
+    Y[i,1:nTime[i]] ~  multi_normal_cholesky(mu[i,1:nTime[i]], L[i,1:nTime[i],1:nTime[i]]);
+  }
+}
diff --git a/demo/example2LGCM.R b/demo/example2LGCM.R
@@ -1,9 +1,24 @@
+convertFromWide <- function(myData){
+    res <- as.data.frame(matrix(NA,nrow=nrow(myData)*ncol(myData)/2,ncol=3))
+    names(res) <- c('ID','t','Y')
+    counter <- 1
+    for (i in 1:nrow(myData)){
+      for(j in 1:10){
+      res[counter,'ID'] <- i
+      res[counter,'t'] <- myData[i,paste0('t',j)]
+      res[counter,'Y'] <- myData[i,paste0('Y',j)]
+      counter <- counter + 1
+      }
+    }
+    return(res)
+}
+
 require(gppmr)
 require(OpenMx)
 require(MASS)
   ##settings
   nP <- 300
-  nT <- 20
+  nT <- 10
 
   ##define latent growth curve model using SEM software
   lgcModel <- generateLGCM(nT)
@@ -20,10 +35,15 @@ require(MASS)
   colnames(tMatrix) <- paste0('t',1:nT)
   myData <- as.data.frame(cbind(tMatrix,yMatrix))
 
-  # ##fit data using GPPM
-  gpModel <- gppModel('muI+muS*t','varI+covIS*(t+t!)+varS*t*t!+omxApproxEquals(t,t!,0.0001)*sigma',myData)
+  ##fit data using GPPMOld
+  gpModel <- gppModelOld('muI+muS*t','varI+covIS*(t+t!)+varS*t*t!+omxApproxEquals(t,t!,0.0001)*sigma',myData)
   gpModelFit <- gppFit(gpModel)
 
+  ##fit data using GPPMNew
+  longData <- convertFromWide(myData)
+  longData$IQ <- 0
+  gpModel <- gppModel('muI+IQ+muS*t','varI+covIS*(t+t!)+varS*t*t!+(t==t!)*sigma',longData)
+
   ##compare results
   lgcmSame <- all.equal(gpModelFit$mlParas,omxGetParameters(semModel)[names(gpModelFit$mlParas)],check.attributes=FALSE,tolerance=0.0001)
   message(sprintf('Estimated parameters for the LGCM model are the same: %s',lgcmSame))
diff --git a/demo/example2LGCMWStan.R b/demo/example2LGCMWStan.R
@@ -14,54 +14,93 @@ padLists <- function(Xlist,Ylist){
   return(list(Xlist=Xlist,Ylist=Ylist,measuresPerPerson=measuresPerPerson))
 }
 
-require(gppmr)
-require(OpenMx)
-require(MASS)
-require(rstan)
-require(tictoc)
-  ##settings
-  nP <- 300
-  nT <- 20
+extractStuff <- function(stanRes,name,index){
+  P <- stanRes$data$P[index]
+  switch(name,
+          Sigma={
+            res <- matrix(nrow=P,ncol=P)
+            for (i in 1:P){
+              for (j in 1:P){
 
-  ##define latent growth curve model using SEM software
-  lgcModel <- generateLGCM(nT)
-  trueModel <- omxSetParameters(lgcModel,labels=c('muI','muS','varI','varS','covIS','sigma'),values=c(10,3,4,10,0.5,10))
+              }
+            }
+          },
+         {
+           error()
+         }
+  )
+  return(res)
+}
+ require(gppmr)
+ require(OpenMx)
+ require(MASS)
+ require(rstan)
+ require(tictoc)
+   ##settings
+   nP <- 40
+   nT <- 15
 
-  ##simulate data
-  yMatrix <- simulateData(trueModel,N=nP)
-  tMatrix <- matrix(rep(0:(nT-1),each=nP),nrow = nP,ncol = nT)
-  yMatrix[rbinom(prod(dim(yMatrix)),1,0.2)] <- NA
+   trueVarI <- 4
+   ##define latent growth curve model using SEM software
+   lgcModel <- generateLGCM(nT)
+   trueModel <- omxSetParameters(lgcModel,labels=c('muI','muS','varI','varS','covIS','sigma'),values=c(10,3,trueVarI,10,0.5,10))
 
+   ##simulate data
+   nReps <- 100
+   stanIn <- rep(NA,nReps)
+   mxIn <- stanIn
+
+     yMatrix <- simulateData(trueModel,N=nP)
+     tMatrix <- matrix(rep(0:(nT-1),each=nP),nrow = nP,ncol = nT)
+     yMatrix[rbinom(prod(dim(yMatrix)),1,0.2)] <- NA
+
+
+     ##fit data using SEM
+     semModel <- mxModel(lgcModel,mxData(yMatrix,type = "raw"))
+     tic()
+     semModel <- mxRun(semModel,silent = TRUE)
+     toc()
+     colnames(tMatrix) <- paste0('t',1:nT)
+     myData <- as.data.frame(cbind(tMatrix,yMatrix))
+
+     # ##fit data using GPPM
+     gpModel <- gppModelOld('muI+muS*t','varI+covIS*(t+t!)+varS*t*t!+omxApproxEquals(t,t!,0.0001)*sigma',myData)
+     tic()
+     # gpModelFit <- gppFit(gpModel)
+     toc()
+     #fit data using stan
+     theModel <- stan_model(file = 'lgcm.stan')
+     Ylist <- list()
+     Xlist <- list()
+     for (i in 1:nrow(yMatrix)){
+       notIsNa <- !is.na(yMatrix[i,])
+       Ylist[[i]] <- yMatrix[i,notIsNa]
+       Xlist[[i]] <- tMatrix[i,notIsNa]
+     }
+     newLists <- padLists(Xlist,Ylist)
+     dataForStan <- list(X=newLists$Xlist,Y=newLists$Ylist,N=nrow(yMatrix),P=newLists$measuresPerPerson,maxP=max(newLists$measuresPerPerson))
+     tic()
+     stanRes <- optimizing(theModel,dataForStan,hessian = TRUE)
+     stanCov <- solve(-stanRes$hessian)
+     stanRes$data <- dataForStan
+     toc()
+     ##compare results
+     # lgcmSame <- all.equal(gpModelFit$mlParas,omxGetParameters(semModel)[names(gpModelFit$mlParas)],check.attributes=FALSE,tolerance=0.0001)
+     lgcmSame <- TRUE
+     omxParas <- omxGetParameters(semModel)
+     lgcmSame2 <- all.equal(omxParas,stanRes$par[names(omxParas)],check.attributes=FALSE,tolerance=0.01)
+     message(sprintf('Estimated parameters for the LGCM model are the same: %s',lgcmSame && lgcmSame2))
+
+     #variances
+
+      mxCov <- vcov(semModel)[rownames(sampCov),colnames(sampCov)]
+      stan196 <- 1.96*sqrt(stanCov['varI','varI'])
+      mx196 <- 1.96*sqrt(mxCov['varI','varI'])
+      mxML <- omxGetParameters(semModel)['varI']
+      stanML <- stanRes$par['varI']
+#
+      #is true parameter in confidence intervals?
+      stanIn[j] <- trueVarI > (stanML-stan196) & trueVarI < (stanML+stan196)
+      mxIn[j] <- trueVarI > (mxML-mx196) & trueVarI < (mxML+mx196)
 
-  ##fit data using SEM
-  semModel <- mxModel(lgcModel,mxData(yMatrix,type = "raw"))
-  tic()
-  semModel <- mxRun(semModel,silent = TRUE)
-  toc()
-  colnames(tMatrix) <- paste0('t',1:nT)
-  myData <- as.data.frame(cbind(tMatrix,yMatrix))
 
-  # ##fit data using GPPM
-  gpModel <- gppModel('muI+muS*t','varI+covIS*(t+t!)+varS*t*t!+omxApproxEquals(t,t!,0.0001)*sigma',myData)
-  tic()
-  gpModelFit <- gppFit(gpModel)
-  toc()
-  #fit data using stan
-  theModel <- stan_model(file = 'lgcm.stan')
-  Ylist <- list()
-  Xlist <- list()
-  for (i in 1:nrow(yMatrix)){
-    notIsNa <- !is.na(yMatrix[i,])
-    Ylist[[i]] <- yMatrix[i,notIsNa]
-    Xlist[[i]] <- tMatrix[i,notIsNa]
-  }
-  newLists <- padLists(Xlist,Ylist)
-  dataForStan <- list(X=newLists$Xlist,Y=newLists$Ylist,N=nrow(yMatrix),P=newLists$measuresPerPerson,maxP=max(newLists$measuresPerPerson))
-  tic()
-  stanRes <- optimizing(theModel,dataForStan)  
-  toc()
-
-  ##compare results
-  lgcmSame <- all.equal(gpModelFit$mlParas,omxGetParameters(semModel)[names(gpModelFit$mlParas)],check.attributes=FALSE,tolerance=0.0001)
-  lgcmSame2 <- all.equal(omxGetParameters(semModel)[names(gpModelFit$mlParas)],stanRes$par[names(gpModelFit$mlParas)],check.attributes=FALSE,tolerance=0.0001)
-  message(sprintf('Estimated parameters for the LGCM model are the same: %s',lgcmSame && lgcmSame2))
diff --git a/demo/example2LGCWStan.R b/demo/example2LGCWStan.R