2.13 - maintenance

- rgampois2 to match Stan's neg_binom_2() - ulam() with cmdstan=TRUE should not return unwanted variables anymore - ulam() with cmstan=TRUE should support warmup argument correctly now
rmcelreath · Aug 26, 2020 · 3b48ec8 · 3b48ec8
1 parent 50b82ec
commit 3b48ec8
Show file tree

Hide file tree

Showing 17 changed files with 89 additions and 27 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: rethinking
 Type: Package
 Title: Statistical Rethinking book package
-Version: 2.12
-Date: 2020-06-26
+Version: 2.13
+Date: 2020-08-26
 Author: Richard McElreath
 Maintainer: Richard McElreath <[email protected]>
 Imports: coda, MASS, mvtnorm, loo, shape

diff --git a/R/distributions.r b/R/distributions.r
@@ -179,6 +179,12 @@ rgampois <- function( n , mu , scale ) {
     rnbinom( n , size=shape , prob=prob )
 }
 
+rgampois2 <- function (n , mu , scale) {
+    p_p <- scale / (scale + mu)
+    p_n <- scale
+    rnbinom(n, size = p_n, prob = p_p)
+}
+
 # laplace (double exponential)
 dlaplace <- function(x,location=0,lambda=1,log=FALSE) {
     # f(y) = (1/(2b)) exp( -|y-a|/b )

diff --git a/R/sim.r b/R/sim.r
@@ -41,7 +41,11 @@ sim_core <- function( fit , data , post , vars , n , refresh=0 , replace=list()
         }
         # get simulation partner function
         rlik <- flik
-        if ( ll==FALSE ) substr( rlik , 1 , 1 ) <- "r"
+        if ( ll==FALSE ) {
+            substr( rlik , 1 , 1 ) <- "r"
+            # hook for gampois, which needs rgampois2
+            if ( rlik=="rgampois" ) rlik <- "rgampois2"
+        }
 
         # check for aggregated binomial, but only when ll==TRUE
         aggregated_binomial <- FALSE
@@ -295,6 +299,8 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , ... ) {
     # get simulation partner function
     rlik <- flik
     substr( rlik , 1 , 1 ) <- "r"
+    # hook for gampois, which needs rgampois2
+    if ( rlik=="rgampois" ) rlik <- "rgampois2"
 
     # pull out parameters in likelihood
     pars <- vector(mode="list",length=length(lik[[3]])-1)

diff --git a/R/ulam-class.R b/R/ulam-class.R
@@ -17,6 +17,11 @@ setMethod("precis", "ulam",
 function( object , depth=1 , pars , prob=0.89 , digits=2 , sort=NULL , decreasing=FALSE , lp__=FALSE , omit=NULL ,... ) {
     low <- (1-prob)/2
     upp <- 1-low
+
+    # when fit with cmdstan, all parameters/variable in result
+    # so want to filter at minimum by object@pars
+    if ( missing(pars) ) pars <- object@pars
+
     result <- summary(object@stanfit,pars=pars,probs=c(low,upp))$summary[,c(1,3:7)]
     result <- as.data.frame( result )
 
@@ -42,11 +47,13 @@ function( object , depth=1 , pars , prob=0.89 , digits=2 , sort=NULL , decreasin
     return( new( "precis" , result , digits=digits ) )
 })
 
-
+# models fit with cmdstan=TRUE include all parameters/variables
+# so need to trim what is returned using object@pars
 setMethod("extract.samples","ulam",
-function(object,n,clean=TRUE,...) {
+function(object,n,clean=TRUE,pars,...) {
     #require(rstan)
-    p <- rstan::extract(object@stanfit,...)
+    if (missing(pars)) pars <- object@pars
+    p <- rstan::extract(object@stanfit,pars=pars,...)
     # get rid of dev and lp__
     if ( clean==TRUE ) {
         p[['dev']] <- NULL

diff --git a/R/ulam-function.R b/R/ulam-function.R
@@ -19,7 +19,7 @@ ulam_options <- new.env(parent=emptyenv())
 ulam_options$use_cmdstan <- FALSE
 set_ulam_cmdstan <- function(x=TRUE) assign( "use_cmdstan" , x , env=ulam_options )
 
-ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , iter=1000 , control=list(adapt_delta=0.95) , distribution_library=ulam_dists , macro_library=ulam_macros , custom , constraints , declare_all_data=TRUE , log_lik=FALSE , sample=TRUE , messages=TRUE , pre_scan_data=TRUE , coerce_int=TRUE , sample_prior=FALSE , file=NULL , cmdstan=ulam_options$use_cmdstan , threads=1 , grain=1 , ... ) {
+ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , iter=1000 , warmup , control=list(adapt_delta=0.95) , distribution_library=ulam_dists , macro_library=ulam_macros , custom , constraints , declare_all_data=TRUE , log_lik=FALSE , sample=TRUE , messages=TRUE , pre_scan_data=TRUE , coerce_int=TRUE , sample_prior=FALSE , file=NULL , cmdstan=ulam_options$use_cmdstan , threads=1 , grain=1 , ... ) {
 
     if ( !is.null(file) ) {
         rds_file_name <- concat( file , ".rds" )
@@ -33,6 +33,8 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
     if ( !missing(data) )
         data <- as.list(data)
 
+    if ( missing(warmup) ) warmup <- floor(iter/2)
+
     # check for previous fit passed instead of formula
     prev_stanfit <- FALSE
     if ( class(flist)=="ulam" ) {
@@ -1403,7 +1405,7 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
             if ( prev_stanfit==FALSE ) {
                 if ( cmdstan==FALSE )
                     # rstan interface
-                    stanfit <- stan( model_code = model_code , data = data , pars=use_pars , chains=chains , cores=cores , iter=iter , control=control , ... )
+                    stanfit <- stan( model_code = model_code , data = data , pars=use_pars , chains=chains , cores=cores , iter=iter , control=control , warmup=warmup , ... )
                 else {
                     # use cmdstanr interface
                     require( cmdstanr , quietly=TRUE )
@@ -1414,24 +1416,57 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
                         compile=filex[[3]],
                         cpp_options=list(stan_threads=TRUE) )
                     # set_num_threads( threads )
-                    cmdstanfit <- mod$sample( data=data , chains=chains , parallel_chains=cores , iter_sampling=iter , adapt_delta=as.numeric(control[['adapt_delta']]) , threads_per_chain=threads , ... )
+                    # iter means only post-warmup samples for cmdstanr
+                    # so need to compute iter explicitly
+                    cmdstanfit <- mod$sample( 
+                        data=data , 
+                        chains=chains , parallel_chains=cores , 
+                        iter_warmup=warmup, iter_sampling=floor(iter-warmup) , 
+                        save_warmup=TRUE ,
+                        adapt_delta=as.numeric(control[['adapt_delta']]) ,
+                        threads_per_chain=threads , ... )
                     stanfit <- rstan::read_stan_csv(cmdstanfit$output_files())
                 }
-            } else
-                # rstan interface, previous stanfit object
-                stanfit <- stan( fit = prev_stanfit_object , data = data , pars=use_pars , 
-                         chains=chains , cores=cores , iter=iter , control=control , ... )
+            } else {
+                if ( cmdstan==FALSE ) {
+                    # rstan interface, previous stanfit object
+                    stanfit <- stan( fit = prev_stanfit_object , data = data , pars=use_pars , 
+                         chains=chains , cores=cores , iter=iter , control=control , warmup=warmup , ... )
+                } else {
+                    # SAME AS ABOVE FOR NOW - how to referece exe?
+                    # use cmdstanr interface
+                    require( cmdstanr , quietly=TRUE )
+                    filex <- cmdstanr_model_write( model_code )
+                    mod <- cmdstan_model(
+                        stan_file=filex[[1]],
+                      # exe_file=filex[[2]],
+                        compile=filex[[3]],
+                        cpp_options=list(stan_threads=TRUE) )
+                    # set_num_threads( threads )
+                    # iter means only post-warmup samples for cmdstanr
+                    # so need to compute iter explicitly
+                    cmdstanfit <- mod$sample( 
+                        data=data , 
+                        chains=chains , parallel_chains=cores , 
+                        iter_warmup=warmup, iter_sampling=floor(iter-warmup) , 
+                        save_warmup=TRUE ,
+                        adapt_delta=as.numeric(control[['adapt_delta']]) ,
+                        threads_per_chain=threads , ... )
+                    stanfit <- rstan::read_stan_csv(cmdstanfit$output_files())
+                }
+            }
         } else {
             # WITH explicit start list
+            # Still needs cmdstanr interface
             f_init <- "random"
             if ( class(start)=="list" ) f_init <- function() return(start)
             if ( class(start)=="function" ) f_init <- start
             if ( prev_stanfit==FALSE )
                 stanfit <- stan( model_code = model_code , data = data , pars=use_pars , 
-                         chains=chains , cores=cores , iter=iter , control=control , init=f_init , ... )
+                         chains=chains , cores=cores , iter=iter , control=control , init=f_init , warmup=warmup , ... )
             else
                 stanfit <- stan( fit = prev_stanfit_object , data = data , pars=use_pars , 
-                         chains=chains , cores=cores , iter=iter , control=control , init=f_init , ... )
+                         chains=chains , cores=cores , iter=iter , control=control , init=f_init , warmup=warmup , ... )
         }
     }
 

diff --git a/R/utilities.r b/R/utilities.r
@@ -276,6 +276,9 @@ var2 <- function( x , na.rm=TRUE ) {
     # use E(x^2) - E(x)^2 form
     mean(x^2) - mean(x)^2
 }
+# aliases
+sd_pop <- sd2
+var_pop <- var2
 
 # function for formatting summary table output in various show methods
 # x should be a list or data frame

diff --git a/README.md b/README.md
@@ -29,7 +29,7 @@ There are some advantages to accessing Stan through ``cmdstanr`` rather than rst
 ```
 devtools::install_github("stan-dev/cmdstanr")
 ```
-If you haven't installed cmdstan previously, you will also need to do that with ``install_cmdstan()``. Then you need to add ``cmdstan=TRUE`` to any ``ulam`` code to use cmdstan instead of rstan.
+If you haven't installed cmdstan previously, you will also need to do that with ``install_cmdstan()``. Then you need to add ``cmdstan=TRUE`` to any ``ulam`` code to use cmdstan instead of rstan. To use cmdstan as the default interface, do ``set_ulam_cmdstan(TRUE)``.
 
 Once rstan and cmdstan are installed (almost there), then you can install ``rethinking`` from within R using:
 ```
@@ -419,7 +419,7 @@ x <- rnorm(N)
 m <- 1 + rpois(N,2)
 y <- rbinom( N , size=m , prob=inv_logit(-3+x) )
 dat <- list( y=y , x=x , m=m )
-# single thread
+# two threads
 m1 <- ulam(
     alist(
         y ~ binomial_logit( m , logit_p ),

diff --git a/data/Kline.csv b/data/Kline.csv
diff --git a/data/elephants.csv b/data/elephants.csv
diff --git a/data/galapagos.csv b/data/galapagos.csv
diff --git a/data/homeworkch3.R b/data/homeworkch3.R
diff --git a/data/islands.csv b/data/islands.csv
diff --git a/data/rugged.csv b/data/rugged.csv
diff --git a/data/salamanders.csv b/data/salamanders.csv
diff --git a/data/tulips.csv b/data/tulips.csv
diff --git a/tests/book_chapters/test_chapter13.R b/tests/book_chapters/test_chapter13.R
@@ -8,6 +8,7 @@ expect_equiv_eps <- function( x , y , eps=0.01 ) {
     expect_equivalent( x , y , tolerance=eps )
 }
 
+# set_ulam_cmdstan(TRUE)
 
 ## R code 13.1
 library(rethinking)

diff --git a/tests/rethinking_tests/test_ulam1.R b/tests/rethinking_tests/test_ulam1.R
@@ -8,6 +8,8 @@ expect_equiv_eps <- function( x , y , eps=0.01 ) {
     expect_equivalent( x , y , tolerance=eps )
 }
 
+set_ulam_cmdstan(TRUE)
+
 # student_t
 
 data(Howell1)
@@ -23,7 +25,7 @@ mt <- ulam(
     b ~ normal( 0 , 1 ) ,
     sigma ~ exponential( 1 ),
     nu ~ gamma(2, 0.1)
-  ) , data=d2 , sample=TRUE , refresh=-1 , seed=1 )
+  ) , data=d2 , sample=TRUE , seed=1 )
 
 test_that("Student_t regression code match",
     expect_known_hash( mt@model , "98532fc851" )
@@ -94,6 +96,7 @@ UCBadmit$male <- as.integer( ifelse( UCBadmit$applicant.gender=="male" , 1 , 0 )
 UCBadmit$dept <- rep( 1:6 , each=2 )
 UCBadmit$ag <- UCBadmit$applicant.gender
 UCBadmit$applicant.gender <- NULL
+UCBadmit$reject <- NULL
 
 test_that("quap to ulam binomial",{
     z <- quap(
@@ -103,7 +106,7 @@ test_that("quap to ulam binomial",{
             a ~ dnorm(0,4)
         ), data=UCBadmit )
     zz <- ulam( z , sample=FALSE )
-    expect_known_hash( zz$model , "93f10cbc50" )
+    expect_known_hash( zz$model , "e1f6f2c9a3" )
 })
 
 test_that("ulam constraints",{
@@ -115,7 +118,7 @@ test_that("ulam constraints",{
         ),
         constraints=list(a="lower=0"),
         data=UCBadmit , sample=FALSE )
-    expect_known_hash( z$model , "ea06a8ca36" )
+    expect_known_hash( z$model , "608f2ad66b" )
 })
 
 test_that("ulam binomial log_lik",{
@@ -126,7 +129,7 @@ test_that("ulam binomial log_lik",{
             a ~ normal(0,1),
             b ~ normal(0,0.5)
         ), data=UCBadmit , sample=TRUE , log_lik=TRUE )
-    expect_known_hash( z@model , "097208dacb" )
+    expect_known_hash( z@model , "2add972dae" )
     expect_equivalent( length(WAIC(z)) , 4 )
 })
 
@@ -160,7 +163,7 @@ test_that("ulam continuous missing data 1",{
             b ~ normal(0,1),
             bx ~ normal(0,1)
         ), data=UCBadmit , sample=TRUE , log_lik=FALSE )
-    expect_known_hash( z2b2@model , "ed1532565d" )
+    expect_known_hash( z2b2@model , "6300da1946" )
     expect_equivalent( dim(precis(z2b2,2)) , c(5,6) )
 })
 
@@ -177,7 +180,7 @@ test_that("ulam continuous missing data 2",{
             bx ~ normal(0,1)
         ),
         data=UCBadmit , sample=TRUE , log_lik=TRUE )
-    expect_known_hash( z2b_auto@model , "0656b0bec9" )
+    expect_known_hash( z2b_auto@model , "12312ebec6" )
     expect_equivalent( dim(precis(z2b_auto,2)) , c(5,6) )
 })
 
@@ -197,7 +200,7 @@ test_that("ulam continuous missing data 3",{
             b ~ normal(0,1),
             c(bx,bx2) ~ normal(0,1)
         ), data=UCBadmit , sample=FALSE )
-    expect_known_hash( z2c$model , "db529f4ba3" )
+    expect_known_hash( z2c$model , "111badfdd3" )
 })
 
 
@@ -210,6 +213,7 @@ UCBadmit$applicant.gender <- NULL
 UCBadmit$male2 <- UCBadmit$male
 UCBadmit$male2[1:2] <- (-9) # missingness code
 UCBadmit$male2 <- as.integer(UCBadmit$male2)
+UCBadmit$reject <- NULL
 
 test_that("ulam discrete missing 1",{
     z <- ulam(
@@ -227,7 +231,7 @@ test_that("ulam discrete missing 1",{
             a[dept] ~ normal(0,1),
             b ~ normal(0,1)
         ), data=UCBadmit , sample=TRUE , log_lik=FALSE )
-    expect_known_hash( z@model , "db97f3f2de" )
+    expect_known_hash( z@model , "aed7571a87" )
 })
 
 # same but with custom()
@@ -247,7 +251,7 @@ test_that("ulam discrete missing 2",{
             a[dept] ~ normal(0,4),
             b ~ normal(0,1)
         ), data=UCBadmit , sample=FALSE , log_lik=FALSE )
-    expect_known_hash( z2d$model , "6ca98ae43e" )
+    expect_known_hash( z2d$model , "73cd87c7f2" )
 })
 
 # log_sum_exp form
@@ -267,6 +271,6 @@ test_that("ulam discrete missing 3",{
             a ~ normal(0,4),
             b ~ normal(0,1)
         ), data=UCBadmit , sample=FALSE )
-    expect_known_hash( z2e$model , "eded9b0b6e" )
+    expect_known_hash( z2e$model , "dfca821d69" )
 })