diff --git a/Rmd/orsf_examples.Rmd b/Rmd/orsf_examples.Rmd
index 375e5985..27375573 100644
--- a/Rmd/orsf_examples.Rmd
+++ b/Rmd/orsf_examples.Rmd
@@ -51,7 +51,8 @@ The accelerated ORSF ensemble is the default because it has a nice balance of co
fit_accel <- orsf(pbc_orsf,
control = orsf_control_fast(),
- formula = Surv(time, status) ~ . - id)
+ formula = Surv(time, status) ~ . - id,
+ tree_seeds = 329)
```
@@ -63,7 +64,8 @@ fit_accel <- orsf(pbc_orsf,
fit_cph <- orsf(pbc_orsf,
control = orsf_control_cph(),
- formula = Surv(time, status) ~ . - id)
+ formula = Surv(time, status) ~ . - id,
+ tree_seeds = 329)
```
@@ -73,11 +75,12 @@ fit_cph <- orsf(pbc_orsf,
```{r}
+# select 3 predictors out of 5 to be used in
+# each linear combination of predictors.
fit_net <- orsf(pbc_orsf,
- # select 3 predictors out of 5 to be used in
- # each linear combination of predictors.
control = orsf_control_net(df_target = 3),
- formula = Surv(time, status) ~ . - id)
+ formula = Surv(time, status) ~ . - id,
+ tree_seeds = 329)
```
@@ -133,7 +136,14 @@ Let's make two customized functions to identify linear combinations of predictor
n_tree = 25,
importance = 'permute')
- out <- orsf_vi(fit)[colnames(x_node)]
+ out <- orsf_vi(fit)
+
+ # drop the two least important variables
+ n_vars <- length(out)
+ out[c(n_vars, n_vars-1)] <- 0
+
+ # ensure out has same variable order as input
+ out <- out[colnames(x_node)]
matrix(out, ncol = 1)
@@ -147,14 +157,17 @@ We can plug these functions into `orsf_control_custom()`, and then pass the resu
fit_rando <- orsf(pbc_orsf,
Surv(time, status) ~ . - id,
- control = orsf_control_custom(beta_fun = f_rando))
+ control = orsf_control_custom(beta_fun = f_rando),
+ tree_seeds = 329)
fit_pca <- orsf(pbc_orsf,
Surv(time, status) ~ . - id,
- control = orsf_control_custom(beta_fun = f_pca))
+ control = orsf_control_custom(beta_fun = f_pca),
+ tree_seeds = 329)
fit_rlt <- orsf(pbc_orsf, time + status ~ . - id,
- control = orsf_control_custom(beta_fun = f_aorsf))
+ control = orsf_control_custom(beta_fun = f_aorsf),
+ tree_seeds = 329)
```
@@ -193,9 +206,9 @@ sc$Brier$score[order(-IPA), .(model, times, IPA)]
From inspection,
-- the `glmnet` approach has the highest discrimination and index of prediction accuracy.
+- `net`, `accel`, and `rlt` have high discrimination and index of prediction accuracy.
-- the random coefficients don't do that well, but they aren't bad.
+- `rando` and `pca` do less well, but they aren't bad.
## tidymodels
diff --git a/man/orsf.Rd b/man/orsf.Rd
index fe59b988..d9c05477 100644
--- a/man/orsf.Rd
+++ b/man/orsf.Rd
@@ -383,7 +383,8 @@ function to find linear combinations of predictors.
\if{html}{\out{
}}\preformatted{fit_accel <- orsf(pbc_orsf,
control = orsf_control_fast(),
- formula = Surv(time, status) ~ . - id)
+ formula = Surv(time, status) ~ . - id,
+ tree_seeds = 329)
}\if{html}{\out{
}}
}
@@ -395,7 +396,8 @@ combinations of predictors:
\if{html}{\out{}}\preformatted{fit_cph <- orsf(pbc_orsf,
control = orsf_control_cph(),
- formula = Surv(time, status) ~ . - id)
+ formula = Surv(time, status) ~ . - id,
+ tree_seeds = 329)
}\if{html}{\out{
}}
}
@@ -407,11 +409,12 @@ linear combinations of predictors. This can be really helpful if you
want to do feature selection within the node, but it is a lot slower
than the other options.
-\if{html}{\out{}}\preformatted{fit_net <- orsf(pbc_orsf,
- # select 3 predictors out of 5 to be used in
- # each linear combination of predictors.
+\if{html}{\out{
}}\preformatted{# select 3 predictors out of 5 to be used in
+# each linear combination of predictors.
+fit_net <- orsf(pbc_orsf,
control = orsf_control_net(df_target = 3),
- formula = Surv(time, status) ~ . - id)
+ formula = Surv(time, status) ~ . - id,
+ tree_seeds = 329)
}\if{html}{\out{
}}
}
@@ -437,10 +440,10 @@ predictors.
\}
}\if{html}{\out{
}}
-\item The third uses \code{orsf()} inside of \code{orsf()} (aka reinforcement learning
-trees \link{RLTs}).
+\item The third uses \code{orsf()} inside of \code{orsf()}.
-\if{html}{\out{}}\preformatted{# some special care is taken to prevent your R session from crashing.
+\if{html}{\out{
}}\preformatted{# This approach is known as reinforcement learning trees.
+# some special care is taken to prevent your R session from crashing.
# Specifically, random coefficients are used when n_obs <= 10
# or n_events <= 5.
@@ -457,9 +460,16 @@ f_aorsf <- function(x_node, y_node, w_node)\{
fit <- orsf(data, time + status ~ .,
weights = as.numeric(w_node),
n_tree = 25,
- importance = 'anova')
+ importance = 'permute')
- out <- orsf_vi(fit)[colnames(x_node)]
+ out <- orsf_vi(fit)
+
+ # drop the two least important variables
+ n_vars <- length(out)
+ out[c(n_vars, n_vars-1)] <- 0
+
+ # ensure out has same variable order as input
+ out <- out[colnames(x_node)]
matrix(out, ncol = 1)
@@ -472,14 +482,17 @@ the result into \code{orsf()}:
\if{html}{\out{
}}\preformatted{fit_rando <- orsf(pbc_orsf,
Surv(time, status) ~ . - id,
- control = orsf_control_custom(beta_fun = f_rando))
+ control = orsf_control_custom(beta_fun = f_rando),
+ tree_seeds = 329)
fit_pca <- orsf(pbc_orsf,
Surv(time, status) ~ . - id,
- control = orsf_control_custom(beta_fun = f_pca))
+ control = orsf_control_custom(beta_fun = f_pca),
+ tree_seeds = 329)
fit_rlt <- orsf(pbc_orsf, time + status ~ . - id,
- control = orsf_control_custom(beta_fun = f_aorsf))
+ control = orsf_control_custom(beta_fun = f_aorsf),
+ tree_seeds = 329)
}\if{html}{\out{
}}
So which fit seems to work best in this example? Let’s find out by
@@ -532,9 +545,9 @@ And the indices of prediction accuracy:
From inspection,
\itemize{
-\item the \code{glmnet} approach has the highest discrimination and index of
+\item \code{net}, \code{accel}, and \code{rlt} have high discrimination and index of
prediction accuracy.
-\item the random coefficients don’t do that well, but they aren’t bad.
+\item \code{rando} and \code{pca} do less well, but they aren’t bad.
}
}
diff --git a/man/orsf_control_custom.Rd b/man/orsf_control_custom.Rd
index 794ce2a9..3bfb4de1 100644
--- a/man/orsf_control_custom.Rd
+++ b/man/orsf_control_custom.Rd
@@ -108,7 +108,12 @@ How well do our two customized ORSFs do? Let’s compute their indices of
prediction accuracy based on out-of-bag predictions:
\if{html}{\out{
}}\preformatted{library(riskRegression)
-library(survival)
+}\if{html}{\out{
}}
+
+\if{html}{\out{
}}\preformatted{## riskRegression version 2023.03.22
+}\if{html}{\out{
}}
+
+\if{html}{\out{
}}\preformatted{library(survival)
risk_preds <- list(rando = 1 - fit_rando$pred_oobag,
pca = 1 - fit_pca$pred_oobag)