From b53b67d378332c8007f7bc63f3186c98aa145ae1 Mon Sep 17 00:00:00 2001 From: bcjaeger Date: Sat, 27 Apr 2024 11:24:21 -0400 Subject: [PATCH] update docs --- R/coerce_nans.R | 6 +- R/orsf_R6.R | 2 + R/orsf_predict.R | 5 + man/orsf.Rd | 216 +++++++++++++++-------------- man/orsf_control.Rd | 62 ++++----- man/orsf_ice_oob.Rd | 260 ++++++++++++++++++----------------- man/orsf_pd_oob.Rd | 195 +++++++++++++------------- man/orsf_vi.Rd | 212 +++++++++++++++------------- man/orsf_vs.Rd | 8 ++ man/predict.ObliqueForest.Rd | 150 +++++++++++--------- 10 files changed, 599 insertions(+), 517 deletions(-) diff --git a/R/coerce_nans.R b/R/coerce_nans.R index dbed7a3d..79e9cec3 100644 --- a/R/coerce_nans.R +++ b/R/coerce_nans.R @@ -8,7 +8,11 @@ coerce_nans.list <- function(x, to){ } -coerce_nans.array <- coerce_nans.matrix <- function(x, to){ +coerce_nans.factor <- + coerce_nans.integer <- + coerce_nans.double <- + coerce_nans.array <- + coerce_nans.matrix <- function(x, to){ if(any(is.nan(x))){ x[is.nan(x)] <- to diff --git a/R/orsf_R6.R b/R/orsf_R6.R index e7ef971f..b19e44d0 100644 --- a/R/orsf_R6.R +++ b/R/orsf_R6.R @@ -441,6 +441,8 @@ ObliqueForest <- R6::R6Class( # object and then use this function. We need checks for that case. new_data <- new_data %||% self$data + # browser() + # run checks before you assign new values to object. # otherwise, if a check throws an error, the object will # not be restored to its normal state. diff --git a/R/orsf_predict.R b/R/orsf_predict.R index e25b8d00..9617c73e 100644 --- a/R/orsf_predict.R +++ b/R/orsf_predict.R @@ -50,6 +50,11 @@ #' is `'mort'` for survival or `'class'` for classification, or an array of #' matrices if `length(pred_horizon) > 1`. #' +#' @param oobag (_logical_) If `FALSE` (the default), predictions will +#' be computed using all trees for each observation. If `TRUE`, then +#' out-of-bag predictions will be computed. This input parameter should +#' only be set to `TRUE` if `new_data` is `NULL`. 
+#' #' @param na_action `r roxy_na_action_header("new_data")` #' #' - `r roxy_na_action_fail("new_data")` diff --git a/man/orsf.Rd b/man/orsf.Rd index 975523b8..5466277d 100644 --- a/man/orsf.Rd +++ b/man/orsf.Rd @@ -364,11 +364,6 @@ data that were not used to train it, i.e., testing data. \section{Examples}{ \if{html}{\out{
}}\preformatted{library(aorsf) library(magrittr) # for \%>\% -#> -#> Attaching package: 'magrittr' -#> The following objects are masked from 'package:testthat': -#> -#> equals, is_less_than, not }\if{html}{\out{
}} \code{orsf()} is the entry-point of the \code{aorsf} package. It can be used to @@ -383,21 +378,23 @@ penguin_fit <- orsf(data = penguins_orsf, formula = species ~ .) penguin_fit -#> ---------- Oblique random classification forest -#> -#> Linear combinations: Accelerated Logistic regression -#> N observations: 333 -#> N classes: 3 -#> N trees: 5 -#> N predictors total: 7 -#> N predictors per node: 3 -#> Average leaves per tree: 4.8 -#> Min observations in leaf: 5 -#> OOB stat value: 0.98 -#> OOB stat type: AUC-ROC -#> Variable importance: anova -#> -#> ----------------------------------------- +}\if{html}{\out{}} + +\if{html}{\out{
}}\preformatted{## ---------- Oblique random classification forest +## +## Linear combinations: Accelerated Logistic regression +## N observations: 333 +## N classes: 3 +## N trees: 5 +## N predictors total: 7 +## N predictors per node: 3 +## Average leaves per tree: 6.4 +## Min observations in leaf: 5 +## OOB stat value: 0.98 +## OOB stat type: AUC-ROC +## Variable importance: anova +## +## ----------------------------------------- }\if{html}{\out{
}} For regression, we use the same data but predict bill length of @@ -409,20 +406,22 @@ bill_fit <- orsf(data = penguins_orsf, formula = bill_length_mm ~ .) bill_fit -#> ---------- Oblique random regression forest -#> -#> Linear combinations: Accelerated Linear regression -#> N observations: 333 -#> N trees: 5 -#> N predictors total: 7 -#> N predictors per node: 3 -#> Average leaves per tree: 50.4 -#> Min observations in leaf: 5 -#> OOB stat value: 0.74 -#> OOB stat type: RSQ -#> Variable importance: anova -#> -#> ----------------------------------------- +}\if{html}{\out{}} + +\if{html}{\out{
}}\preformatted{## ---------- Oblique random regression forest +## +## Linear combinations: Accelerated Linear regression +## N observations: 333 +## N trees: 5 +## N predictors total: 7 +## N predictors per node: 3 +## Average leaves per tree: 49.2 +## Min observations in leaf: 5 +## OOB stat value: 0.75 +## OOB stat type: RSQ +## Variable importance: anova +## +## ----------------------------------------- }\if{html}{\out{
}} My personal favorite is the oblique survival RF with accelerated Cox @@ -438,22 +437,24 @@ pbc_fit <- orsf(data = pbc_orsf, formula = Surv(time, status) ~ . - id) pbc_fit -#> ---------- Oblique random survival forest -#> -#> Linear combinations: Accelerated Cox regression -#> N observations: 276 -#> N events: 111 -#> N trees: 5 -#> N predictors total: 17 -#> N predictors per node: 5 -#> Average leaves per tree: 19.6 -#> Min observations in leaf: 5 -#> Min events in leaf: 1 -#> OOB stat value: 0.78 -#> OOB stat type: Harrell's C-index -#> Variable importance: anova -#> -#> ----------------------------------------- +}\if{html}{\out{}} + +\if{html}{\out{
}}\preformatted{## ---------- Oblique random survival forest +## +## Linear combinations: Accelerated Cox regression +## N observations: 276 +## N events: 111 +## N trees: 5 +## N predictors total: 17 +## N predictors per node: 5 +## Average leaves per tree: 19.4 +## Min observations in leaf: 5 +## Min events in leaf: 1 +## OOB stat value: 0.77 +## OOB stat type: Harrell's C-index +## Variable importance: anova +## +## ----------------------------------------- }\if{html}{\out{
}} \subsection{More than one way to grow a forest}{ @@ -465,22 +466,24 @@ forest instead of a fitted forest. no_fit = TRUE) orsf_spec -#> Untrained oblique random survival forest -#> -#> Linear combinations: Accelerated Cox regression -#> N observations: 276 -#> N events: 111 -#> N trees: 500 -#> N predictors total: 17 -#> N predictors per node: 5 -#> Average leaves per tree: 0 -#> Min observations in leaf: 5 -#> Min events in leaf: 1 -#> OOB stat value: none -#> OOB stat type: Harrell's C-index -#> Variable importance: anova -#> -#> ----------------------------------------- +}\if{html}{\out{}} + +\if{html}{\out{
}}\preformatted{## Untrained oblique random survival forest +## +## Linear combinations: Accelerated Cox regression +## N observations: 276 +## N events: 111 +## N trees: 500 +## N predictors total: 17 +## N predictors per node: 5 +## Average leaves per tree: 0 +## Min observations in leaf: 5 +## Min events in leaf: 1 +## OOB stat value: none +## OOB stat type: Harrell's C-index +## Variable importance: anova +## +## ----------------------------------------- }\if{html}{\out{
}} Why would you do this? Two reasons: @@ -489,28 +492,38 @@ Why would you do this? Two reasons: take to fit the forest before you commit to it: } -\if{html}{\out{
}}\preformatted{ -orsf_spec \%>\% +\if{html}{\out{
}}\preformatted{orsf_spec \%>\% orsf_update(n_tree = 10000) \%>\% orsf_time_to_train() -#> Time difference of 2.029378 secs +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Time difference of 2.32199 secs }\if{html}{\out{
}} \enumerate{ \item If fitting multiple forests, use the blueprint along with \code{orsf_train()} and \code{orsf_update()} to simplify your code: } -\if{html}{\out{
}}\preformatted{ -orsf_fit <- orsf_train(orsf_spec) +\if{html}{\out{
}}\preformatted{orsf_fit <- orsf_train(orsf_spec) orsf_fit_10 <- orsf_update(orsf_fit, leaf_min_obs = 10) orsf_fit_20 <- orsf_update(orsf_fit, leaf_min_obs = 20) orsf_fit$leaf_min_obs -#> [1] 5 -orsf_fit_10$leaf_min_obs -#> [1] 10 -orsf_fit_20$leaf_min_obs -#> [1] 20 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] 5 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{orsf_fit_10$leaf_min_obs +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] 10 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{orsf_fit_20$leaf_min_obs +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] 20 }\if{html}{\out{
}} } @@ -518,8 +531,7 @@ orsf_fit_20$leaf_min_obs \code{tidymodels} includes support for \code{aorsf} as a computational engine: -\if{html}{\out{
}}\preformatted{ -library(tidymodels) +\if{html}{\out{
}}\preformatted{library(tidymodels) library(censored) library(yardstick) @@ -541,8 +553,7 @@ orsf_fit <- fit(orsf_spec, Prediction with \code{aorsf} models at different times is also supported: -\if{html}{\out{
}}\preformatted{ -time_points <- seq(500, 3000, by = 500) +\if{html}{\out{
}}\preformatted{time_points <- seq(500, 3000, by = 500) test_pred <- augment(orsf_fit, new_data = testing(split), @@ -552,29 +563,34 @@ brier_scores <- test_pred \%>\% brier_survival(truth = event_time, .pred) brier_scores -#> # A tibble: 6 x 4 -#> .metric .estimator .eval_time .estimate -#> -#> 1 brier_survival standard 500 0.0515 -#> 2 brier_survival standard 1000 0.0988 -#> 3 brier_survival standard 1500 0.0830 -#> 4 brier_survival standard 2000 0.0667 -#> 5 brier_survival standard 2500 0.102 -#> 6 brier_survival standard 3000 0.141 - -roc_scores <- test_pred \%>\% +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 6 x 4 +## .metric .estimator .eval_time .estimate +## +## 1 brier_survival standard 500 0.0661 +## 2 brier_survival standard 1000 0.0999 +## 3 brier_survival standard 1500 0.110 +## 4 brier_survival standard 2000 0.0789 +## 5 brier_survival standard 2500 0.127 +## 6 brier_survival standard 3000 0.194 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{roc_scores <- test_pred \%>\% roc_auc_survival(truth = event_time, .pred) roc_scores -#> # A tibble: 6 x 4 -#> .metric .estimator .eval_time .estimate -#> -#> 1 roc_auc_survival standard 500 0.977 -#> 2 roc_auc_survival standard 1000 0.954 -#> 3 roc_auc_survival standard 1500 0.965 -#> 4 roc_auc_survival standard 2000 0.975 -#> 5 roc_auc_survival standard 2500 0.974 -#> 6 roc_auc_survival standard 3000 0.955 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 6 x 4 +## .metric .estimator .eval_time .estimate +## +## 1 roc_auc_survival standard 500 0.941 +## 2 roc_auc_survival standard 1000 0.920 +## 3 roc_auc_survival standard 1500 0.925 +## 4 roc_auc_survival standard 2000 0.967 +## 5 roc_auc_survival standard 2500 0.937 +## 6 roc_auc_survival standard 3000 0.889 }\if{html}{\out{
}} } } diff --git a/man/orsf_control.Rd b/man/orsf_control.Rd index 2e4f13b7..1ada468f 100644 --- a/man/orsf_control.Rd +++ b/man/orsf_control.Rd @@ -155,8 +155,7 @@ balance of computational speed and prediction accuracy. It runs a single iteration of Newton Raphson scoring on the Cox partial likelihood function to find linear combinations of predictors. -\if{html}{\out{
}}\preformatted{ -fit_accel <- orsf(pbc_orsf, +\if{html}{\out{
}}\preformatted{fit_accel <- orsf(pbc_orsf, control = orsf_control_survival(), formula = Surv(time, status) ~ . - id, tree_seeds = 329) @@ -170,8 +169,7 @@ repeat iterations until convergence allows you to run Cox regression in each non-terminal node of each survival tree, using the regression coefficients to create linear combinations of predictors: -\if{html}{\out{
}}\preformatted{ -control_cph <- orsf_control_survival(method = 'glm', +\if{html}{\out{
}}\preformatted{control_cph <- orsf_control_survival(method = 'glm', scale_x = TRUE, max_iter = 20) @@ -189,8 +187,7 @@ non-terminal node of each survival tree. This can be really helpful if you want to do feature selection within the node, but it is a lot slower than the \code{'glm'} option. -\if{html}{\out{
}}\preformatted{ -# select 3 predictors out of 5 to be used in +\if{html}{\out{
}}\preformatted{# select 3 predictors out of 5 to be used in # each linear combination of predictors. control_net <- orsf_control_survival(method = 'net', target_df = 3) @@ -211,8 +208,7 @@ here. \item The first uses random coefficients } -\if{html}{\out{
}}\preformatted{ -f_rando <- function(x_node, y_node, w_node)\{ +\if{html}{\out{
}}\preformatted{f_rando <- function(x_node, y_node, w_node)\{ matrix(runif(ncol(x_node)), ncol=1) \} }\if{html}{\out{
}} @@ -220,8 +216,7 @@ f_rando <- function(x_node, y_node, w_node)\{ \item The second derives coefficients from principal component analysis } -\if{html}{\out{
}}\preformatted{ -f_pca <- function(x_node, y_node, w_node) \{ +\if{html}{\out{
}}\preformatted{f_pca <- function(x_node, y_node, w_node) \{ # estimate two principal components. pca <- stats::prcomp(x_node, rank. = 2) @@ -237,8 +232,7 @@ similar to a method known as reinforcement learning trees (see the to the method proposed by Zhu et al.  } -\if{html}{\out{
}}\preformatted{ -f_rlt <- function(x_node, y_node, w_node)\{ +\if{html}{\out{
}}\preformatted{f_rlt <- function(x_node, y_node, w_node)\{ colnames(y_node) <- c('time', 'status') colnames(x_node) <- paste("x", seq(ncol(x_node)), sep = '') @@ -277,8 +271,7 @@ f_rlt <- function(x_node, y_node, w_node)\{ We can plug these functions into \code{orsf_control_custom()}, and then pass the result into \code{orsf()}: -\if{html}{\out{
}}\preformatted{ -fit_rando <- orsf(pbc_orsf, +\if{html}{\out{
}}\preformatted{fit_rando <- orsf(pbc_orsf, Surv(time, status) ~ . - id, control = orsf_control_survival(method = f_rando), tree_seeds = 329) @@ -296,8 +289,7 @@ fit_rlt <- orsf(pbc_orsf, time + status ~ . - id, So which fit seems to work best in this example? Let’s find out by evaluating the out-of-bag survival predictions. -\if{html}{\out{
}}\preformatted{ -risk_preds <- list( +\if{html}{\out{
}}\preformatted{risk_preds <- list( accel = fit_accel$pred_oobag, cph = fit_cph$pred_oobag, net = fit_net$pred_oobag, @@ -316,28 +308,32 @@ sc <- Score(object = risk_preds, The AUC values, from highest to lowest: \if{html}{\out{
}}\preformatted{sc$AUC$score[order(-AUC)] -#> model times AUC se lower upper -#> -#> 1: net 1788 0.9151649 0.02025057 0.8754745 0.9548553 -#> 2: rlt 1788 0.9119200 0.02090107 0.8709547 0.9528854 -#> 3: accel 1788 0.9095628 0.02143250 0.8675558 0.9515697 -#> 4: cph 1788 0.9095628 0.02143250 0.8675558 0.9515697 -#> 5: rando 1788 0.9062197 0.02148854 0.8641029 0.9483365 -#> 6: pca 1788 0.8999479 0.02226683 0.8563057 0.9435901 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## model times AUC se lower upper +## +## 1: net 1788 0.9151649 0.02025057 0.8754745 0.9548553 +## 2: rlt 1788 0.9119200 0.02090107 0.8709547 0.9528854 +## 3: accel 1788 0.9095628 0.02143250 0.8675558 0.9515697 +## 4: cph 1788 0.9095628 0.02143250 0.8675558 0.9515697 +## 5: rando 1788 0.9062197 0.02148854 0.8641029 0.9483365 +## 6: pca 1788 0.8999479 0.02226683 0.8563057 0.9435901 }\if{html}{\out{
}} And the indices of prediction accuracy: \if{html}{\out{
}}\preformatted{sc$Brier$score[order(-IPA), .(model, times, IPA)] -#> model times IPA -#> -#> 1: net 1788 0.4905777 -#> 2: accel 1788 0.4806649 -#> 3: cph 1788 0.4806649 -#> 4: rlt 1788 0.4675228 -#> 5: pca 1788 0.4383995 -#> 6: rando 1788 0.4302814 -#> 7: Null model 1788 0.0000000 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## model times IPA +## +## 1: net 1788 0.4905777 +## 2: accel 1788 0.4806649 +## 3: cph 1788 0.4806649 +## 4: rlt 1788 0.4675228 +## 5: pca 1788 0.4383995 +## 6: rando 1788 0.4302814 +## 7: Null model 1788 0.0000000 }\if{html}{\out{
}} From inspection, diff --git a/man/orsf_ice_oob.Rd b/man/orsf_ice_oob.Rd index e07309a8..96e74f3b 100644 --- a/man/orsf_ice_oob.Rd +++ b/man/orsf_ice_oob.Rd @@ -154,8 +154,7 @@ model’s reliability or fairness. Begin by fitting an oblique classification random forest: -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) @@ -169,26 +168,27 @@ fit_clsf <- orsf(data = penguins_orsf_train, Compute individual conditional expectation using out-of-bag data for \code{flipper_length_mm = c(190, 210)}. -\if{html}{\out{
}}\preformatted{ -pred_spec <- list(flipper_length_mm = c(190, 210)) +\if{html}{\out{
}}\preformatted{pred_spec <- list(flipper_length_mm = c(190, 210)) ice_oob <- orsf_ice_oob(fit_clsf, pred_spec = pred_spec) ice_oob -#> Key: -#> id_variable id_row class flipper_length_mm pred -#> -#> 1: 1 1 Adelie 190 0.92169247 -#> 2: 1 2 Adelie 190 0.80944657 -#> 3: 1 3 Adelie 190 0.85172955 -#> 4: 1 4 Adelie 190 0.93559327 -#> 5: 1 5 Adelie 190 0.97708693 -#> --- -#> 896: 2 146 Gentoo 210 0.26092984 -#> 897: 2 147 Gentoo 210 0.04798334 -#> 898: 2 148 Gentoo 210 0.07927359 -#> 899: 2 149 Gentoo 210 0.84779971 -#> 900: 2 150 Gentoo 210 0.11105143 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Key: +## id_variable id_row class flipper_length_mm pred +## +## 1: 1 1 Adelie 190 0.92169247 +## 2: 1 2 Adelie 190 0.80944657 +## 3: 1 3 Adelie 190 0.85172955 +## 4: 1 4 Adelie 190 0.93559327 +## 5: 1 5 Adelie 190 0.97708693 +## --- +## 896: 2 146 Gentoo 210 0.26092984 +## 897: 2 147 Gentoo 210 0.04798334 +## 898: 2 148 Gentoo 210 0.07927359 +## 899: 2 149 Gentoo 210 0.84779971 +## 900: 2 150 Gentoo 210 0.11105143 }\if{html}{\out{
}} There are two identifiers in the output: @@ -203,13 +203,14 @@ Note that predicted probabilities are returned for each class and each observation in the data. Predicted probabilities for a given observation and given variable value sum to 1. For example, -\if{html}{\out{
}}\preformatted{ -ice_oob \%>\% +\if{html}{\out{
}}\preformatted{ice_oob \%>\% .[flipper_length_mm == 190] \%>\% .[id_row == 1] \%>\% .[['pred']] \%>\% sum() -#> [1] 1 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] 1 }\if{html}{\out{
}} } @@ -217,8 +218,7 @@ ice_oob \%>\% Begin by fitting an oblique regression random forest: -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) @@ -232,85 +232,87 @@ fit_regr <- orsf(data = penguins_orsf_train, Compute individual conditional expectation using new data for \code{flipper_length_mm = c(190, 210)}. -\if{html}{\out{
}}\preformatted{ -pred_spec <- list(flipper_length_mm = c(190, 210)) +\if{html}{\out{
}}\preformatted{pred_spec <- list(flipper_length_mm = c(190, 210)) ice_new <- orsf_ice_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new -#> id_variable id_row flipper_length_mm pred -#> -#> 1: 1 1 190 37.94483 -#> 2: 1 2 190 37.61595 -#> 3: 1 3 190 37.53681 -#> 4: 1 4 190 39.49476 -#> 5: 1 5 190 38.95635 -#> --- -#> 362: 2 179 210 51.80471 -#> 363: 2 180 210 47.27183 -#> 364: 2 181 210 47.05031 -#> 365: 2 182 210 50.39028 -#> 366: 2 183 210 48.44774 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## id_variable id_row flipper_length_mm pred +## +## 1: 1 1 190 37.94483 +## 2: 1 2 190 37.61595 +## 3: 1 3 190 37.53681 +## 4: 1 4 190 39.49476 +## 5: 1 5 190 38.95635 +## --- +## 362: 2 179 210 51.80471 +## 363: 2 180 210 47.27183 +## 364: 2 181 210 47.05031 +## 365: 2 182 210 50.39028 +## 366: 2 183 210 48.44774 }\if{html}{\out{
}} You can also let \code{pred_spec_auto} pick reasonable values like so: -\if{html}{\out{
}}\preformatted{ -pred_spec = pred_spec_auto(species, island, body_mass_g) +\if{html}{\out{
}}\preformatted{pred_spec = pred_spec_auto(species, island, body_mass_g) ice_new <- orsf_ice_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new -#> id_variable id_row species island body_mass_g pred -#> -#> 1: 1 1 Adelie Biscoe 3200 37.78339 -#> 2: 1 2 Adelie Biscoe 3200 37.73273 -#> 3: 1 3 Adelie Biscoe 3200 37.71248 -#> 4: 1 4 Adelie Biscoe 3200 40.25782 -#> 5: 1 5 Adelie Biscoe 3200 40.04074 -#> --- -#> 8231: 45 179 Gentoo Torgersen 5300 46.14559 -#> 8232: 45 180 Gentoo Torgersen 5300 43.98050 -#> 8233: 45 181 Gentoo Torgersen 5300 44.59837 -#> 8234: 45 182 Gentoo Torgersen 5300 44.85146 -#> 8235: 45 183 Gentoo Torgersen 5300 44.23710 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## id_variable id_row species island body_mass_g pred +## +## 1: 1 1 Adelie Biscoe 3200 37.78339 +## 2: 1 2 Adelie Biscoe 3200 37.73273 +## 3: 1 3 Adelie Biscoe 3200 37.71248 +## 4: 1 4 Adelie Biscoe 3200 40.25782 +## 5: 1 5 Adelie Biscoe 3200 40.04074 +## --- +## 8231: 45 179 Gentoo Torgersen 5300 46.14559 +## 8232: 45 180 Gentoo Torgersen 5300 43.98050 +## 8233: 45 181 Gentoo Torgersen 5300 44.59837 +## 8234: 45 182 Gentoo Torgersen 5300 44.85146 +## 8235: 45 183 Gentoo Torgersen 5300 44.23710 }\if{html}{\out{
}} By default, all combinations of all variables are used. However, you can also look at the variables one by one, separately, like so: -\if{html}{\out{
}}\preformatted{ -ice_new <- orsf_ice_new(fit_regr, +\if{html}{\out{
}}\preformatted{ice_new <- orsf_ice_new(fit_regr, expand_grid = FALSE, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new -#> id_variable id_row variable value level pred -#> -#> 1: 1 1 species NA Adelie 37.74136 -#> 2: 1 2 species NA Adelie 37.42367 -#> 3: 1 3 species NA Adelie 37.04598 -#> 4: 1 4 species NA Adelie 39.89602 -#> 5: 1 5 species NA Adelie 39.14848 -#> --- -#> 2009: 5 179 body_mass_g 5300 51.50196 -#> 2010: 5 180 body_mass_g 5300 47.27055 -#> 2011: 5 181 body_mass_g 5300 48.34064 -#> 2012: 5 182 body_mass_g 5300 48.75828 -#> 2013: 5 183 body_mass_g 5300 48.11020 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## id_variable id_row variable value level pred +## +## 1: 1 1 species NA Adelie 37.74136 +## 2: 1 2 species NA Adelie 37.42367 +## 3: 1 3 species NA Adelie 37.04598 +## 4: 1 4 species NA Adelie 39.89602 +## 5: 1 5 species NA Adelie 39.14848 +## --- +## 2009: 5 179 body_mass_g 5300 51.50196 +## 2010: 5 180 body_mass_g 5300 47.27055 +## 2011: 5 181 body_mass_g 5300 48.34064 +## 2012: 5 182 body_mass_g 5300 48.75828 +## 2013: 5 183 body_mass_g 5300 48.11020 }\if{html}{\out{
}} And you can also bypass all the bells and whistles by using your own \code{data.frame} for a \code{pred_spec}. (Just make sure you request values that exist in the training data.) -\if{html}{\out{
}}\preformatted{ -custom_pred_spec <- data.frame(species = 'Adelie', +\if{html}{\out{
}}\preformatted{custom_pred_spec <- data.frame(species = 'Adelie', island = 'Biscoe') ice_new <- orsf_ice_new(fit_regr, @@ -318,19 +320,21 @@ ice_new <- orsf_ice_new(fit_regr, new_data = penguins_orsf_test) ice_new -#> id_variable id_row species island pred -#> -#> 1: 1 1 Adelie Biscoe 38.52327 -#> 2: 1 2 Adelie Biscoe 38.32073 -#> 3: 1 3 Adelie Biscoe 37.71248 -#> 4: 1 4 Adelie Biscoe 41.68380 -#> 5: 1 5 Adelie Biscoe 40.91140 -#> --- -#> 179: 1 179 Adelie Biscoe 43.09493 -#> 180: 1 180 Adelie Biscoe 38.79455 -#> 181: 1 181 Adelie Biscoe 39.37734 -#> 182: 1 182 Adelie Biscoe 40.71952 -#> 183: 1 183 Adelie Biscoe 39.34501 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## id_variable id_row species island pred +## +## 1: 1 1 Adelie Biscoe 38.52327 +## 2: 1 2 Adelie Biscoe 38.32073 +## 3: 1 3 Adelie Biscoe 37.71248 +## 4: 1 4 Adelie Biscoe 41.68380 +## 5: 1 5 Adelie Biscoe 40.91140 +## --- +## 179: 1 179 Adelie Biscoe 43.09493 +## 180: 1 180 Adelie Biscoe 38.79455 +## 181: 1 181 Adelie Biscoe 39.37734 +## 182: 1 182 Adelie Biscoe 40.71952 +## 183: 1 183 Adelie Biscoe 39.34501 }\if{html}{\out{
}} } @@ -338,8 +342,7 @@ ice_new Begin by fitting an oblique survival random forest: -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(pbc_orsf), 150) @@ -356,19 +359,21 @@ Compute individual conditional expectation using in-bag data for \if{html}{\out{
}}\preformatted{ice_train <- orsf_ice_inb(fit_surv, pred_spec = list(bili = 1:5)) ice_train -#> id_variable id_row pred_horizon bili pred -#> -#> 1: 1 1 1826.25 1 0.1290317 -#> 2: 1 2 1826.25 1 0.1242352 -#> 3: 1 3 1826.25 1 0.0963452 -#> 4: 1 4 1826.25 1 0.1172367 -#> 5: 1 5 1826.25 1 0.2030256 -#> --- -#> 746: 5 146 1826.25 5 0.7868537 -#> 747: 5 147 1826.25 5 0.2012954 -#> 748: 5 148 1826.25 5 0.4893605 -#> 749: 5 149 1826.25 5 0.4698220 -#> 750: 5 150 1826.25 5 0.9557285 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## id_variable id_row pred_horizon bili pred +## +## 1: 1 1 1826.25 1 0.1290317 +## 2: 1 2 1826.25 1 0.1242352 +## 3: 1 3 1826.25 1 0.0963452 +## 4: 1 4 1826.25 1 0.1172367 +## 5: 1 5 1826.25 1 0.2030256 +## --- +## 746: 5 146 1826.25 5 0.7868537 +## 747: 5 147 1826.25 5 0.2012954 +## 748: 5 148 1826.25 5 0.4893605 +## 749: 5 149 1826.25 5 0.4698220 +## 750: 5 150 1826.25 5 0.9557285 }\if{html}{\out{
}} If you don’t have specific values of a variable in mind, let @@ -376,41 +381,44 @@ If you don’t have specific values of a variable in mind, let \if{html}{\out{
}}\preformatted{ice_train <- orsf_ice_inb(fit_surv, pred_spec_auto(bili)) ice_train -#> id_variable id_row pred_horizon bili pred -#> -#> 1: 1 1 1826.25 0.55 0.11728559 -#> 2: 1 2 1826.25 0.55 0.11728839 -#> 3: 1 3 1826.25 0.55 0.08950739 -#> 4: 1 4 1826.25 0.55 0.10064959 -#> 5: 1 5 1826.25 0.55 0.18736417 -#> --- -#> 746: 5 146 1826.25 7.25 0.82600898 -#> 747: 5 147 1826.25 7.25 0.29156437 -#> 748: 5 148 1826.25 7.25 0.58395919 -#> 749: 5 149 1826.25 7.25 0.54202021 -#> 750: 5 150 1826.25 7.25 0.96391985 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## id_variable id_row pred_horizon bili pred +## +## 1: 1 1 1826.25 0.55 0.11728559 +## 2: 1 2 1826.25 0.55 0.11728839 +## 3: 1 3 1826.25 0.55 0.08950739 +## 4: 1 4 1826.25 0.55 0.10064959 +## 5: 1 5 1826.25 0.55 0.18736417 +## --- +## 746: 5 146 1826.25 7.25 0.82600898 +## 747: 5 147 1826.25 7.25 0.29156437 +## 748: 5 148 1826.25 7.25 0.58395919 +## 749: 5 149 1826.25 7.25 0.54202021 +## 750: 5 150 1826.25 7.25 0.96391985 }\if{html}{\out{
}} Specify \code{pred_horizon} to get individual conditional expectation at each value: -\if{html}{\out{
}}\preformatted{ -ice_train <- orsf_ice_inb(fit_surv, pred_spec_auto(bili), +\if{html}{\out{
}}\preformatted{ice_train <- orsf_ice_inb(fit_surv, pred_spec_auto(bili), pred_horizon = seq(500, 3000, by = 500)) ice_train -#> id_variable id_row pred_horizon bili pred -#> -#> 1: 1 1 500 0.55 0.008276627 -#> 2: 1 1 1000 0.55 0.055724516 -#> 3: 1 1 1500 0.55 0.085091120 -#> 4: 1 1 2000 0.55 0.123423352 -#> 5: 1 1 2500 0.55 0.166380739 -#> --- -#> 4496: 5 150 1000 7.25 0.837774757 -#> 4497: 5 150 1500 7.25 0.934536379 -#> 4498: 5 150 2000 7.25 0.967823286 -#> 4499: 5 150 2500 7.25 0.972059574 -#> 4500: 5 150 3000 7.25 0.980785643 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## id_variable id_row pred_horizon bili pred +## +## 1: 1 1 500 0.55 0.008276627 +## 2: 1 1 1000 0.55 0.055724516 +## 3: 1 1 1500 0.55 0.085091120 +## 4: 1 1 2000 0.55 0.123423352 +## 5: 1 1 2500 0.55 0.166380739 +## --- +## 4496: 5 150 1000 7.25 0.837774757 +## 4497: 5 150 1500 7.25 0.934536379 +## 4498: 5 150 2000 7.25 0.967823286 +## 4499: 5 150 2500 7.25 0.972059574 +## 4500: 5 150 3000 7.25 0.980785643 }\if{html}{\out{
}} Multi-prediction horizon ice comes with minimal extra computational diff --git a/man/orsf_pd_oob.Rd b/man/orsf_pd_oob.Rd index 3a91e91c..7e550962 100644 --- a/man/orsf_pd_oob.Rd +++ b/man/orsf_pd_oob.Rd @@ -174,8 +174,7 @@ fairness. Begin by fitting an oblique classification random forest: -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) @@ -189,21 +188,22 @@ fit_clsf <- orsf(data = penguins_orsf_train, Compute partial dependence using out-of-bag data for \code{flipper_length_mm = c(190, 210)}. -\if{html}{\out{
}}\preformatted{ -pred_spec <- list(flipper_length_mm = c(190, 210)) +\if{html}{\out{
}}\preformatted{pred_spec <- list(flipper_length_mm = c(190, 210)) pd_oob <- orsf_pd_oob(fit_clsf, pred_spec = pred_spec) pd_oob -#> Key: -#> class flipper_length_mm mean lwr medn upr -#> -#> 1: Adelie 190 0.6176908 0.202278109 0.75856417 0.9810614 -#> 2: Adelie 210 0.4338528 0.019173811 0.56489202 0.8648110 -#> 3: Chinstrap 190 0.2114979 0.017643385 0.15211271 0.7215181 -#> 4: Chinstrap 210 0.1803019 0.020108201 0.09679464 0.7035053 -#> 5: Gentoo 190 0.1708113 0.001334861 0.02769695 0.5750201 -#> 6: Gentoo 210 0.3858453 0.068685035 0.20717073 0.9532853 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Key: +## class flipper_length_mm mean lwr medn upr +## +## 1: Adelie 190 0.6176908 0.202278109 0.75856417 0.9810614 +## 2: Adelie 210 0.4338528 0.019173811 0.56489202 0.8648110 +## 3: Chinstrap 190 0.2114979 0.017643385 0.15211271 0.7215181 +## 4: Chinstrap 210 0.1803019 0.020108201 0.09679464 0.7035053 +## 5: Gentoo 190 0.1708113 0.001334861 0.02769695 0.5750201 +## 6: Gentoo 210 0.3858453 0.068685035 0.20717073 0.9532853 }\if{html}{\out{
}} Note that predicted probabilities are returned for each class and @@ -211,16 +211,18 @@ probabilities in the \code{mean} column sum to 1 if you take the sum over each class at a specific value of the \code{pred_spec} variables. For example, -\if{html}{\out{
}}\preformatted{ -sum(pd_oob[flipper_length_mm == 190, mean]) -#> [1] 1 +\if{html}{\out{
}}\preformatted{sum(pd_oob[flipper_length_mm == 190, mean]) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] 1 }\if{html}{\out{
}} But this isn’t the case for the median predicted probability! -\if{html}{\out{
}}\preformatted{ -sum(pd_oob[flipper_length_mm == 190, medn]) -#> [1] 0.9383738 +\if{html}{\out{
}}\preformatted{sum(pd_oob[flipper_length_mm == 190, medn]) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] 0.9383738 }\if{html}{\out{
}} } @@ -228,8 +230,7 @@ sum(pd_oob[flipper_length_mm == 190, medn]) Begin by fitting an oblique regression random forest: -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) @@ -243,76 +244,78 @@ fit_regr <- orsf(data = penguins_orsf_train, Compute partial dependence using new data for \code{flipper_length_mm = c(190, 210)}. -\if{html}{\out{
}}\preformatted{ -pred_spec <- list(flipper_length_mm = c(190, 210)) +\if{html}{\out{
}}\preformatted{pred_spec <- list(flipper_length_mm = c(190, 210)) pd_new <- orsf_pd_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new -#> flipper_length_mm mean lwr medn upr -#> -#> 1: 190 42.96571 37.09805 43.69769 48.72301 -#> 2: 210 45.66012 40.50693 46.31577 51.65163 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## flipper_length_mm mean lwr medn upr +## +## 1: 190 42.96571 37.09805 43.69769 48.72301 +## 2: 210 45.66012 40.50693 46.31577 51.65163 }\if{html}{\out{
}} You can also let \code{pred_spec_auto} pick reasonable values like so: -\if{html}{\out{
}}\preformatted{ -pred_spec = pred_spec_auto(species, island, body_mass_g) +\if{html}{\out{
}}\preformatted{pred_spec = pred_spec_auto(species, island, body_mass_g) pd_new <- orsf_pd_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new -#> species island body_mass_g mean lwr medn upr -#> -#> 1: Adelie Biscoe 3200 40.31374 37.24373 40.31967 44.22824 -#> 2: Chinstrap Biscoe 3200 45.10582 42.63342 45.10859 47.60119 -#> 3: Gentoo Biscoe 3200 42.81649 40.19221 42.55664 46.84035 -#> 4: Adelie Dream 3200 40.16219 36.95895 40.34633 43.90681 -#> 5: Chinstrap Dream 3200 46.21778 43.53954 45.90929 49.19173 -#> --- -#> 41: Chinstrap Dream 5300 48.48139 46.36282 48.25679 51.02996 -#> 42: Gentoo Dream 5300 45.91819 43.62832 45.54110 49.91622 -#> 43: Adelie Torgersen 5300 42.92879 40.66576 42.31072 46.76406 -#> 44: Chinstrap Torgersen 5300 46.59576 44.80400 46.49196 49.03906 -#> 45: Gentoo Torgersen 5300 45.11384 42.95190 44.51289 49.27629 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## species island body_mass_g mean lwr medn upr +## +## 1: Adelie Biscoe 3200 40.31374 37.24373 40.31967 44.22824 +## 2: Chinstrap Biscoe 3200 45.10582 42.63342 45.10859 47.60119 +## 3: Gentoo Biscoe 3200 42.81649 40.19221 42.55664 46.84035 +## 4: Adelie Dream 3200 40.16219 36.95895 40.34633 43.90681 +## 5: Chinstrap Dream 3200 46.21778 43.53954 45.90929 49.19173 +## --- +## 41: Chinstrap Dream 5300 48.48139 46.36282 48.25679 51.02996 +## 42: Gentoo Dream 5300 45.91819 43.62832 45.54110 49.91622 +## 43: Adelie Torgersen 5300 42.92879 40.66576 42.31072 46.76406 +## 44: Chinstrap Torgersen 5300 46.59576 44.80400 46.49196 49.03906 +## 45: Gentoo Torgersen 5300 45.11384 42.95190 44.51289 49.27629 }\if{html}{\out{
}} By default, all combinations of all variables are used. However, you can also look at the variables one by one, separately, like so: -\if{html}{\out{
}}\preformatted{ -pd_new <- orsf_pd_new(fit_regr, +\if{html}{\out{
}}\preformatted{pd_new <- orsf_pd_new(fit_regr, expand_grid = FALSE, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new -#> variable value level mean lwr medn upr -#> -#> 1: species NA Adelie 41.90271 37.10417 41.51723 48.51478 -#> 2: species NA Chinstrap 47.11314 42.40419 46.96478 51.51392 -#> 3: species NA Gentoo 44.37038 39.87306 43.89889 51.21635 -#> 4: island NA Biscoe 44.21332 37.22711 45.27862 51.21635 -#> 5: island NA Dream 44.43354 37.01471 45.57261 51.51392 -#> 6: island NA Torgersen 43.29539 37.01513 44.26924 49.84391 -#> 7: body_mass_g 3200 42.84625 37.03978 43.95991 49.19173 -#> 8: body_mass_g 3550 43.53326 37.56730 44.43756 50.47092 -#> 9: body_mass_g 3975 44.30431 38.31567 45.22089 51.50683 -#> 10: body_mass_g 4700 45.22559 39.88199 46.34680 51.18955 -#> 11: body_mass_g 5300 45.91412 40.84742 46.95327 51.48851 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## variable value level mean lwr medn upr +## +## 1: species NA Adelie 41.90271 37.10417 41.51723 48.51478 +## 2: species NA Chinstrap 47.11314 42.40419 46.96478 51.51392 +## 3: species NA Gentoo 44.37038 39.87306 43.89889 51.21635 +## 4: island NA Biscoe 44.21332 37.22711 45.27862 51.21635 +## 5: island NA Dream 44.43354 37.01471 45.57261 51.51392 +## 6: island NA Torgersen 43.29539 37.01513 44.26924 49.84391 +## 7: body_mass_g 3200 42.84625 37.03978 43.95991 49.19173 +## 8: body_mass_g 3550 43.53326 37.56730 44.43756 50.47092 +## 9: body_mass_g 3975 44.30431 38.31567 45.22089 51.50683 +## 10: body_mass_g 4700 45.22559 39.88199 46.34680 51.18955 +## 11: body_mass_g 5300 45.91412 40.84742 46.95327 51.48851 }\if{html}{\out{
}} And you can also bypass all the bells and whistles by using your own \code{data.frame} for a \code{pred_spec}. (Just make sure you request values that exist in the training data.) -\if{html}{\out{
}}\preformatted{ -custom_pred_spec <- data.frame(species = 'Adelie', +\if{html}{\out{
}}\preformatted{custom_pred_spec <- data.frame(species = 'Adelie', island = 'Biscoe') pd_new <- orsf_pd_new(fit_regr, @@ -320,9 +323,11 @@ pd_new <- orsf_pd_new(fit_regr, new_data = penguins_orsf_test) pd_new -#> species island mean lwr medn upr -#> -#> 1: Adelie Biscoe 41.98024 37.22711 41.65252 48.51478 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## species island mean lwr medn upr +## +## 1: Adelie Biscoe 41.98024 37.22711 41.65252 48.51478 }\if{html}{\out{
}} } @@ -330,8 +335,7 @@ pd_new Begin by fitting an oblique survival random forest: -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(pbc_orsf), 150) @@ -347,13 +351,15 @@ Compute partial dependence using in-bag data for \code{bili = c(1,2,3,4,5)}: \if{html}{\out{
}}\preformatted{pd_train <- orsf_pd_inb(fit_surv, pred_spec = list(bili = 1:5)) pd_train -#> pred_horizon bili mean lwr medn upr -#> -#> 1: 1826.25 1 0.2566200 0.02234786 0.1334170 0.8918909 -#> 2: 1826.25 2 0.3121392 0.06853733 0.1896849 0.9204338 -#> 3: 1826.25 3 0.3703242 0.11409793 0.2578505 0.9416791 -#> 4: 1826.25 4 0.4240692 0.15645214 0.3331057 0.9591581 -#> 5: 1826.25 5 0.4663670 0.20123406 0.3841700 0.9655296 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## pred_horizon bili mean lwr medn upr +## +## 1: 1826.25 1 0.2566200 0.02234786 0.1334170 0.8918909 +## 2: 1826.25 2 0.3121392 0.06853733 0.1896849 0.9204338 +## 3: 1826.25 3 0.3703242 0.11409793 0.2578505 0.9416791 +## 4: 1826.25 4 0.4240692 0.15645214 0.3331057 0.9591581 +## 5: 1826.25 5 0.4663670 0.20123406 0.3841700 0.9655296 }\if{html}{\out{
}} If you don’t have specific values of a variable in mind, let @@ -361,34 +367,37 @@ If you don’t have specific values of a variable in mind, let \if{html}{\out{
}}\preformatted{pd_train <- orsf_pd_inb(fit_surv, pred_spec_auto(bili)) pd_train -#> pred_horizon bili mean lwr medn upr -#> -#> 1: 1826.25 0.55 0.2481444 0.02035041 0.1242215 0.8801444 -#> 2: 1826.25 0.70 0.2502831 0.02045039 0.1271039 0.8836536 -#> 3: 1826.25 1.50 0.2797763 0.03964900 0.1601715 0.9041584 -#> 4: 1826.25 3.50 0.3959349 0.13431288 0.2920400 0.9501230 -#> 5: 1826.25 7.25 0.5351935 0.28064629 0.4652185 0.9783000 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## pred_horizon bili mean lwr medn upr +## +## 1: 1826.25 0.55 0.2481444 0.02035041 0.1242215 0.8801444 +## 2: 1826.25 0.70 0.2502831 0.02045039 0.1271039 0.8836536 +## 3: 1826.25 1.50 0.2797763 0.03964900 0.1601715 0.9041584 +## 4: 1826.25 3.50 0.3959349 0.13431288 0.2920400 0.9501230 +## 5: 1826.25 7.25 0.5351935 0.28064629 0.4652185 0.9783000 }\if{html}{\out{
}} Specify \code{pred_horizon} to get partial dependence at each value: -\if{html}{\out{
}}\preformatted{ -pd_train <- orsf_pd_inb(fit_surv, pred_spec_auto(bili), +\if{html}{\out{
}}\preformatted{pd_train <- orsf_pd_inb(fit_surv, pred_spec_auto(bili), pred_horizon = seq(500, 3000, by = 500)) pd_train -#> pred_horizon bili mean lwr medn upr -#> -#> 1: 500 0.55 0.0617199 0.000443399 0.00865419 0.5907104 -#> 2: 1000 0.55 0.1418501 0.005793742 0.05572853 0.7360749 -#> 3: 1500 0.55 0.2082505 0.013609478 0.09174558 0.8556319 -#> 4: 2000 0.55 0.2679017 0.023047689 0.14574169 0.8910549 -#> 5: 2500 0.55 0.3179617 0.063797305 0.20254500 0.9017710 -#> --- -#> 26: 1000 7.25 0.3264627 0.135343689 0.25956791 0.8884333 -#> 27: 1500 7.25 0.4641265 0.218208755 0.38787435 0.9702903 -#> 28: 2000 7.25 0.5511761 0.293367409 0.48427730 0.9812413 -#> 29: 2500 7.25 0.6200238 0.371965247 0.56954399 0.9845058 -#> 30: 3000 7.25 0.6803482 0.425128031 0.64642318 0.9888637 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## pred_horizon bili mean lwr medn upr +## +## 1: 500 0.55 0.0617199 0.000443399 0.00865419 0.5907104 +## 2: 1000 0.55 0.1418501 0.005793742 0.05572853 0.7360749 +## 3: 1500 0.55 0.2082505 0.013609478 0.09174558 0.8556319 +## 4: 2000 0.55 0.2679017 0.023047689 0.14574169 0.8910549 +## 5: 2500 0.55 0.3179617 0.063797305 0.20254500 0.9017710 +## --- +## 26: 1000 7.25 0.3264627 0.135343689 0.25956791 0.8884333 +## 27: 1500 7.25 0.4641265 0.218208755 0.38787435 0.9702903 +## 28: 2000 7.25 0.5511761 0.293367409 0.48427730 0.9812413 +## 29: 2500 7.25 0.6200238 0.371965247 0.56954399 0.9845058 +## 30: 3000 7.25 0.6803482 0.425128031 0.64642318 0.9888637 }\if{html}{\out{
}} vector-valued \code{pred_horizon} input comes with minimal extra diff --git a/man/orsf_vi.Rd b/man/orsf_vi.Rd index 684ee443..a0f5afb1 100644 --- a/man/orsf_vi.Rd +++ b/man/orsf_vi.Rd @@ -124,26 +124,27 @@ or not (see examples). The default variable importance technique, ANOVA, is calculated while you fit an oblique random forest ensemble. -\if{html}{\out{
}}\preformatted{ -fit <- orsf(pbc_orsf, Surv(time, status) ~ . - id) +\if{html}{\out{
}}\preformatted{fit <- orsf(pbc_orsf, Surv(time, status) ~ . - id) fit -#> ---------- Oblique random survival forest -#> -#> Linear combinations: Accelerated Cox regression -#> N observations: 276 -#> N events: 111 -#> N trees: 500 -#> N predictors total: 17 -#> N predictors per node: 5 -#> Average leaves per tree: 21.022 -#> Min observations in leaf: 5 -#> Min events in leaf: 1 -#> OOB stat value: 0.84 -#> OOB stat type: Harrell's C-index -#> Variable importance: anova -#> -#> ----------------------------------------- +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## ---------- Oblique random survival forest +## +## Linear combinations: Accelerated Cox regression +## N observations: 276 +## N events: 111 +## N trees: 500 +## N predictors total: 17 +## N predictors per node: 5 +## Average leaves per tree: 21.022 +## Min observations in leaf: 5 +## Min events in leaf: 1 +## OOB stat value: 0.84 +## OOB stat type: Harrell's C-index +## Variable importance: anova +## +## ----------------------------------------- }\if{html}{\out{
}} ANOVA is the default because it is fast, but it may not be as decisive @@ -154,27 +155,28 @@ as the permutation and negation techniques for variable selection. the ‘raw’ variable importance values can be accessed from the fit object -\if{html}{\out{
}}\preformatted{ -fit$get_importance_raw() -#> [,1] -#> trt_placebo 0.06355042 -#> age 0.23259259 -#> sex_f 0.14700432 -#> ascites_1 0.46791708 -#> hepato_1 0.14349776 -#> spiders_1 0.17371938 -#> edema_0.5 0.17459191 -#> edema_1 0.51197605 -#> bili 0.40590758 -#> chol 0.17666667 -#> albumin 0.25972156 -#> copper 0.28840580 -#> alk.phos 0.10614251 -#> ast 0.18327491 -#> trig 0.12815626 -#> platelet 0.09265648 -#> protime 0.22656250 -#> stage 0.20264766 +\if{html}{\out{
}}\preformatted{fit$get_importance_raw() +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [,1] +## trt_placebo 0.06355042 +## age 0.23259259 +## sex_f 0.14700432 +## ascites_1 0.46791708 +## hepato_1 0.14349776 +## spiders_1 0.17371938 +## edema_0.5 0.17459191 +## edema_1 0.51197605 +## bili 0.40590758 +## chol 0.17666667 +## albumin 0.25972156 +## copper 0.28840580 +## alk.phos 0.10614251 +## ast 0.18327491 +## trig 0.12815626 +## platelet 0.09265648 +## protime 0.22656250 +## stage 0.20264766 }\if{html}{\out{
}} these are ‘raw’ because values for factors have not been aggregated into @@ -192,22 +194,26 @@ To get aggregated values across all levels of each factor, \if{html}{\out{
}}\preformatted{# this assumes you used group_factors = TRUE in orsf() fit$importance -#> ascites bili edema copper albumin age protime -#> 0.46791708 0.40590758 0.31115216 0.28840580 0.25972156 0.23259259 0.22656250 -#> stage ast chol spiders sex hepato trig -#> 0.20264766 0.18327491 0.17666667 0.17371938 0.14700432 0.14349776 0.12815626 -#> alk.phos platelet trt -#> 0.10614251 0.09265648 0.06355042 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## ascites bili edema copper albumin age protime +## 0.46791708 0.40590758 0.31115216 0.28840580 0.25972156 0.23259259 0.22656250 +## stage ast chol spiders sex hepato trig +## 0.20264766 0.18327491 0.17666667 0.17371938 0.14700432 0.14349776 0.12815626 +## alk.phos platelet trt +## 0.10614251 0.09265648 0.06355042 }\if{html}{\out{
}} \item use \code{orsf_vi()} with group_factors set to \code{TRUE} (the default) \if{html}{\out{
}}\preformatted{orsf_vi(fit) -#> ascites bili edema copper albumin age protime -#> 0.46791708 0.40590758 0.31115216 0.28840580 0.25972156 0.23259259 0.22656250 -#> stage ast chol spiders sex hepato trig -#> 0.20264766 0.18327491 0.17666667 0.17371938 0.14700432 0.14349776 0.12815626 -#> alk.phos platelet trt -#> 0.10614251 0.09265648 0.06355042 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## ascites bili edema copper albumin age protime +## 0.46791708 0.40590758 0.31115216 0.28840580 0.25972156 0.23259259 0.22656250 +## stage ast chol spiders sex hepato trig +## 0.20264766 0.18327491 0.17666667 0.17371938 0.14700432 0.14349776 0.12815626 +## alk.phos platelet trt +## 0.10614251 0.09265648 0.06355042 }\if{html}{\out{
}} } @@ -220,8 +226,7 @@ by setting \code{group_factors} to \code{FALSE} in the \code{orsf_vi} functions You can fit an oblique random forest without VI, then add VI later -\if{html}{\out{
}}\preformatted{ -fit_no_vi <- orsf(pbc_orsf, +\if{html}{\out{
}}\preformatted{fit_no_vi <- orsf(pbc_orsf, Surv(time, status) ~ . - id, importance = 'none') @@ -229,22 +234,27 @@ fit_no_vi <- orsf(pbc_orsf, # VI can only be computed while the forest is being grown. orsf_vi_negate(fit_no_vi) -#> bili copper sex protime age stage -#> 0.130439814 0.051880867 0.038308025 0.025115249 0.023826061 0.020354822 -#> albumin ascites chol ast spiders hepato -#> 0.019997729 0.015918292 0.013320469 0.010086726 0.007409116 0.007326714 -#> edema trt alk.phos trig platelet -#> 0.006844435 0.003214544 0.002517057 0.002469545 0.001056829 - -orsf_vi_permute(fit_no_vi) -#> bili copper age ascites protime -#> 0.0592069141 0.0237362075 0.0136479213 0.0130805894 0.0123091354 -#> stage albumin chol hepato ast -#> 0.0117177661 0.0106414724 0.0064501213 0.0058813969 0.0057753740 -#> edema spiders sex trig platelet -#> 0.0052171180 0.0048427005 0.0023386947 0.0017883700 0.0013533691 -#> alk.phos trt -#> 0.0006492029 -0.0009921507 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## bili copper sex protime age stage +## 0.130439814 0.051880867 0.038308025 0.025115249 0.023826061 0.020354822 +## albumin ascites chol ast spiders hepato +## 0.019997729 0.015918292 0.013320469 0.010086726 0.007409116 0.007326714 +## edema trt alk.phos trig platelet +## 0.006844435 0.003214544 0.002517057 0.002469545 0.001056829 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{orsf_vi_permute(fit_no_vi) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## bili copper age ascites protime +## 0.0592069141 0.0237362075 0.0136479213 0.0130805894 0.0123091354 +## stage albumin chol hepato ast +## 0.0117177661 0.0106414724 0.0064501213 0.0058813969 0.0057753740 +## edema spiders sex trig platelet +## 0.0052171180 0.0048427005 0.0023386947 0.0017883700 0.0013533691 +## alk.phos trt +## 0.0006492029 -0.0009921507 }\if{html}{\out{
}} } @@ -252,33 +262,35 @@ orsf_vi_permute(fit_no_vi) fit an oblique random forest and compute vi at the same time -\if{html}{\out{
}}\preformatted{ -fit_permute_vi <- orsf(pbc_orsf, +\if{html}{\out{
}}\preformatted{fit_permute_vi <- orsf(pbc_orsf, Surv(time, status) ~ . - id, importance = 'permute') # get the vi instantly (i.e., it doesn't need to be computed again) orsf_vi_permute(fit_permute_vi) -#> bili copper ascites protime albumin -#> 0.0571305446 0.0243657146 0.0138318057 0.0133401675 0.0130746154 -#> age stage chol ast spiders -#> 0.0123610374 0.0102963203 0.0077895394 0.0075250059 0.0048628813 -#> edema hepato sex platelet trig -#> 0.0046003168 0.0039818730 0.0016891584 0.0012767063 0.0007324402 -#> alk.phos trt -#> 0.0005128897 -0.0014443967 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## bili copper ascites protime albumin +## 0.0571305446 0.0243657146 0.0138318057 0.0133401675 0.0130746154 +## age stage chol ast spiders +## 0.0123610374 0.0102963203 0.0077895394 0.0075250059 0.0048628813 +## edema hepato sex platelet trig +## 0.0046003168 0.0039818730 0.0016891584 0.0012767063 0.0007324402 +## alk.phos trt +## 0.0005128897 -0.0014443967 }\if{html}{\out{
}} You can still get negation VI from this fit, but it needs to be computed -\if{html}{\out{
}}\preformatted{ -orsf_vi_negate(fit_permute_vi) -#> bili copper sex protime stage age -#> 0.123331760 0.052544318 0.037291358 0.024977898 0.023239189 0.021934511 -#> albumin ascites chol ast spiders edema -#> 0.020586632 0.014229536 0.014053040 0.012227048 0.007643156 0.006832766 -#> hepato trt alk.phos trig platelet -#> 0.006301693 0.004348705 0.002371797 0.002309396 0.001347035 +\if{html}{\out{
}}\preformatted{orsf_vi_negate(fit_permute_vi) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## bili copper sex protime stage age +## 0.123331760 0.052544318 0.037291358 0.024977898 0.023239189 0.021934511 +## albumin ascites chol ast spiders edema +## 0.020586632 0.014229536 0.014053040 0.012227048 0.007643156 0.006832766 +## hepato trt alk.phos trig platelet +## 0.006301693 0.004348705 0.002371797 0.002309396 0.001347035 }\if{html}{\out{
}} } @@ -286,15 +298,16 @@ orsf_vi_negate(fit_permute_vi) The default prediction accuracy functions work well most of the time: -\if{html}{\out{
}}\preformatted{ -fit_standard <- orsf(penguins_orsf, bill_length_mm ~ ., tree_seeds = 1) +\if{html}{\out{
}}\preformatted{fit_standard <- orsf(penguins_orsf, bill_length_mm ~ ., tree_seeds = 1) # Default method for prediction accuracy with VI is R-squared orsf_vi_permute(fit_standard) -#> species flipper_length_mm body_mass_g bill_depth_mm -#> 0.3725898166 0.3261834607 0.2225730676 0.1026569498 -#> island sex year -#> 0.0876071687 0.0844807334 0.0006978493 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## species flipper_length_mm body_mass_g bill_depth_mm +## 0.3725898166 0.3261834607 0.2225730676 0.1026569498 +## island sex year +## 0.0876071687 0.0844807334 0.0006978493 }\if{html}{\out{
}} But sometimes you want to do something specific and the defaults just @@ -305,8 +318,7 @@ values are considered good when \code{aorsf} computes prediction accuracy, so we make our function return a pseudo R-squared based on mean absolute error: -\if{html}{\out{
}}\preformatted{ -rsq_mae <- function(y_mat, w_vec, s_vec)\{ +\if{html}{\out{
}}\preformatted{rsq_mae <- function(y_mat, w_vec, s_vec)\{ mae_standard <- mean(abs((y_mat - mean(y_mat)) * w_vec)) mae_fit <- mean(abs((y_mat - s_vec) * w_vec)) @@ -320,10 +332,12 @@ fit_custom <- orsf_update(fit_standard, oobag_fun = rsq_mae) # not much changes, but the difference between variables shrinks # and the ordering of sex and island has swapped orsf_vi_permute(fit_custom) -#> species flipper_length_mm body_mass_g bill_depth_mm -#> 0.206951751 0.193248912 0.140899603 0.076759148 -#> sex island year -#> 0.073042331 0.050851073 0.003633365 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## species flipper_length_mm body_mass_g bill_depth_mm +## 0.206951751 0.193248912 0.140899603 0.076759148 +## sex island year +## 0.073042331 0.050851073 0.003633365 }\if{html}{\out{
}} } } diff --git a/man/orsf_vs.Rd b/man/orsf_vs.Rd index d861e2b8..c3db8cca 100644 --- a/man/orsf_vs.Rd +++ b/man/orsf_vs.Rd @@ -18,6 +18,7 @@ a \link[data.table:data.table]{data.table} with four columns: \itemize{ \item \emph{n_predictors}: the number of predictors used \item \emph{stat_value}: the out-of-bag statistic +\item \emph{variables_included}: the names of the variables included \item \emph{predictors_included}: the names of the predictors included \item \emph{predictor_dropped}: the predictor selected to be dropped } @@ -26,6 +27,13 @@ a \link[data.table:data.table]{data.table} with four columns: Variable selection } \details{ +The difference between \code{variables_included} and \code{predictors_included} is +referent coding. The \code{variable} would be the name of a factor variable +in the training data, while the \code{predictor} would be the name of that +same factor with one of its non-referent levels appended. For example, if +the variable is \code{diabetes} with \code{levels = c("no", "yes")}, then the +variable name is \code{diabetes} and the predictor name is \code{diabetes_yes}. + \code{tree_seeds} should be specified in \code{object} so that each successive run of \code{orsf} will be evaluated in the same out-of-bag samples as the initial run. 
diff --git a/man/predict.ObliqueForest.Rd b/man/predict.ObliqueForest.Rd index 7f4e405b..79c59d6d 100644 --- a/man/predict.ObliqueForest.Rd +++ b/man/predict.ObliqueForest.Rd @@ -6,11 +6,12 @@ \usage{ \method{predict}{ObliqueForest}( object, - new_data, + new_data = NULL, pred_type = NULL, pred_horizon = NULL, pred_aggregate = TRUE, pred_simplify = FALSE, + oobag = FALSE, na_action = NULL, boundary_checks = TRUE, n_thread = NULL, @@ -65,6 +66,11 @@ If \code{TRUE}, predictions may be simplified to a vector, e.g., if \code{pred_t is \code{'mort'} for survival or \code{'class'} for classification, or an array of matrices if \code{length(pred_horizon) > 1}.} +\item{oobag}{(\emph{logical}) If \code{FALSE} (the default), predictions will +be computed using all trees for each observation. If \code{TRUE}, then +out-of-bag predictions will be computed. This input parameter should +only be set to \code{TRUE} if \code{new_data} is \code{NULL}.} + \item{na_action}{(\emph{character}) what should happen when \code{new_data} contains missing values (i.e., \code{NA} values). Valid options are: \itemize{ \item 'fail' : an error is thrown if \code{new_data} contains \code{NA} values @@ -118,8 +124,7 @@ used for \code{oobag_pred_horizon} when \code{object} was created (see \link{ors }\if{html}{\out{
}} \subsection{Classification}{ -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) @@ -136,38 +141,45 @@ Predict probability for each class or the predicted class: predict(fit_clsf, new_data = penguins_orsf_test[1:5, ], pred_type = 'prob') -#> Adelie Chinstrap Gentoo -#> [1,] 0.9405310 0.04121955 0.018249405 -#> [2,] 0.9628988 0.03455909 0.002542096 -#> [3,] 0.9032074 0.08510528 0.011687309 -#> [4,] 0.9300133 0.05209040 0.017896329 -#> [5,] 0.7965703 0.16243492 0.040994821 - -# predicted class (as a matrix by default) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Adelie Chinstrap Gentoo +## [1,] 0.9405310 0.04121955 0.018249405 +## [2,] 0.9628988 0.03455909 0.002542096 +## [3,] 0.9032074 0.08510528 0.011687309 +## [4,] 0.9300133 0.05209040 0.017896329 +## [5,] 0.7965703 0.16243492 0.040994821 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{# predicted class (as a matrix by default) predict(fit_clsf, new_data = penguins_orsf_test[1:5, ], pred_type = 'class') -#> [,1] -#> [1,] 1 -#> [2,] 1 -#> [3,] 1 -#> [4,] 1 -#> [5,] 1 - -# predicted class (as a factor if you use simplify) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [,1] +## [1,] 1 +## [2,] 1 +## [3,] 1 +## [4,] 1 +## [5,] 1 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{# predicted class (as a factor if you use simplify) predict(fit_clsf, new_data = penguins_orsf_test[1:5, ], pred_type = 'class', pred_simplify = TRUE) -#> [1] Adelie Adelie Adelie Adelie Adelie -#> Levels: Adelie Chinstrap Gentoo +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] Adelie Adelie Adelie Adelie Adelie +## Levels: Adelie Chinstrap Gentoo }\if{html}{\out{
}} } \subsection{Regression}{ -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) @@ -180,16 +192,17 @@ fit_regr <- orsf(data = penguins_orsf_train, Predict the mean value of the outcome: -\if{html}{\out{
}}\preformatted{ -predict(fit_regr, +\if{html}{\out{
}}\preformatted{predict(fit_regr, new_data = penguins_orsf_test[1:5, ], pred_type = 'mean') -#> [,1] -#> [1,] 37.74136 -#> [2,] 37.42367 -#> [3,] 37.04598 -#> [4,] 39.89602 -#> [5,] 39.14848 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [,1] +## [1,] 37.74136 +## [2,] 37.42367 +## [3,] 37.04598 +## [4,] 39.89602 +## [5,] 39.14848 }\if{html}{\out{
}} } @@ -197,8 +210,7 @@ predict(fit_regr, Begin by fitting an oblique survival random forest: -\if{html}{\out{
}}\preformatted{ -set.seed(329) +\if{html}{\out{
}}\preformatted{set.seed(329) index_train <- sample(nrow(pbc_orsf), 150) @@ -212,43 +224,50 @@ fit_surv <- orsf(data = pbc_orsf_train, Predict risk, survival, or cumulative hazard at one or several times: -\if{html}{\out{
}}\preformatted{ -# predicted risk, the default +\if{html}{\out{
}}\preformatted{# predicted risk, the default predict(fit_surv, new_data = pbc_orsf_test[1:5, ], pred_type = 'risk', pred_horizon = c(500, 1000, 1500)) -#> [,1] [,2] [,3] -#> [1,] 0.013648562 0.058393393 0.11184029 -#> [2,] 0.003811413 0.026857586 0.04774151 -#> [3,] 0.030548361 0.100600301 0.14847107 -#> [4,] 0.040381075 0.169596943 0.27018952 -#> [5,] 0.001484698 0.006663576 0.01337655 - -# predicted survival, i.e., 1 - risk +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [,1] [,2] [,3] +## [1,] 0.013648562 0.058393393 0.11184029 +## [2,] 0.003811413 0.026857586 0.04774151 +## [3,] 0.030548361 0.100600301 0.14847107 +## [4,] 0.040381075 0.169596943 0.27018952 +## [5,] 0.001484698 0.006663576 0.01337655 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{# predicted survival, i.e., 1 - risk predict(fit_surv, new_data = pbc_orsf_test[1:5, ], pred_type = 'surv', pred_horizon = c(500, 1000, 1500)) -#> [,1] [,2] [,3] -#> [1,] 0.9863514 0.9416066 0.8881597 -#> [2,] 0.9961886 0.9731424 0.9522585 -#> [3,] 0.9694516 0.8993997 0.8515289 -#> [4,] 0.9596189 0.8304031 0.7298105 -#> [5,] 0.9985153 0.9933364 0.9866235 - -# predicted cumulative hazard function +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [,1] [,2] [,3] +## [1,] 0.9863514 0.9416066 0.8881597 +## [2,] 0.9961886 0.9731424 0.9522585 +## [3,] 0.9694516 0.8993997 0.8515289 +## [4,] 0.9596189 0.8304031 0.7298105 +## [5,] 0.9985153 0.9933364 0.9866235 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{# predicted cumulative hazard function # (expected number of events for person i at time j) predict(fit_surv, new_data = pbc_orsf_test[1:5, ], pred_type = 'chf', pred_horizon = c(500, 1000, 1500)) -#> [,1] [,2] [,3] -#> [1,] 0.015395388 0.067815817 0.14942956 -#> [2,] 0.004022524 0.028740305 0.05424314 -#> [3,] 0.034832754 0.127687156 0.20899732 -#> [4,] 0.059978334 0.233048809 0.42562310 -#> [5,] 0.001651365 0.007173177 0.01393016 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [,1] [,2] [,3] +## [1,] 0.015395388 0.067815817 0.14942956 +## [2,] 0.004022524 0.028740305 0.05424314 +## [3,] 0.034832754 0.127687156 0.20899732 +## [4,] 0.059978334 0.233048809 0.42562310 +## [5,] 0.001651365 0.007173177 0.01393016 }\if{html}{\out{
}} Predict mortality, defined as the number of events in the forest’s @@ -256,16 +275,17 @@ population if all observations had characteristics like the current observation. This type of prediction does not require you to specify a prediction horizon -\if{html}{\out{
}}\preformatted{ -predict(fit_surv, +\if{html}{\out{
}}\preformatted{predict(fit_surv, new_data = pbc_orsf_test[1:5, ], pred_type = 'mort') -#> [,1] -#> [1,] 23.405016 -#> [2,] 15.362916 -#> [3,] 26.180648 -#> [4,] 36.515629 -#> [5,] 5.856674 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [,1] +## [1,] 23.405016 +## [2,] 15.362916 +## [3,] 26.180648 +## [4,] 36.515629 +## [5,] 5.856674 }\if{html}{\out{
}} } }