From b89c6f68b2961291247a9293335ac8e3d64c26e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 12 Feb 2025 15:32:00 -0500 Subject: [PATCH 1/8] overdue examples for brulee --- DESCRIPTION | 13 +- vignettes/articles/Examples.Rmd | 256 ++++++++++++++++++++++++++++++++ 2 files changed, 263 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f9af401d6..01b0860ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,12 @@ Package: parsnip Title: A Common API to Modeling and Analysis Functions -Version: 1.2.1.9004 +Version: 1.3.0 Authors@R: c( person("Max", "Kuhn", , "max@posit.co", role = c("aut", "cre")), person("Davis", "Vaughan", , "davis@posit.co", role = "aut"), person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = "ctb"), - person("Posit Software, PBC", role = c("cph", "fnd"), comment = c(ROR = "03wc8by49")) + person("Posit Software, PBC", role = c("cph", "fnd"), + comment = c(ROR = "03wc8by49")) ) Maintainer: Max Kuhn Description: A common interface is provided to allow users to specify a @@ -70,10 +71,10 @@ Suggests: VignetteBuilder: knitr ByteCompile: true -Config/Needs/website: C50, dbarts, earth, glmnet, keras, kernlab, kknn, - LiblineaR, mgcv, nnet, parsnip, quantreg, randomForest, ranger, rpart, - rstanarm, tidymodels/tidymodels, tidyverse/tidytemplate, rstudio/reticulate, - xgboost, rmarkdown +Config/Needs/website: brulee, C50, dbarts, earth, glmnet, keras, kernlab, + kknn, LiblineaR, mgcv, nnet, parsnip, quantreg, randomForest, ranger, + rpart, rstanarm, tidymodels/tidymodels, tidyverse/tidytemplate, + rstudio/reticulate, xgboost, rmarkdown Config/rcmdcheck/ignore-inconsequential-notes: true Config/testthat/edition: 3 Encoding: UTF-8 diff --git a/vignettes/articles/Examples.Rmd b/vignettes/articles/Examples.Rmd index 705014dce..aef6fd8bc 100644 --- a/vignettes/articles/Examples.Rmd +++ b/vignettes/articles/Examples.Rmd @@ -631,6 +631,41 @@ The following examples use consistent data sets 
throughout. For regression, we u ``` + +
+ + With the `"brulee"` engine + +

Regression Example (`brulee`)

+ + ```{r echo=FALSE} + knitr::spin_child("template-reg-chicago.R") + ``` + + We can define the model with specific parameters: + + ```{r} + linreg_reg_spec <- + linear_reg() %>% + set_engine("brulee") + linreg_reg_spec + ``` + + Now we create the model fit object: + + ```{r} + set.seed(1) + linreg_reg_fit <- linreg_reg_spec %>% fit(ridership ~ ., data = Chicago_train) + linreg_reg_fit + ``` + + The holdout data can be predicted: + + ```{r} + predict(linreg_reg_fit, Chicago_test) + ``` + +
## `logistic_reg()` models @@ -828,6 +863,45 @@ The following examples use consistent data sets throughout. For regression, we u + +
+ + With the `"brulee"` engine + +

Classification Example (`brulee`)

+ + ```{r echo=FALSE} + knitr::spin_child("template-cls-two-class.R") + ``` + + We can define the model with specific parameters: + + ```{r} + logreg_cls_spec <- + logistic_reg() %>% + set_engine("brulee") + logreg_cls_spec + ``` + + Now we create the model fit object: + + ```{r} + set.seed(1) + logreg_cls_fit <- logreg_cls_spec %>% fit(Class ~ ., data = data_train) + logreg_cls_fit + ``` + + The holdout data can be predicted for both hard class predictions and probabilities. We'll bind these together into one tibble: + + ```{r} + bind_cols( + predict(logreg_cls_fit, data_test), + predict(logreg_cls_fit, data_test, type = "prob") + ) + ``` + +
+ ## `mars()` models
@@ -1047,6 +1121,149 @@ The following examples use consistent data sets throughout. For regression, we u
+ +
+ + With the `"brulee"` engine + +

Regression Example (`brulee`)

+ + ```{r echo=FALSE} + knitr::spin_child("template-reg-chicago.R") + ``` + + We can define the model with specific parameters: + + ```{r} + mlp_reg_spec <- + mlp(penalty = 0, epochs = 100) %>% + # This model can be used for classification or regression, so set mode + set_mode("regression") %>% + set_engine("brulee") + mlp_reg_spec + ``` + + Now we create the model fit object: + + ```{r} + set.seed(1) + mlp_reg_fit <- mlp_reg_spec %>% fit(ridership ~ ., data = Chicago_train) + mlp_reg_fit + ``` + + The holdout data can be predicted: + + ```{r} + predict(mlp_reg_fit, Chicago_test) + ``` + +

Classification Example (`brulee`)

+ + ```{r echo=FALSE} + knitr::spin_child("template-cls-two-class.R") + ``` + + We can define the model with specific parameters: + + ```{r} + mlp_cls_spec <- + mlp(penalty = 0, epochs = 100) %>% + # This model can be used for classification or regression, so set mode + set_mode("classification") %>% + set_engine("brulee") + mlp_cls_spec + ``` + + Now we create the model fit object: + + ```{r} + set.seed(1) + mlp_cls_fit <- mlp_cls_spec %>% fit(Class ~ ., data = data_train) + mlp_cls_fit + ``` + + The holdout data can be predicted for both hard class predictions and probabilities. We'll bind these together into one tibble: + + ```{r} + bind_cols( + predict(mlp_cls_fit, data_test), + predict(mlp_cls_fit, data_test, type = "prob") + ) + ``` + +
+ +
+ + With the `"brulee_two_layer"` engine + +

Regression Example (`brulee_two_layer`)

+ + ```{r echo=FALSE} + knitr::spin_child("template-reg-chicago.R") + ``` + + We can define the model with specific parameters: + + ```{r} + mlp_reg_spec <- + mlp(penalty = 0, epochs = 10) %>% + # This model can be used for classification or regression, so set mode + set_mode("regression") %>% + set_engine("brulee_two_layer", hidden_units_2 = 2) + mlp_reg_spec + ``` + + Now we create the model fit object: + + ```{r} + set.seed(13) + mlp_reg_fit <- mlp_reg_spec %>% fit(ridership ~ ., data = Chicago_train) + mlp_reg_fit + ``` + + The holdout data can be predicted: + + ```{r} + predict(mlp_reg_fit, Chicago_test) + ``` + +

Classification Example (`brulee_two_layer`)

+ + ```{r echo=FALSE} + knitr::spin_child("template-cls-two-class.R") + ``` + + We can define the model with specific parameters: + + ```{r} + mlp_cls_spec <- + mlp(penalty = 0, epochs = 10) %>% + # This model can be used for classification or regression, so set mode + set_mode("classification") %>% + set_engine("brulee_two_layer", hidden_units_2 = 2) + mlp_cls_spec + ``` + + Now we create the model fit object: + + ```{r} + set.seed(12) + mlp_cls_fit <- mlp_cls_spec %>% fit(Class ~ ., data = data_train) + mlp_cls_fit + ``` + + The holdout data can be predicted for both hard class predictions and probabilities. We'll bind these together into one tibble: + + ```{r} + bind_cols( + predict(mlp_cls_fit, data_test), + predict(mlp_cls_fit, data_test, type = "prob") + ) + ``` + +
+ ## `multinom_reg()` models @@ -1167,6 +1384,45 @@ The following examples use consistent data sets throughout. For regression, we u +
+ + With the `"brulee"` engine + +

Classification Example (`brulee`)

+ + ```{r echo=FALSE} + knitr::spin_child("template-cls-multi-class.R") + ``` + + We can define the model with specific parameters: + + ```{r} + mr_cls_spec <- + multinom_reg(penalty = 0.1) %>% + set_engine("brulee") + mr_cls_spec + ``` + + Now we create the model fit object: + + ```{r} + set.seed(1) + mr_cls_fit <- mr_cls_spec %>% fit(island ~ ., data = penguins_train) + mr_cls_fit + ``` + + The holdout data can be predicted for both hard class predictions and probabilities. We'll bind these together into one tibble: + + ```{r} + bind_cols( + predict(mr_cls_fit, penguins_test), + predict(mr_cls_fit, penguins_test, type = "prob") + ) + ``` + +
+ + ## `nearest_neighbor()` models
From 7b9e1ec3ccf97ad8fd5704f538b12585ba45ba8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 12 Feb 2025 15:32:34 -0500 Subject: [PATCH 2/8] namespace to anchor links --- man/rmd/survival_reg_flexsurv.Rmd | 2 +- man/rmd/survival_reg_flexsurv.md | 2 +- man/rmd/survival_reg_flexsurvspline.Rmd | 2 +- man/rmd/survival_reg_flexsurvspline.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/man/rmd/survival_reg_flexsurv.Rmd b/man/rmd/survival_reg_flexsurv.Rmd index b5469e4f3..05cae3c27 100644 --- a/man/rmd/survival_reg_flexsurv.Rmd +++ b/man/rmd/survival_reg_flexsurv.Rmd @@ -40,7 +40,7 @@ survival_reg(dist = character(1)) %>% The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. -For this engine, stratification cannot be specified via [`strata()`], please see [flexsurv::flexsurvreg()] for alternative specifications. +For this engine, stratification cannot be specified via [`survival::strata()`], please see [flexsurv::flexsurvreg()] for alternative specifications. ```{r child = "template-survival-mean.Rmd"} ``` diff --git a/man/rmd/survival_reg_flexsurv.md b/man/rmd/survival_reg_flexsurv.md index f99705d92..a94358018 100644 --- a/man/rmd/survival_reg_flexsurv.md +++ b/man/rmd/survival_reg_flexsurv.md @@ -42,7 +42,7 @@ survival_reg(dist = character(1)) %>% The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. -For this engine, stratification cannot be specified via [`strata()`], please see [flexsurv::flexsurvreg()] for alternative specifications. +For this engine, stratification cannot be specified via [`survival::strata()`], please see [flexsurv::flexsurvreg()] for alternative specifications. 
diff --git a/man/rmd/survival_reg_flexsurvspline.Rmd b/man/rmd/survival_reg_flexsurvspline.Rmd index 58417c87a..5c49a9ab9 100644 --- a/man/rmd/survival_reg_flexsurvspline.Rmd +++ b/man/rmd/survival_reg_flexsurvspline.Rmd @@ -26,7 +26,7 @@ survival_reg() %>% The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. -For this engine, stratification cannot be specified via [`strata()`], please see [flexsurv::flexsurvspline()] for alternative specifications. +For this engine, stratification cannot be specified via [`survival::strata()`], please see [flexsurv::flexsurvspline()] for alternative specifications. ```{r child = "template-survival-mean.Rmd"} ``` diff --git a/man/rmd/survival_reg_flexsurvspline.md b/man/rmd/survival_reg_flexsurvspline.md index b7bfb41cb..bb5a74f2c 100644 --- a/man/rmd/survival_reg_flexsurvspline.md +++ b/man/rmd/survival_reg_flexsurvspline.md @@ -37,7 +37,7 @@ survival_reg() %>% The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. -For this engine, stratification cannot be specified via [`strata()`], please see [flexsurv::flexsurvspline()] for alternative specifications. +For this engine, stratification cannot be specified via [`survival::strata()`], please see [flexsurv::flexsurvspline()] for alternative specifications. From c41628d64bf53cb15469c4e1ee5f4283b0d2dc41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 12 Feb 2025 15:32:42 -0500 Subject: [PATCH 3/8] polish news --- NEWS.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index b46487f17..c0430f235 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# parsnip (development version) +# parsnip 1.3.0 ## New Features @@ -7,16 +7,17 @@ * Predictions are encoded via a custom vector type. See [hardhat::quantile_pred()]. 
* Predicted quantile levels are designated when the new mode is specified. See `?set_mode`. -* `fit_xy()` can now take dgCMatrix input for `x` argument (#1121). - -* `fit_xy()` can now take sparse tibbles as data values (#1165). - -* `predict()` can now take dgCMatrix and sparse tibble input for `new_data` argument, and error informatively when model doesn't support it (#1167). +* Updates for sparse data formats: + * `fit_xy()` can now take dgCMatrix input for `x` argument (#1121). + * `fit_xy()` can now take sparse tibbles as data values (#1165). + * `predict()` can now take dgCMatrix and sparse tibble input for `new_data` argument, and error informatively when model doesn't support it (#1167). * New `extract_fit_time()` method has been added that returns the time it took to train the model (#853). * `mlp()` with `keras` engine now work for all activation functions currently supported by `keras` (#1127). +* `mlp()` now has a `brulee_two_layer` engine. + ## Other Changes * Transitioned package errors and warnings to use cli (#1147 and #1148 by @shum461, #1153 by @RobLBaker and @wright13, #1154 by @JamesHWade, #1160, #1161, #1081). @@ -49,7 +50,6 @@ * `NULL` is no longer accepted as an engine (#1242). - # parsnip 1.2.1 * Added a missing `tidy()` method for survival analysis glmnet models (#1086). 
From e7f7ba3c7296947365ca18d41448af62d58ee8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 12 Feb 2025 15:34:28 -0500 Subject: [PATCH 4/8] update model file --- inst/models.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/models.tsv b/inst/models.tsv index 2c3e2d9ab..2c39fff0c 100644 --- a/inst/models.tsv +++ b/inst/models.tsv @@ -44,6 +44,7 @@ "discrim_regularized" "classification" "klaR" "discrim" "gen_additive_mod" "classification" "mgcv" NA "gen_additive_mod" "regression" "mgcv" NA +"linear_reg" "quantile regression" "quantreg" NA "linear_reg" "regression" "brulee" NA "linear_reg" "regression" "gee" "multilevelmod" "linear_reg" "regression" "glm" NA @@ -55,7 +56,6 @@ "linear_reg" "regression" "lm" NA "linear_reg" "regression" "lme" "multilevelmod" "linear_reg" "regression" "lmer" "multilevelmod" -"linear_reg" "quantile regression" "quantreg" NA "linear_reg" "regression" "spark" NA "linear_reg" "regression" "stan" NA "linear_reg" "regression" "stan_glmer" "multilevelmod" From c2d7591505671ea6f00528f28927fdcdc0a75b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 12 Feb 2025 15:37:16 -0500 Subject: [PATCH 5/8] redoc --- man/details_survival_reg_flexsurv.Rd | 2 +- man/details_survival_reg_flexsurvspline.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/details_survival_reg_flexsurv.Rd b/man/details_survival_reg_flexsurv.Rd index ea0122a01..d7e2504d1 100644 --- a/man/details_survival_reg_flexsurv.Rd +++ b/man/details_survival_reg_flexsurv.Rd @@ -48,7 +48,7 @@ model specification typically involved the use of \code{\link[survival:Surv]{survival::Surv()}}. For this engine, stratification cannot be specified via -\code{\link[=strata]{strata()}}, please see +\code{\link[survival:strata]{survival::strata()}}, please see \code{\link[flexsurv:flexsurvreg]{flexsurv::flexsurvreg()}} for alternative specifications. 
diff --git a/man/details_survival_reg_flexsurvspline.Rd b/man/details_survival_reg_flexsurvspline.Rd index 7e19af667..e3c565c70 100644 --- a/man/details_survival_reg_flexsurvspline.Rd +++ b/man/details_survival_reg_flexsurvspline.Rd @@ -45,7 +45,7 @@ model specification typically involved the use of \code{\link[survival:Surv]{survival::Surv()}}. For this engine, stratification cannot be specified via -\code{\link[=strata]{strata()}}, please see +\code{\link[survival:strata]{survival::strata()}}, please see \code{\link[flexsurv:flexsurvspline]{flexsurv::flexsurvspline()}} for alternative specifications. From 4b55307e0f6a267fa12a7d7be3df28185d4cf34e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 12 Feb 2025 20:33:45 -0500 Subject: [PATCH 6/8] small updates --- DESCRIPTION | 3 +-- tests/testthat/test-fit_interfaces.R | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 01b0860ba..7fc54ae10 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,8 +5,7 @@ Authors@R: c( person("Max", "Kuhn", , "max@posit.co", role = c("aut", "cre")), person("Davis", "Vaughan", , "davis@posit.co", role = "aut"), person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = "ctb"), - person("Posit Software, PBC", role = c("cph", "fnd"), - comment = c(ROR = "03wc8by49")) + person("Posit Software, PBC", role = c("cph", "fnd")) ) Maintainer: Max Kuhn Description: A common interface is provided to allow users to specify a diff --git a/tests/testthat/test-fit_interfaces.R b/tests/testthat/test-fit_interfaces.R index 3b5d45dc2..40ce9dcef 100644 --- a/tests/testthat/test-fit_interfaces.R +++ b/tests/testthat/test-fit_interfaces.R @@ -156,7 +156,7 @@ test_that("overhead of parsnip interface is minimal (#1071)", { skip_on_cran() skip_on_covr() skip_if_not_installed("bench") - skip_if_not_installed("parsnip", minimum_version = "1.3.0") + skip_if_not_installed("parsnip", minimum_version = "1.4.0") bm <- bench::mark( 
time_engine = lm(mpg ~ ., mtcars), From 3fd86f6c4edbda5ce6c64c3f5d17890ad3fedcf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 12 Feb 2025 20:36:53 -0500 Subject: [PATCH 7/8] doc update --- man/details_boost_tree_lightgbm.Rd | 8 ++++++++ man/details_decision_tree_partykit.Rd | 6 +++--- man/details_linear_reg_lme.Rd | 2 +- man/details_rand_forest_aorsf.Rd | 8 ++++---- man/details_rand_forest_partykit.Rd | 6 +++--- man/details_rand_forest_ranger.Rd | 4 ++++ man/parsnip-package.Rd | 2 +- man/rmd/boost_tree_lightgbm.md | 5 +++++ man/rmd/decision_tree_partykit.md | 6 +++--- man/rmd/linear_reg_lme.md | 2 +- man/rmd/rand_forest_aorsf.md | 6 +++--- man/rmd/rand_forest_partykit.md | 6 +++--- man/rmd/rand_forest_ranger.md | 2 ++ 13 files changed, 41 insertions(+), 22 deletions(-) diff --git a/man/details_boost_tree_lightgbm.Rd b/man/details_boost_tree_lightgbm.Rd index 1853eed15..28578bc9a 100644 --- a/man/details_boost_tree_lightgbm.Rd +++ b/man/details_boost_tree_lightgbm.Rd @@ -192,6 +192,14 @@ the user. To print out all logs during training, set \code{quiet = TRUE}. } +\subsection{Sparse Data}{ + +This model can utilize sparse data during model fitting and prediction. +Both sparse matrices such as dgCMatrix from the \code{Matrix} package and +sparse tibbles from the \code{sparsevctrs} package are supported. See +\link{sparse_data} for more information. +} + \subsection{Examples}{ The “Introduction to bonsai” article contains diff --git a/man/details_decision_tree_partykit.Rd b/man/details_decision_tree_partykit.Rd index ecd0cd264..87afcbcaa 100644 --- a/man/details_decision_tree_partykit.Rd +++ b/man/details_decision_tree_partykit.Rd @@ -8,14 +8,14 @@ tree-based structure using hypothesis testing methods. 
} \details{ -For this engine, there are multiple modes: regression, classification, -and censored regression +For this engine, there are multiple modes: censored regression, +regression, and classification \subsection{Tuning Parameters}{ This model has 2 tuning parameters: \itemize{ -\item \code{min_n}: Minimal Node Size (type: integer, default: 20L) \item \code{tree_depth}: Tree Depth (type: integer, default: see below) +\item \code{min_n}: Minimal Node Size (type: integer, default: 20L) } The \code{tree_depth} parameter defaults to \code{0} which means no restrictions diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd index 3c9c70096..93f2c2fd6 100644 --- a/man/details_linear_reg_lme.Rd +++ b/man/details_linear_reg_lme.Rd @@ -44,7 +44,7 @@ linear predictor (\verb{\eta}) for a random intercept: \if{html}{\out{
}}\preformatted{\eta_\{i\} = (\beta_0 + b_\{0i\}) + \beta_1x_\{i1\} }\if{html}{\out{
}} -where $i$ denotes the \code{i}th independent experimental unit +where \code{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that subject’s data to adjust the \emph{population} intercept to be more specific to that subjects results. diff --git a/man/details_rand_forest_aorsf.Rd b/man/details_rand_forest_aorsf.Rd index eeee82cd3..7796fdf1d 100644 --- a/man/details_rand_forest_aorsf.Rd +++ b/man/details_rand_forest_aorsf.Rd @@ -9,16 +9,16 @@ trees, each de-correlated from the others. The final prediction uses all predictions from the individual trees and combines them. } \details{ -For this engine, there are multiple modes: classification, regression, -and censored regression +For this engine, there are multiple modes: censored regression, +classification, and regression \subsection{Tuning Parameters}{ This model has 3 tuning parameters: \itemize{ -\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: -ceiling(sqrt(n_predictors))) \item \code{trees}: # Trees (type: integer, default: 500L) \item \code{min_n}: Minimal Node Size (type: integer, default: 5L) +\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: +ceiling(sqrt(n_predictors))) } Additionally, this model has one engine-specific tuning parameter: diff --git a/man/details_rand_forest_partykit.Rd b/man/details_rand_forest_partykit.Rd index e7c759d7f..25184df2e 100644 --- a/man/details_rand_forest_partykit.Rd +++ b/man/details_rand_forest_partykit.Rd @@ -9,15 +9,15 @@ trees, each independent of the others. The final prediction uses all predictions from the individual trees and combines them. 
} \details{ -For this engine, there are multiple modes: regression, classification, -and censored regression +For this engine, there are multiple modes: censored regression, +regression, and classification \subsection{Tuning Parameters}{ This model has 3 tuning parameters: \itemize{ +\item \code{trees}: # Trees (type: integer, default: 500L) \item \code{min_n}: Minimal Node Size (type: integer, default: 20L) \item \code{mtry}: # Randomly Selected Predictors (type: integer, default: 5L) -\item \code{trees}: # Trees (type: integer, default: 500L) } } diff --git a/man/details_rand_forest_ranger.Rd b/man/details_rand_forest_ranger.Rd index 1253d39fc..31cde1265 100644 --- a/man/details_rand_forest_ranger.Rd +++ b/man/details_rand_forest_ranger.Rd @@ -129,6 +129,10 @@ This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the \code{Matrix} package and sparse tibbles from the \code{sparsevctrs} package are supported. See \link{sparse_data} for more information. + +While this engine supports sparse data as an input, it doesn’t use it +any differently than dense data. Hence there is no reason to convert +back and forth. 
} \subsection{Saving fitted model objects}{ diff --git a/man/parsnip-package.Rd b/man/parsnip-package.Rd index 2e074dc3b..84b9a622d 100644 --- a/man/parsnip-package.Rd +++ b/man/parsnip-package.Rd @@ -30,7 +30,7 @@ Authors: Other contributors: \itemize{ \item Emil Hvitfeldt \email{emil.hvitfeldt@posit.co} [contributor] - \item Posit Software, PBC (03wc8by49) [copyright holder, funder] + \item Posit Software, PBC [copyright holder, funder] } } diff --git a/man/rmd/boost_tree_lightgbm.md b/man/rmd/boost_tree_lightgbm.md index 8acc1f037..ad23cdc5a 100644 --- a/man/rmd/boost_tree_lightgbm.md +++ b/man/rmd/boost_tree_lightgbm.md @@ -133,6 +133,11 @@ To effectively enable bagging, the user would also need to set the `bagging_freq bonsai quiets much of the logging output from [lightgbm::lgb.train()] by default. With default settings, logged warnings and errors will still be passed on to the user. To print out all logs during training, set `quiet = TRUE`. +## Sparse Data + + +This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. + ## Examples The "Introduction to bonsai" article contains [examples](https://bonsai.tidymodels.org/articles/bonsai.html) of `boost_tree()` with the `"lightgbm"` engine. 
diff --git a/man/rmd/decision_tree_partykit.md b/man/rmd/decision_tree_partykit.md index 8fa6c53af..15cd1be1a 100644 --- a/man/rmd/decision_tree_partykit.md +++ b/man/rmd/decision_tree_partykit.md @@ -1,7 +1,7 @@ -For this engine, there are multiple modes: regression, classification, and censored regression +For this engine, there are multiple modes: censored regression, regression, and classification ## Tuning Parameters @@ -9,10 +9,10 @@ For this engine, there are multiple modes: regression, classification, and censo This model has 2 tuning parameters: -- `min_n`: Minimal Node Size (type: integer, default: 20L) - - `tree_depth`: Tree Depth (type: integer, default: see below) +- `min_n`: Minimal Node Size (type: integer, default: 20L) + The `tree_depth` parameter defaults to `0` which means no restrictions are applied to tree depth. An engine-specific parameter for this model is: diff --git a/man/rmd/linear_reg_lme.md b/man/rmd/linear_reg_lme.md index c939889b7..ec58f84c9 100644 --- a/man/rmd/linear_reg_lme.md +++ b/man/rmd/linear_reg_lme.md @@ -39,7 +39,7 @@ This model can use subject-specific coefficient estimates to make predictions (i \eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} ``` -where $i$ denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subjects results. +where `i` denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subjects results. What happens when data are being predicted for a subject that was not used in the model fit? 
In that case, this package uses _only_ the population parameter estimates for prediction: diff --git a/man/rmd/rand_forest_aorsf.md b/man/rmd/rand_forest_aorsf.md index 3dd2d5b1d..948de93eb 100644 --- a/man/rmd/rand_forest_aorsf.md +++ b/man/rmd/rand_forest_aorsf.md @@ -1,7 +1,7 @@ -For this engine, there are multiple modes: classification, regression, and censored regression +For this engine, there are multiple modes: censored regression, classification, and regression ## Tuning Parameters @@ -9,12 +9,12 @@ For this engine, there are multiple modes: classification, regression, and censo This model has 3 tuning parameters: -- `mtry`: # Randomly Selected Predictors (type: integer, default: ceiling(sqrt(n_predictors))) - - `trees`: # Trees (type: integer, default: 500L) - `min_n`: Minimal Node Size (type: integer, default: 5L) +- `mtry`: # Randomly Selected Predictors (type: integer, default: ceiling(sqrt(n_predictors))) + Additionally, this model has one engine-specific tuning parameter: * `split_min_stat`: Minimum test statistic required to split a node. Defaults are `3.841459` for censored regression (which is roughly a p-value of 0.05) and `0` for classification and regression. For classification, this tuning parameter should be between 0 and 1, and for regression it should be greater than or equal to 0. Higher values of this parameter cause trees grown by `aorsf` to have less depth. 
diff --git a/man/rmd/rand_forest_partykit.md b/man/rmd/rand_forest_partykit.md index 7204c7c97..3e918c8df 100644 --- a/man/rmd/rand_forest_partykit.md +++ b/man/rmd/rand_forest_partykit.md @@ -1,7 +1,7 @@ -For this engine, there are multiple modes: regression, classification, and censored regression +For this engine, there are multiple modes: censored regression, regression, and classification ## Tuning Parameters @@ -9,12 +9,12 @@ For this engine, there are multiple modes: regression, classification, and censo This model has 3 tuning parameters: +- `trees`: # Trees (type: integer, default: 500L) + - `min_n`: Minimal Node Size (type: integer, default: 20L) - `mtry`: # Randomly Selected Predictors (type: integer, default: 5L) -- `trees`: # Trees (type: integer, default: 500L) - ## Translation from parsnip to the original package (regression) The **bonsai** extension package is required to fit this model. diff --git a/man/rmd/rand_forest_ranger.md b/man/rmd/rand_forest_ranger.md index 50420f527..efcced7e8 100644 --- a/man/rmd/rand_forest_ranger.md +++ b/man/rmd/rand_forest_ranger.md @@ -108,6 +108,8 @@ The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that e This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. +While this engine supports sparse data as an input, it doesn't use it any differently than dense data. Hence there is no reason to convert back and forth. 
+ ## Saving fitted model objects From 4923a19b969e138ce005ee5242913409c8fef431 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 12 Feb 2025 20:53:04 -0500 Subject: [PATCH 8/8] install torch --- .github/workflows/pkgdown.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index a907d5095..2e6925f56 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -49,6 +49,11 @@ jobs: tensorflow::install_tensorflow(version='2.13', conda_python_version = NULL) shell: Rscript {0} + - name: Install Torch + run: | + torch::install_torch() + shell: Rscript {0} + - name: Build site run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0}