diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 8bd8caabc20f..1fa0d7a9cf46 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -21,7 +21,7 @@ S3method(variable.names,xgb.Booster) export("xgb.attr<-") export("xgb.attributes<-") export("xgb.config<-") -export("xgb.parameters<-") +export("xgb.model.parameters<-") export(getinfo) export(setinfo) export(xgb.Callback) @@ -61,6 +61,7 @@ export(xgb.is.same.Booster) export(xgb.load) export(xgb.load.raw) export(xgb.model.dt.tree) +export(xgb.params) export(xgb.plot.deepness) export(xgb.plot.importance) export(xgb.plot.multi.trees) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index d83b6b184329..94aac5beede6 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -204,7 +204,7 @@ #' dm <- xgb.DMatrix(x, label = y, nthread = 1) #' model <- xgb.train( #' data = dm, -#' params = list(objective = "reg:squarederror", nthread = 1), +#' params = xgb.params(objective = "reg:squarederror", nthread = 1), #' nrounds = 5, #' callbacks = list(ssq_callback), #' keep_extra_attributes = TRUE @@ -563,7 +563,7 @@ xgb.cb.reset.parameters <- function(new_params) { } }, f_before_iter = function(env, model, data, evals, iteration) { - pars <- lapply(env$new_params, function(p) { + params <- lapply(env$new_params, function(p) { if (is.function(p)) { return(p(iteration, env$end_iteration)) } else { @@ -572,10 +572,10 @@ xgb.cb.reset.parameters <- function(new_params) { }) if (inherits(model, "xgb.Booster")) { - xgb.parameters(model) <- pars + xgb.model.parameters(model) <- params } else { for (fd in model) { - xgb.parameters(fd$bst) <- pars + xgb.model.parameters(fd$bst) <- params } } return(FALSE) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index b38cd42bcef3..8cf41067a73d 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -20,20 +20,20 @@ xgb.Booster <- function(params, cachelist, modelfile) { .Call(XGBoosterLoadModel_R, xgb.get.handle(bst), 
enc2utf8(modelfile[1])) niter <- xgb.get.num.boosted.rounds(bst) if (length(params) > 0) { - xgb.parameters(bst) <- params + xgb.model.parameters(bst) <- params } return(list(bst = bst, niter = niter)) } else if (is.raw(modelfile)) { ## A memory buffer bst <- xgb.load.raw(modelfile) niter <- xgb.get.num.boosted.rounds(bst) - xgb.parameters(bst) <- params + xgb.model.parameters(bst) <- params return(list(bst = bst, niter = niter)) } else if (inherits(modelfile, "xgb.Booster")) { ## A booster object bst <- .Call(XGDuplicate_R, modelfile) niter <- xgb.get.num.boosted.rounds(bst) - xgb.parameters(bst) <- params + xgb.model.parameters(bst) <- params return(list(bst = bst, niter = niter)) } else { stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object") @@ -42,7 +42,7 @@ xgb.Booster <- function(params, cachelist, modelfile) { ## Create new model bst <- .Call(XGBoosterCreate_R, cachelist) if (length(params) > 0) { - xgb.parameters(bst) <- params + xgb.model.parameters(bst) <- params } return(list(bst = bst, niter = 0L)) } @@ -196,7 +196,7 @@ xgb.get.handle <- function(object) { #' of the most important features first. See below about the format of the returned results. #' #' The `predict()` method uses as many threads as defined in `xgb.Booster` object (all by default). -#' If you want to change their number, assign a new number to `nthread` using [xgb.parameters<-()]. +#' If you want to change their number, assign a new number to `nthread` using [xgb.model.parameters<-()]. #' Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too. #' #' @return @@ -631,7 +631,7 @@ validate.features <- function(bst, newdata) { #' and its serialization is handled externally. #' Also, setting an attribute that has the same name as one of XGBoost's parameters wouldn't #' change the value of that parameter for a model. -#' Use [xgb.parameters<-()] to set or change model parameters. 
+#' Use [xgb.model.parameters<-()] to set or change model parameters. #' #' The `xgb.attributes<-` setter either updates the existing or adds one or several attributes, #' but it doesn't delete the other existing attributes. @@ -828,11 +828,11 @@ xgb.config <- function(object) { #' objective = "binary:logistic" #' ) #' -#' xgb.parameters(bst) <- list(eta = 0.1) +#' xgb.model.parameters(bst) <- list(eta = 0.1) #' -#' @rdname xgb.parameters +#' @rdname xgb.model.parameters #' @export -`xgb.parameters<-` <- function(object, value) { +`xgb.model.parameters<-` <- function(object, value) { if (length(value) == 0) return(object) p <- as.list(value) if (is.null(names(p)) || any(nchar(names(p)) == 0)) { @@ -897,7 +897,7 @@ setinfo.xgb.Booster <- function(object, name, info) { #' @param model,x A fitted `xgb.Booster` model. #' @return The number of rounds saved in the model as an integer. #' @details Note that setting booster parameters related to training -#' continuation / updates through [xgb.parameters<-()] will reset the +#' continuation / updates through [xgb.model.parameters<-()] will reset the #' number of rounds to zero. #' @export #' @rdname xgb.get.num.boosted.rounds @@ -936,7 +936,7 @@ length.xgb.Booster <- function(x) { #' x <- as.matrix(mtcars[, -1]) #' #' dm <- xgb.DMatrix(x, label = y, nthread = 1) -#' model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5) +#' model <- xgb.train(data = dm, params = xgb.params(nthread = 1), nrounds = 5) #' model_slice <- xgb.slice.Booster(model, 1, 3) #' # Prediction for first three rounds #' predict(model, x, predleaf = TRUE)[, 1:3] @@ -1163,7 +1163,7 @@ coef.xgb.Booster <- function(object, ...) 
{ #' #' model <- xgb.train( #' data = dm, -#' params = list(nthread = 1), +#' params = xgb.params(nthread = 1), #' nround = 3 #' ) #' @@ -1225,7 +1225,7 @@ xgb.copy.Booster <- function(model) { #' x <- as.matrix(mtcars[, -1]) #' #' model <- xgb.train( -#' params = list(nthread = 1), +#' params = xgb.params(nthread = 1), #' data = xgb.DMatrix(x, label = y, nthread = 1), #' nround = 3 #' ) diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 280fcf52ee3e..b20796d750ab 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -1136,7 +1136,7 @@ setinfo.xgb.DMatrix <- function(object, name, info) { #' # DMatrix is not quantized right away, but will be once a hist model is generated #' model <- xgb.train( #' data = dm, -#' params = list(tree_method = "hist", max_bin = 8, nthread = 1), +#' params = xgb.params(tree_method = "hist", max_bin = 8, nthread = 1), #' nrounds = 3 #' ) #' diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 67821919f71c..f3be5191b4db 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -2,24 +2,7 @@ #' #' The cross validation function of xgboost. #' -#' @param params The list of parameters. The complete list of parameters is available in the -#' [online documentation](http://xgboost.readthedocs.io/en/latest/parameter.html). -#' Below is a shorter summary: -#' - `objective`: Objective function, common ones are -#' - `reg:squarederror`: Regression with squared loss. -#' - `binary:logistic`: Logistic regression for classification. -#' -#' See [xgb.train()] for complete list of objectives. -#' - `eta`: Step size of each boosting step -#' - `max_depth`: Maximum depth of the tree -#' - `nthread`: Number of threads used in training. If not set, all threads are used -#' -#' See [xgb.train()] for further details. -#' See also demo for walkthrough example in R. 
-#' -#' Note that, while `params` accepts a `seed` entry and will use such parameter for model training if -#' supplied, this seed is not used for creation of train-test splits, which instead rely on R's own RNG -#' system - thus, for reproducible results, one needs to call the [set.seed()] function beforehand. +#' @inheritParams xgb.train #' @param data An `xgb.DMatrix` object, with corresponding fields like `label` or bounds as required #' for model training by the objective. #' @@ -84,8 +67,6 @@ #' See [xgb.Callback()]. Some of the callbacks are automatically created depending on the #' parameters' values. User can provide either existing or their own callback methods in order #' to customize the training process. -#' @param ... Other parameters to pass to `params`. -#' #' @details #' The original sample is randomly partitioned into `nfold` equal size subsamples. #' @@ -133,13 +114,14 @@ #' nfold = 5, #' metrics = list("rmse","auc"), #' max_depth = 3, -#' eta = 1,objective = "binary:logistic" +#' eta = 1, +#' objective = "binary:logistic" #' ) #' print(cv) #' print(cv, verbose = TRUE) #' #' @export -xgb.cv <- function(params = list(), data, nrounds, nfold, +xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, prediction = FALSE, showsd = TRUE, metrics = list(), obj = NULL, feval = NULL, stratified = "auto", folds = NULL, train_folds = NULL, verbose = TRUE, print_every_n = 1L, diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index cafdde2da856..aee8f37d8f7b 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -3,117 +3,20 @@ #' `xgb.train()` is an advanced interface for training an xgboost model. #' The [xgboost()] function is a simpler wrapper for `xgb.train()`. #' -#' @param params the list of parameters. The complete list of parameters is -#' available in the [online documentation](http://xgboost.readthedocs.io/en/latest/parameter.html). -#' Below is a shorter summary: -#' -#' **1. 
General Parameters** -#' -#' - `booster`: Which booster to use, can be `gbtree` or `gblinear`. Default: `gbtree`. -#' -#' **2. Booster Parameters** -#' -#' **2.1. Parameters for Tree Booster** -#' - `eta`: The learning rate: scale the contribution of each tree by a factor of `0 < eta < 1` -#' when it is added to the current approximation. -#' Used to prevent overfitting by making the boosting process more conservative. -#' Lower value for `eta` implies larger value for `nrounds`: low `eta` value means model -#' more robust to overfitting but slower to compute. Default: 0.3. -#' - `gamma`: Minimum loss reduction required to make a further partition on a leaf node of the tree. -#' the larger, the more conservative the algorithm will be. -#' - `max_depth`: Maximum depth of a tree. Default: 6. -#' - `min_child_weight`: Minimum sum of instance weight (hessian) needed in a child. -#' If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, -#' then the building process will give up further partitioning. -#' In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. -#' The larger, the more conservative the algorithm will be. Default: 1. -#' - `subsample`: Subsample ratio of the training instance. -#' Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees -#' and this will prevent overfitting. It makes computation shorter (because less data to analyse). -#' It is advised to use this parameter with `eta` and increase `nrounds`. Default: 1. -#' - `colsample_bytree`: Subsample ratio of columns when constructing each tree. Default: 1. -#' - `lambda`: L2 regularization term on weights. Default: 1. -#' - `alpha`: L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0. -#' - `num_parallel_tree`: Experimental parameter. number of trees to grow per round. 
-#' Useful to test Random Forest through XGBoost. -#' (set `colsample_bytree < 1`, `subsample < 1` and `round = 1`) accordingly. -#' Default: 1. -#' - `monotone_constraints`: A numerical vector consists of `1`, `0` and `-1` with its length -#' equals to the number of features in the training data. -#' `1` is increasing, `-1` is decreasing and `0` is no constraint. -#' - `interaction_constraints`: A list of vectors specifying feature indices of permitted interactions. -#' Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. -#' Feature index values should start from `0` (`0` references the first column). -#' Leave argument unspecified for no interaction constraints. -#' -#' **2.2. Parameters for Linear Booster** -#' -#' - `lambda`: L2 regularization term on weights. Default: 0. -#' - `lambda_bias`: L2 regularization term on bias. Default: 0. -#' - `alpha`: L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0. -#' -#' **3. Task Parameters** -#' -#' - `objective`: Specifies the learning task and the corresponding learning objective. -#' users can pass a self-defined function to it. The default objective options are below: -#' - `reg:squarederror`: Regression with squared loss (default). -#' - `reg:squaredlogerror`: Regression with squared log loss \eqn{1/2 \cdot (\log(pred + 1) - \log(label + 1))^2}. -#' All inputs are required to be greater than -1. -#' Also, see metric rmsle for possible issue with this objective. -#' - `reg:logistic`: Logistic regression. -#' - `reg:pseudohubererror`: Regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss. -#' - `binary:logistic`: Logistic regression for binary classification. Output probability. -#' - `binary:logitraw`: Logistic regression for binary classification, output score before logistic transformation. -#' - `binary:hinge`: Hinge loss for binary classification. 
This makes predictions of 0 or 1, rather than producing probabilities. -#' - `count:poisson`: Poisson regression for count data, output mean of Poisson distribution. -#' The parameter `max_delta_step` is set to 0.7 by default in poisson regression -#' (used to safeguard optimization). -#' - `survival:cox`: Cox regression for right censored survival time data (negative values are considered right censored). -#' Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional -#' hazard function \eqn{h(t) = h_0(t) \cdot HR}. -#' - `survival:aft`: Accelerated failure time model for censored survival time data. See -#' [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) -#' for details. -#' The parameter `aft_loss_distribution` specifies the Probability Density Function -#' used by `survival:aft` and the `aft-nloglik` metric. -#' - `multi:softmax`: Set xgboost to do multiclass classification using the softmax objective. -#' Class is represented by a number and should be from 0 to `num_class - 1`. -#' - `multi:softprob`: Same as softmax, but prediction outputs a vector of ndata * nclass elements, which can be -#' further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging -#' to each class. -#' - `rank:pairwise`: Set XGBoost to do ranking task by minimizing the pairwise loss. -#' - `rank:ndcg`: Use LambdaMART to perform list-wise ranking where -#' [Normalized Discounted Cumulative Gain (NDCG)](https://en.wikipedia.org/wiki/Discounted_cumulative_gain) is maximized. -#' - `rank:map`: Use LambdaMART to perform list-wise ranking where -#' [Mean Average Precision (MAP)](https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Mean_average_precision) -#' is maximized. -#' - `reg:gamma`: Gamma regression with log-link. Output is a mean of gamma distribution. 
-#' It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be -#' [gamma-distributed](https://en.wikipedia.org/wiki/Gamma_distribution#Applications). -#' - `reg:tweedie`: Tweedie regression with log-link. -#' It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be -#' [Tweedie-distributed](https://en.wikipedia.org/wiki/Tweedie_distribution#Applications). -#' -#' For custom objectives, one should pass a function taking as input the current predictions (as a numeric -#' vector or matrix) and the training data (as an `xgb.DMatrix` object) that will return a list with elements -#' `grad` and `hess`, which should be numeric vectors or matrices with number of rows matching to the numbers -#' of rows in the training data (same shape as the predictions that are passed as input to the function). -#' For multi-valued custom objectives, should have shape `[nrows, ntargets]`. Note that negative values of -#' the Hessian will be clipped, so one might consider using the expected Hessian (Fisher information) if the -#' objective is non-convex. -#' -#' See the tutorials [Custom Objective and Evaluation Metric](https://xgboost.readthedocs.io/en/stable/tutorials/custom_metric_obj.html) -#' and [Advanced Usage of Custom Objectives](https://xgboost.readthedocs.io/en/latest/tutorials/advanced_custom_obj.html) -#' for more information about custom objectives. -#' -#' - `base_score`: The initial prediction score of all instances, global bias. Default: 0.5. -#' - `eval_metric`: Evaluation metrics for validation data. -#' Users can pass a self-defined function to it. -#' Default: metric will be assigned according to objective -#' (rmse for regression, and error for classification, mean average precision for ranking). -#' List is provided in detail section. +#' @param params List of XGBoost parameters which control the model building process. 
+#' See the [online documentation](http://xgboost.readthedocs.io/en/latest/parameter.html) +#' and the documentation for [xgb.params()] for details. +#' +#' Should be passed as list with named entries. Parameters that are not specified in this +#' list will use their default values. Alternatively, parameters may be passed directly +#' as function arguments (accepted through `...`). +#' +#' A list of named parameters can be created through the function [xgb.params()], which +#' accepts all valid parameters as function arguments. #' @param data Training dataset. `xgb.train()` accepts only an `xgb.DMatrix` as the input. -#' [xgboost()], in addition, also accepts `matrix`, `dgCMatrix`, or name of a local data file. +#' +#' Note that there is a function [xgboost()] which is meant to accept R data objects +#' as inputs, such as data frames and matrices. #' @param nrounds Max number of boosting iterations. #' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance. #' Metrics specified in either `eval_metric` or `feval` will be computed for each @@ -168,44 +71,21 @@ #' such as an evaluation log (a `data.table` object) - be aware that these objects are kept #' as R attributes, and thus do not get saved when using XGBoost's own serializaters like #' [xgb.save()] (but are kept when using R serializers like [saveRDS()]). -#' @param ... other parameters to pass to `params`. -#' +#' @param ... Other parameters to pass to `params`. See [xgb.params()] for more details. #' @return An object of class `xgb.Booster`. -#' #' @details -#' These are the training functions for [xgboost()]. -#' -#' The `xgb.train()` interface supports advanced features such as `evals`, -#' customized objective and evaluation metric functions, therefore it is more flexible -#' than the [xgboost()] interface. 
+#' Compared to [xgboost()], the `xgb.train()` interface supports advanced features such as +#' `evals`, customized objective and evaluation metric functions, among others, with the +#' difference that these work on `xgb.DMatrix` objects and do not follow typical R idioms. #' #' Parallelization is automatically enabled if OpenMP is present. #' Number of threads can also be manually specified via the `nthread` parameter. #' -#' While in other interfaces, the default random seed defaults to zero, in R, if a parameter `seed` +#' While in other XGBoost language bindings the random seed defaults to zero, in R, if a parameter `seed` #' is not manually supplied, it will generate a random seed through R's own random number generator, #' whose seed in turn is controllable through `set.seed`. If `seed` is passed, it will override the #' RNG from R. #' -#' The evaluation metric is chosen automatically by XGBoost (according to the objective) -#' when the `eval_metric` parameter is not provided. -#' User may set one or several `eval_metric` parameters. -#' Note that when using a customized metric, only this single metric can be used. -#' The following is the list of built-in metrics for which XGBoost provides optimized implementation: -#' - `rmse`: Root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error} -#' - `logloss`: Negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood} -#' - `mlogloss`: Multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html} -#' - `error`: Binary classification error rate. It is calculated as `(# wrong cases) / (# all cases)`. -#' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. -#' Different threshold (e.g., 0.) could be specified as `error@0`. -#' - `merror`: Multiclass classification error rate. It is calculated as `(# wrong cases) / (# all cases)`. -#' - `mae`: Mean absolute error. 
-#' - `mape`: Mean absolute percentage error. -#' - `auc`: Area under the curve. -#' \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. -#' - `aucpr`: Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. -#' - `ndcg`: Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG} -#' #' The following callbacks are automatically created when certain parameters are set: #' - [xgb.cb.print.evaluation()] is turned on when `verbose > 0` and the `print_every_n` #' parameter is passed to it. @@ -226,7 +106,7 @@ #' is assigned from the `params` argument to this function, and is only meant to serve as a #' reference for what went into the booster, but is not used in other methods that take a booster #' object - so for example, changing the booster's configuration requires calling `xgb.config<-` -#' or `xgb.parameters<-`, while simply modifying `attributes(model)$params$<...>` will have no +#' or `xgb.model.parameters<-`, while simply modifying `attributes(model)$params$<...>` will have no #' effect elsewhere. 
#' #' @seealso [xgb.Callback()], [predict.xgb.Booster()], [xgb.cv()] @@ -306,7 +186,7 @@ #' #' #' ## An xgb.train example of using variable learning rates at each iteration: -#' param <- list( +#' param <- xgb.params( #' max_depth = 2, #' eta = 1, #' nthread = nthread, @@ -328,19 +208,8 @@ #' bst <- xgb.train( #' param, dtrain, nrounds = 25, evals = evals, early_stopping_rounds = 3 #' ) -#' -#' ## An 'xgboost' interface example: -#' bst <- xgboost( -#' x = agaricus.train$data, -#' y = factor(agaricus.train$label), -#' params = list(max_depth = 2, eta = 1), -#' nthread = nthread, -#' nrounds = 2 -#' ) -#' pred <- predict(bst, agaricus.test$data) -#' #' @export -xgb.train <- function(params = list(), data, nrounds, evals = list(), +xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(), obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L, early_stopping_rounds = NULL, maximize = NULL, save_period = NULL, save_name = "xgboost.model", @@ -512,3 +381,435 @@ xgb.train <- function(params = list(), data, nrounds, evals = list(), return(bst) } + +# nolint start: line_length_linter +#' @title XGBoost Parameters +#' @description Convenience function to generate a list of named XGBoost parameters, which +#' can be passed as argument `params` to [xgb.train()]. See the [online documentation]( +#' https://xgboost.readthedocs.io/en/stable/parameter.html) for more details. +#' +#' The purpose of this function is to enable IDE autocompletions and to provide in-package +#' documentation for all the possible parameters that XGBoost accepts. The output from this +#' function is just a regular R list containing the parameters that were set to non-default +#' values. Note that this function will not perform any validation on the supplied arguments. +#' +#' If passing `NULL` for a given parameter (the default for all of them), then the default +#' value for that parameter will be used. 
Default values are automatically determined by the +#' XGBoost core library upon calls to [xgb.train()] or [xgb.cv()], and are subject to change +#' over XGBoost library versions. +#' @return A list with the entries that were passed non-NULL values. It is intended to +#' be passed as argument `params` to [xgb.train()] or [xgb.cv()]. +#' @export +#' @param objective (default=`"reg:squarederror"`) +#' Specify the learning task and the corresponding learning objective or a custom objective function to be used. +#' +#' For custom objective, see [Custom Objective and Evaluation Metric](https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html) +#' and [Custom objective and metric](https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html#custom-obj-metric) for more information, +#' along with the end note for function signatures. +#' +#' Supported values are: +#' - `"reg:squarederror"`: regression with squared loss. +#' - `"reg:squaredlogerror"`: regression with squared log loss \eqn{\frac{1}{2}[log(pred + 1) - log(label + 1)]^2}. All input labels are required to be greater than -1. Also, see metric `rmsle` for possible issue with this objective. +#' - `"reg:logistic"`: logistic regression, output probability +#' - `"reg:pseudohubererror"`: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss. +#' - `"reg:absoluteerror"`: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal. +#' +#' Version added: 1.7.0 +#' - `"reg:quantileerror"`: Quantile loss, also known as "pinball loss". See later sections for its parameter and [Quantile Regression](https://xgboost.readthedocs.io/en/latest/python/examples/quantile_regression.html#sphx-glr-python-examples-quantile-regression-py) for a worked example. 
+#' +#' Version added: 2.0.0 +#' - `"binary:logistic"`: logistic regression for binary classification, output probability +#' - `"binary:logitraw"`: logistic regression for binary classification, output score before logistic transformation +#' - `"binary:hinge"`: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. +#' - `"count:poisson"`: Poisson regression for count data, output mean of Poisson distribution. `"max_delta_step"` is set to 0.7 by default in Poisson regression (used to safeguard optimization) +#' - `"survival:cox"`: Cox regression for right censored survival time data (negative values are considered right censored). +#' +#' Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function `h(t) = h0(t) * HR`). +#' - `"survival:aft"`: Accelerated failure time model for censored survival time data. +#' See [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) for details. +#' - `"multi:softmax"`: set XGBoost to do multiclass classification using the softmax objective, you also need to set `num_class` (number of classes) +#' - `"multi:softprob"`: same as softmax, but output a vector of `ndata * nclass`, which can be further reshaped to `ndata * nclass` matrix. The result contains predicted probability of each data point belonging to each class. +#' - `"rank:ndcg"`: Use LambdaMART to perform pair-wise ranking where [Normalized Discounted Cumulative Gain (NDCG)](http://en.wikipedia.org/wiki/NDCG) is maximized. This objective supports position debiasing for click data. +#' - `"rank:map"`: Use LambdaMART to perform pair-wise ranking where [Mean Average Precision (MAP)](http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision) is maximized +#' - `"rank:pairwise"`: Use LambdaRank to perform pair-wise ranking using the `ranknet` objective. 
+#' - `"reg:gamma"`: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be [gamma-distributed](https://en.wikipedia.org/wiki/Gamma_distribution#Occurrence_and_applications). +#' - `"reg:tweedie"`: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be [Tweedie-distributed](https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications). +#' @param verbosity (default=1) +#' Verbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), 3 +#' (debug). Sometimes XGBoost tries to change configurations based on heuristics, which +#' is displayed as warning message. If there's unexpected behaviour, please try to +#' increase value of verbosity. +#' @param nthread (default to maximum number of threads available if not set) +#' Number of parallel threads used to run XGBoost. When choosing it, please keep thread +#' contention and hyperthreading in mind. +#' @param seed Random number seed. If not specified, will take a random seed through R's own RNG engine. +#' @param booster (default= `"gbtree"`) +#' Which booster to use. Can be `"gbtree"`, `"gblinear"` or `"dart"`; `"gbtree"` and `"dart"` use tree based models while `"gblinear"` uses linear functions. +#' @param eta,learning_rate (two aliases for the same parameter) (for Tree Booster) (default=0.3) +#' Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and `eta` shrinks the feature weights to make the boosting process more conservative. +#' +#' range: \eqn{[0,1]} +#' +#' Note: should only pass one of `eta` or `learning_rate`. Both refer to the same parameter and there's thus no difference between one or the other. 
+#' @param gamma,min_split_loss (two aliases for the same parameter) (for Tree Booster) (default=0, alias: `gamma`) +#' Minimum loss reduction required to make a further partition on a leaf node of the tree. The larger `min_split_loss` is, the more conservative the algorithm will be. Note that a tree where no splits were made might still contain a single terminal node with a non-zero score. +#' +#' range: \eqn{[0, \infty)} +#' +#' Note: should only pass one of `gamma` or `min_split_loss`. Both refer to the same parameter and there's thus no difference between one or the other. +#' @param max_depth (for Tree Booster) (default=6) +#' Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. `"exact"` tree method requires non-zero value. +#' +#' range: \eqn{[0, \infty)} +#' @param min_child_weight (for Tree Booster) (default=1) +#' Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than `min_child_weight`, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger `min_child_weight` is, the more conservative the algorithm will be. +#' +#' range: \eqn{[0, \infty)} +#' @param max_delta_step (for Tree Booster) (default=0) +#' Maximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Set it to value of 1-10 might help control the update. 
+#' +#' range: \eqn{[0, \infty)} +#' @param subsample (for Tree Booster) (default=1) +#' Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees, and this will prevent overfitting. Subsampling will occur once in every boosting iteration. +#' +#' range: \eqn{(0,1]} +#' @param sampling_method (for Tree Booster) (default= `"uniform"`) +#' The method to use to sample the training instances. +#' - `"uniform"`: each training instance has an equal probability of being selected. Typically set +#' `"subsample"` >= 0.5 for good results. +#' - `"gradient_based"`: the selection probability for each training instance is proportional to the +#' \bold{regularized absolute value} of gradients (more specifically, \eqn{\sqrt{g^2+\lambda h^2}}). +#' `"subsample"` may be set to as low as 0.1 without loss of model accuracy. Note that this +#' sampling method is only supported when `"tree_method"` is set to `"hist"` and the device is `"cuda"`; other tree +#' methods only support `"uniform"` sampling. +#' @param colsample_bytree,colsample_bylevel,colsample_bynode (for Tree Booster) (default=1) +#' This is a family of parameters for subsampling of columns. +#' - All `"colsample_by*"` parameters have a range of \eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled. +#' - `"colsample_bytree"` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed. +#' - `"colsample_bylevel"` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree. +#' - `"colsample_bynode"` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. 
This is not supported by the exact tree method. +#' - `"colsample_by*"` parameters work cumulatively. For instance, +#' the combination `list(colsample_bytree = 0.5, colsample_bylevel = 0.5, colsample_bynode = 0.5)` with 64 features will leave 8 features to choose from at +#' each split. +#' +#' One can set the `"feature_weights"` for DMatrix to +#' define the probability of each feature being selected when using column sampling. +#' @param lambda,reg_lambda (two aliases for the same parameter) +#' +#' - For tree-based boosters: +#' - L2 regularization term on weights. Increasing this value will make model more conservative. +#' - default: 1 +#' - range: \eqn{[0, \infty)} +#' - For linear booster: +#' - L2 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples. +#' - default: 0 +#' - range: \eqn{[0, \infty)} +#' +#' Note: should only pass one of `lambda` or `reg_lambda`. Both refer to the same parameter and there's thus no difference between one or the other. +#' @param alpha,reg_alpha (two aliases for the same parameter) +#' - L1 regularization term on weights. Increasing this value will make model more conservative. +#' - For the linear booster, it's normalised to number of training examples. +#' - default: 0 +#' - range: \eqn{[0, \infty)} +#' +#' Note: should only pass one of `alpha` or `reg_alpha`. Both refer to the same parameter and there's thus no difference between one or the other. +#' @param tree_method (for Tree Booster) (default= `"auto"`) +#' The tree construction algorithm used in XGBoost. See description in the [reference paper](http://arxiv.org/abs/1603.02754) and [Tree Methods](https://xgboost.readthedocs.io/en/latest/treemethod.html). +#' +#' Choices: `"auto"`, `"exact"`, `"approx"`, `"hist"`, this is a combination of commonly +#' used updaters. For other updaters like `"refresh"`, set the parameter `updater` +#' directly. +#' - `"auto"`: Same as the `"hist"` tree method.
+#' - `"exact"`: Exact greedy algorithm. Enumerates all split candidates. +#' - `"approx"`: Approximate greedy algorithm using quantile sketch and gradient histogram. +#' - `"hist"`: Faster histogram optimized approximate greedy algorithm. +#' @param scale_pos_weight (for Tree Booster) (default=1) +#' Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: `sum(negative instances) / sum(positive instances)`. See [Parameters Tuning](https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html) for more discussion. Also, see Higgs Kaggle competition demo for examples: [R](https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R), [py1](https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py), [py2](https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py), [py3](https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py). +#' @param updater Has different meanings depending on the type of booster. +#' +#' - For tree-based boosters: +#' A comma separated string defining the sequence of tree updaters to run, providing a modular way to construct and to modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. However, it could be also set explicitly by a user. The following updaters exist: +#' - `"grow_colmaker"`: non-distributed column-based construction of trees. +#' - `"grow_histmaker"`: distributed tree construction with row-based data splitting based on global proposal of histogram counting. +#' - `"grow_quantile_histmaker"`: Grow tree using quantized histogram. +#' - `"grow_gpu_hist"`: Enabled when `tree_method` is set to `"hist"` along with `device="cuda"`. +#' - `"grow_gpu_approx"`: Enabled when `tree_method` is set to `"approx"` along with `device="cuda"`. +#' - `"sync"`: synchronizes trees in all distributed nodes. 
+#' - `"refresh"`: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed. +#' - `"prune"`: prunes the splits where loss < `min_split_loss` (or `gamma`) and nodes that have depth greater than `max_depth`. +#' +#' - For `booster="gblinear"`: +#' (default= `"shotgun"`) Choice of algorithm to fit linear model +#' - `"shotgun"`: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run. +#' - `"coord_descent"`: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the `device` parameter is set to `"cuda"` or `"gpu"`, a GPU variant would be used. +#' @param refresh_leaf (for Tree Booster) (default=1) +#' This is a parameter of the `"refresh"` updater. When this flag is 1, tree leafs as well as tree nodes' stats are updated. When it is 0, only node stats are updated. +#' @param grow_policy (for Tree Booster) (default= `"depthwise"`) +#' - Controls a way new nodes are added to the tree. +#' - Currently supported only if `tree_method` is set to `"hist"` or `"approx"`. +#' - Choices: `"depthwise"`, `"lossguide"` +#' - `"depthwise"`: split at nodes closest to the root. +#' - `"lossguide"`: split at nodes with highest loss change. +#' @param max_leaves (for Tree Booster) (default=0) +#' Maximum number of nodes to be added. Not used by `"exact"` tree method. +#' @param max_bin (for Tree Booster) (default=256) +#' - Only used if `tree_method` is set to `"hist"` or `"approx"`. +#' - Maximum number of discrete bins to bucket continuous features. +#' - Increasing this number improves the optimality of splits at the cost of higher computation time. +#' @param num_parallel_tree (for Tree Booster) (default=1) +#' Number of parallel trees constructed during each iteration. This option is used to support boosted random forest. 
+#' @param monotone_constraints (for Tree Booster) +#' Constraint of variable monotonicity. See [Monotonic Constraints](https://xgboost.readthedocs.io/en/latest/tutorials/monotonic.html) for more information. +#' @param interaction_constraints (for Tree Booster) +#' Constraints for interaction representing permitted interactions. The constraints must +#' be specified in the form of a nested list, e.g. `list(c(0, 1), c(2, 3, 4))`, where each inner +#' list is a group of indices of features (base-0 numeration) that are allowed to interact with each other. +#' See [Feature Interaction Constraints](https://xgboost.readthedocs.io/en/latest/tutorials/feature_interaction_constraint.html) for more information. +#' @param multi_strategy (for Tree Booster) (default = `"one_output_per_tree"`) +#' The strategy used for training multi-target models, including multi-target regression +#' and multi-class classification. See [Multiple Outputs](https://xgboost.readthedocs.io/en/latest/tutorials/multioutput.html) for more information. +#' - `"one_output_per_tree"`: One model for each target. +#' - `"multi_output_tree"`: Use multi-target trees. +#' +#' Version added: 2.0.0 +#' +#' Note: This parameter is a work in progress. +#' @param base_score +#' - The initial prediction score of all instances, global bias +#' - The parameter is automatically estimated for selected objectives before training. To +#' disable the estimation, specify a real number argument. +#' - If `base_margin` is supplied, `base_score` will not be added. +#' - For sufficient number of iterations, changing this value will not have too much effect. +#' @param eval_metric (default according to objective) +#' - Evaluation metrics for validation data, a default metric will be assigned according to objective (rmse for regression, and logloss for classification, `mean average precision` for `"rank:map"`, etc.) +#' - User can add multiple evaluation metrics.
+#' - The choices are listed below: +#' - `"rmse"`: [root mean square error](http://en.wikipedia.org/wiki/Root_mean_square_error) +#' - `"rmsle"`: root mean square log error: \eqn{\sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}}. Default metric of `"reg:squaredlogerror"` objective. This metric reduces errors generated by outliers in dataset. But because `log` function is employed, `"rmsle"` might output `nan` when prediction value is less than -1. See `"reg:squaredlogerror"` for other requirements. +#' - `"mae"`: [mean absolute error](https://en.wikipedia.org/wiki/Mean_absolute_error) +#' - `"mape"`: [mean absolute percentage error](https://en.wikipedia.org/wiki/Mean_absolute_percentage_error) +#' - `"mphe"`: [mean Pseudo Huber error](https://en.wikipedia.org/wiki/Huber_loss). Default metric of `"reg:pseudohubererror"` objective. +#' - `"logloss"`: [negative log-likelihood](http://en.wikipedia.org/wiki/Log-likelihood) +#' - `"error"`: Binary classification error rate. It is calculated as `#(wrong cases)/#(all cases)`. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances. +#' - `"error@t"`: a different than 0.5 binary classification threshold value could be specified by providing a numerical value through 't'. +#' - `"merror"`: Multiclass classification error rate. It is calculated as `#(wrong cases)/#(all cases)`. +#' - `"mlogloss"`: [Multiclass logloss](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html). +#' - `"auc"`: [Receiver Operating Characteristic Area under the Curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve). +#' Available for classification and learning-to-rank tasks. +#' - When used with binary classification, the objective should be `"binary:logistic"` or similar functions that work on probability. 
+#' - When used with multi-class classification, objective should be `"multi:softprob"` instead of `"multi:softmax"`, as the latter doesn't output probability. Also the AUC is calculated by 1-vs-rest with reference class weighted by class prevalence. +#' - When used with LTR task, the AUC is computed by comparing pairs of documents to count correctly sorted pairs. This corresponds to pairwise learning to rank. The implementation has some issues with average AUC around groups and distributed workers not being well-defined. +#' - On a single machine the AUC calculation is exact. In a distributed environment the AUC is a weighted average over the AUC of training rows on each node - therefore, distributed AUC is an approximation sensitive to the distribution of data across workers. Use another metric in distributed environments if precision and reproducibility are important. +#' - When input dataset contains only negative or positive samples, the output is `NaN`. The behavior is implementation defined, for instance, `scikit-learn` returns \eqn{0.5} instead. +#' - `"aucpr"`: [Area under the PR curve](https://en.wikipedia.org/wiki/Precision_and_recall). +#' Available for classification and learning-to-rank tasks. +#' +#' After XGBoost 1.6, both of the requirements and restrictions for using `"aucpr"` in classification problem are similar to `"auc"`. For ranking task, only binary relevance label \eqn{y \in [0, 1]} is supported. Different from `"map"` (mean average precision), `"aucpr"` calculates the *interpolated* area under precision recall curve using continuous interpolation. +#' +#' - `"pre"`: Precision at \eqn{k}. Supports only learning to rank task. 
+#' - `"ndcg"`: [Normalized Discounted Cumulative Gain](http://en.wikipedia.org/wiki/NDCG) +#' - `"map"`: [Mean Average Precision](http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision) +#' +#' The `average precision` is defined as: +#' +#' \eqn{AP@l = \frac{1}{min{(l, N)}}\sum^l_{k=1}P@k \cdot I_{(k)}} +#' +#' where \eqn{I_{(k)}} is an indicator function that equals to \eqn{1} when the document at \eqn{k} is relevant and \eqn{0} otherwise. The \eqn{P@k} is the precision at \eqn{k}, and \eqn{N} is the total number of relevant documents. Lastly, the `mean average precision` is defined as the weighted average across all queries. +#' +#' - `"ndcg@n"`, `"map@n"`, `"pre@n"`: \eqn{n} can be assigned as an integer to cut off the top positions in the lists for evaluation. +#' - `"ndcg-"`, `"map-"`, `"ndcg@n-"`, `"map@n-"`: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as \eqn{1}. By appending "-" to the evaluation metric name, we can ask XGBoost to evaluate these scores as \eqn{0} to be consistent under some conditions. +#' - `"poisson-nloglik"`: negative log-likelihood for Poisson regression +#' - `"gamma-nloglik"`: negative log-likelihood for gamma regression +#' - `"cox-nloglik"`: negative partial log-likelihood for Cox proportional hazards regression +#' - `"gamma-deviance"`: residual deviance for gamma regression +#' - `"tweedie-nloglik"`: negative log-likelihood for Tweedie regression (at a specified value of the `tweedie_variance_power` parameter) +#' - `"aft-nloglik"`: Negative log likelihood of Accelerated Failure Time model. +#' See [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) for details. +#' - `"interval-regression-accuracy"`: Fraction of data points whose predicted labels fall in the interval-censored labels. +#' Only applicable for interval-censored data. 
See [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) for details. +#' @param seed_per_iteration (default= `FALSE`) +#' Seed PRNG deterministically via iteration number. +#' @param device (default= `"cpu"`) +#' Device for XGBoost to run. User can set it to one of the following values: +#' - `"cpu"`: Use CPU. +#' - `"cuda"`: Use a GPU (CUDA device). +#' - `"cuda:<ordinal>"`: `<ordinal>` is an integer that specifies the ordinal of the GPU (which GPU do you want to use if you have more than one device). +#' - `"gpu"`: Default GPU device selection from the list of available and supported devices. Only `"cuda"` devices are supported currently. +#' - `"gpu:<ordinal>"`: Default GPU device selection from the list of available and supported devices. Only `"cuda"` devices are supported currently. +#' +#' For more information about GPU acceleration, see [XGBoost GPU Support](https://xgboost.readthedocs.io/en/latest/gpu/index.html). In distributed environments, ordinal selection is handled by distributed frameworks instead of XGBoost. As a result, using `"cuda:<ordinal>"` will result in an error. Use `"cuda"` instead. +#' +#' Version added: 2.0.0 +#' +#' Note: if XGBoost was installed from CRAN, it won't have GPU support enabled, thus only `"cpu"` will be available. +#' To get GPU support, the R package for XGBoost must be installed from source or from the GitHub releases - see +#' [instructions](https://xgboost.readthedocs.io/en/latest/install.html#r). +#' @param disable_default_eval_metric (default= `FALSE`) +#' Flag to disable default metric. Set to 1 or `TRUE` to disable. +#' @param use_rmm Whether to use RAPIDS Memory Manager (RMM) to allocate cache GPU +#' memory. The primary memory is always allocated on the RMM pool when XGBoost is built +#' (compiled) with the RMM plugin enabled. Valid values are `TRUE` and `FALSE`.
See +#' [Using XGBoost with RAPIDS Memory Manager (RMM) plugin](https://xgboost.readthedocs.io/en/latest/python/rmm-examples/index.html) for details. +#' @param max_cached_hist_node (for Non-Exact Tree Methods) (default = 65536) +#' Maximum number of cached nodes for histogram. This can be used with the `"hist"` and the +#' `"approx"` tree methods. +#' +#' Version added: 2.0.0 +#' +#' - For most of the cases this parameter should not be set except for growing deep +#' trees. After 3.0, this parameter affects GPU algorithms as well. +#' @param extmem_single_page (for Non-Exact Tree Methods) (default = `FALSE`) +#' This parameter is only used for the `"hist"` tree method with `device="cuda"` and +#' `subsample != 1.0`. Before 3.0, pages were always concatenated. +#' +#' Version added: 3.0.0 +#' +#' Whether the GPU-based `"hist"` tree method should concatenate the training data into a +#' single batch instead of fetching data on-demand when external memory is used. For GPU +#' devices that don't support address translation services, external memory training is +#' expensive. This parameter can be used in combination with subsampling to reduce overall +#' memory usage without significant overhead. See [Using XGBoost External Memory Version](https://xgboost.readthedocs.io/en/latest/tutorials/external_memory.html) for +#' more information. +#' @param max_cat_to_onehot (for Non-Exact Tree Methods) +#' A threshold for deciding whether XGBoost should use one-hot encoding based split for +#' categorical data. When number of categories is lesser than the threshold then one-hot +#' encoding is chosen, otherwise the categories will be partitioned into children nodes. +#' +#' Version added: 1.6.0 +#' @param max_cat_threshold (for Non-Exact Tree Methods) +#' Maximum number of categories considered for each split. Used only by partition-based +#' splits for preventing over-fitting. 
+#' +#' Version added: 1.7.0 +#' @param sample_type (for Dart Booster) (default= `"uniform"`) +#' Type of sampling algorithm. +#' - `"uniform"`: dropped trees are selected uniformly. +#' - `"weighted"`: dropped trees are selected in proportion to weight. +#' @param normalize_type (for Dart Booster) (default= `"tree"`) +#' Type of normalization algorithm. +#' - `"tree"`: new trees have the same weight of each of dropped trees. +#' - Weight of new trees are `1 / (k + learning_rate)`. +#' - Dropped trees are scaled by a factor of `k / (k + learning_rate)`. +#' - `"forest"`: new trees have the same weight of sum of dropped trees (forest). +#' - Weight of new trees are `1 / (1 + learning_rate)`. +#' - Dropped trees are scaled by a factor of `1 / (1 + learning_rate)`. +#' @param rate_drop (for Dart Booster) (default=0.0) +#' Dropout rate (a fraction of previous trees to drop during the dropout). +#' +#' range: \eqn{[0.0, 1.0]} +#' @param one_drop (for Dart Booster) (default=0) +#' When this flag is enabled, at least one tree is always dropped during the dropout (allows Binomial-plus-one or epsilon-dropout from the original DART paper). +#' @param skip_drop (for Dart Booster) (default=0.0) +#' Probability of skipping the dropout procedure during a boosting iteration. +#' - If a dropout is skipped, new trees are added in the same manner as `"gbtree"`. +#' - Note that non-zero `skip_drop` has higher priority than `rate_drop` or `one_drop`. +#' +#' range: \eqn{[0.0, 1.0]} +#' @param feature_selector (for Linear Booster) (default= `"cyclic"`) +#' Feature selection and ordering method +#' - `"cyclic"`: Deterministic selection by cycling through features one at a time. +#' - `"shuffle"`: Similar to `"cyclic"` but with random feature shuffling prior to each update. +#' - `"random"`: A random (with replacement) coordinate selector. +#' - `"greedy"`: Select coordinate with the greatest gradient magnitude. It has `O(num_feature^2)` complexity. It is fully deterministic. 
It allows restricting the selection to `top_k` features per group with the largest magnitude of univariate weight change, by setting the `top_k` parameter. Doing so would reduce the complexity to `O(num_feature*top_k)`. +#' - `"thrifty"`: Thrifty, approximately-greedy feature selector. Prior to cyclic updates, reorders features in descending magnitude of their univariate weight changes. This operation is multithreaded and is a linear complexity approximation of the quadratic greedy selection. It allows restricting the selection to `top_k` features per group with the largest magnitude of univariate weight change, by setting the `top_k` parameter. +#' @param top_k (for Linear Booster) (default=0) +#' The number of top features to select in `greedy` and `thrifty` feature selector. The value of 0 means using all the features. +#' @param num_class Number of classes when using multi-class classification objectives (e.g. `objective="multi:softprob"`) +#' @param tweedie_variance_power (for Tweedie Regression (`objective="reg:tweedie"`)) (default=1.5) +#' - Parameter that controls the variance of the Tweedie distribution `var(y) ~ E(y)^tweedie_variance_power` +#' - range: \eqn{(1,2)} +#' - Set closer to 2 to shift towards a gamma distribution +#' - Set closer to 1 to shift towards a Poisson distribution. +#' @param huber_slope (for using Pseudo-Huber (`"reg:pseudohubererror"`)) (default = 1.0) +#' A parameter used for Pseudo-Huber loss to define the \eqn{\delta} term. +#' @param quantile_alpha (for using Quantile Loss (`"reg:quantileerror"`)) +#' A scalar or a list of targeted quantiles (passed as a numeric vector). +#' +#' Version added: 2.0.0 +#' @param aft_loss_distribution (for using AFT Survival Loss (`"survival:aft"`) and Negative Log Likelihood of AFT metric (`"aft-nloglik"`)) +#' Probability Density Function, `"normal"`, `"logistic"`, or `"extreme"`.
+#' @param lambdarank_pair_method (for learning to rank (`"rank:ndcg"`, `"rank:map"`, `"rank:pairwise"`)) (default = `"topk"`) +#' How to construct pairs for pair-wise learning. +#' - `"mean"`: Sample `lambdarank_num_pair_per_sample` pairs for each document in the query list. +#' - `"topk"`: Focus on top-`lambdarank_num_pair_per_sample` documents. Construct \eqn{|query|} pairs for each document at the top-`lambdarank_num_pair_per_sample` ranked by the model. +#' @param lambdarank_num_pair_per_sample (for learning to rank (`"rank:ndcg"`, `"rank:map"`, `"rank:pairwise"`)) +#' It specifies the number of pairs sampled for each document when pair method is `"mean"`, or the truncation level for queries when the pair method is `"topk"`. For example, to train with `ndcg@6`, set `lambdarank_num_pair_per_sample` to \eqn{6} and `lambdarank_pair_method` to `"topk"`. +#' +#' range = \eqn{[1, \infty)} +#' @param lambdarank_normalization (for learning to rank (`"rank:ndcg"`, `"rank:map"`, `"rank:pairwise"`)) (default = `TRUE`) +#' Whether to normalize the leaf value by lambda gradient. This can sometimes stagnate the training progress. +#' +#' Version added: 2.1.0 +#' @param lambdarank_unbiased (for learning to rank (`"rank:ndcg"`, `"rank:map"`, `"rank:pairwise"`)) (default = `FALSE`) +#' Specify whether we need to debias input click data. +#' @param lambdarank_bias_norm (for learning to rank (`"rank:ndcg"`, `"rank:map"`, `"rank:pairwise"`)) (default = 2.0) +#' \eqn{L_p} normalization for position debiasing, default is \eqn{L_2}. Only relevant when `lambdarank_unbiased` is set to `TRUE`. +#' @param ndcg_exp_gain (for learning to rank (`"rank:ndcg"`, `"rank:map"`, `"rank:pairwise"`)) (default = `TRUE`) +#' Whether we should use exponential gain function for `NDCG`. There are two forms of gain function for `NDCG`, one is using relevance value directly while the other is using \eqn{2^{rel} - 1} to emphasize retrieving relevant documents.
When `ndcg_exp_gain` is `TRUE` (the default), relevance degree cannot be greater than 31. +xgb.params <- function( + objective = NULL, + verbosity = NULL, + nthread = NULL, + seed = NULL, + booster = NULL, + eta = NULL, + learning_rate = NULL, + gamma = NULL, + min_split_loss = NULL, + max_depth = NULL, + min_child_weight = NULL, + max_delta_step = NULL, + subsample = NULL, + sampling_method = NULL, + colsample_bytree = NULL, + colsample_bylevel = NULL, + colsample_bynode = NULL, + lambda = NULL, + reg_lambda = NULL, + alpha = NULL, + reg_alpha = NULL, + tree_method = NULL, + scale_pos_weight = NULL, + updater = NULL, + refresh_leaf = NULL, + grow_policy = NULL, + max_leaves = NULL, + max_bin = NULL, + num_parallel_tree = NULL, + monotone_constraints = NULL, + interaction_constraints = NULL, + multi_strategy = NULL, + base_score = NULL, + eval_metric = NULL, + seed_per_iteration = NULL, + device = NULL, + disable_default_eval_metric = NULL, + use_rmm = NULL, + max_cached_hist_node = NULL, + extmem_single_page = NULL, + max_cat_to_onehot = NULL, + max_cat_threshold = NULL, + sample_type = NULL, + normalize_type = NULL, + rate_drop = NULL, + one_drop = NULL, + skip_drop = NULL, + feature_selector = NULL, + top_k = NULL, + num_class = NULL, + tweedie_variance_power = NULL, + huber_slope = NULL, + quantile_alpha = NULL, + aft_loss_distribution = NULL, + lambdarank_pair_method = NULL, + lambdarank_num_pair_per_sample = NULL, + lambdarank_normalization = NULL, + lambdarank_unbiased = NULL, + lambdarank_bias_norm = NULL, + ndcg_exp_gain = NULL +) { +# nolint end + out <- as.list(environment()) + out <- out[!vapply(out, is.null, logical(1L))] + return(out) +} diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index c22752a3f506..db4ad80389fd 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -778,7 +778,7 @@ process.x.and.col.args <- function( #' @param objective Optimization objective to minimize based on the supplied data, to be passed #' by name as a string /
character (e.g. `reg:absoluteerror`). See the #' [Learning Task Parameters](https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters) -#' page for more detailed information on allowed values. +#' page and the [xgb.params()] documentation for more detailed information on allowed values. #' #' If `NULL` (the default), will be automatically determined from `y` according to the following #' logic: diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index 5cdfed97f504..345a6437dc31 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -205,7 +205,7 @@ Since it quadratically depends on the number of features, it is recommended to p of the most important features first. See below about the format of the returned results. The \code{predict()} method uses as many threads as defined in \code{xgb.Booster} object (all by default). -If you want to change their number, assign a new number to \code{nthread} using \code{\link[=xgb.parameters<-]{xgb.parameters<-()}}. +If you want to change their number, assign a new number to \code{nthread} using \code{\link[=xgb.model.parameters<-]{xgb.model.parameters<-()}}. Note that converting a matrix to \code{\link[=xgb.DMatrix]{xgb.DMatrix()}} uses multiple threads too. 
} \examples{ diff --git a/R-package/man/xgb.Callback.Rd b/R-package/man/xgb.Callback.Rd index 8cee8c729698..5fd756538889 100644 --- a/R-package/man/xgb.Callback.Rd +++ b/R-package/man/xgb.Callback.Rd @@ -220,7 +220,7 @@ x <- as.matrix(mtcars[, -1]) dm <- xgb.DMatrix(x, label = y, nthread = 1) model <- xgb.train( data = dm, - params = list(objective = "reg:squarederror", nthread = 1), + params = xgb.params(objective = "reg:squarederror", nthread = 1), nrounds = 5, callbacks = list(ssq_callback), keep_extra_attributes = TRUE diff --git a/R-package/man/xgb.attr.Rd b/R-package/man/xgb.attr.Rd index 40dedeea94e2..4c7356eb72ec 100644 --- a/R-package/man/xgb.attr.Rd +++ b/R-package/man/xgb.attr.Rd @@ -49,7 +49,7 @@ would not be saved by \code{\link[=xgb.save]{xgb.save()}} because an XGBoost mod and its serialization is handled externally. Also, setting an attribute that has the same name as one of XGBoost's parameters wouldn't change the value of that parameter for a model. -Use \code{\link[=xgb.parameters<-]{xgb.parameters<-()}} to set or change model parameters. +Use \code{\link[=xgb.model.parameters<-]{xgb.model.parameters<-()}} to set or change model parameters. The \verb{xgb.attributes<-} setter either updates the existing or adds one or several attributes, but it doesn't delete the other existing attributes. 
diff --git a/R-package/man/xgb.copy.Booster.Rd b/R-package/man/xgb.copy.Booster.Rd index 2bab71cd2a52..aaf14f3920e0 100644 --- a/R-package/man/xgb.copy.Booster.Rd +++ b/R-package/man/xgb.copy.Booster.Rd @@ -30,7 +30,7 @@ dm <- xgb.DMatrix(x, label = y, nthread = 1) model <- xgb.train( data = dm, - params = list(nthread = 1), + params = xgb.params(nthread = 1), nround = 3 ) diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index c5686e201ec7..299e92973555 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -5,7 +5,7 @@ \title{Cross Validation} \usage{ xgb.cv( - params = list(), + params = xgb.params(), data, nrounds, nfold, @@ -26,28 +26,16 @@ xgb.cv( ) } \arguments{ -\item{params}{The list of parameters. The complete list of parameters is available in the -\href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. -Below is a shorter summary: -\itemize{ -\item \code{objective}: Objective function, common ones are -\itemize{ -\item \code{reg:squarederror}: Regression with squared loss. -\item \code{binary:logistic}: Logistic regression for classification. -} - -See \code{\link[=xgb.train]{xgb.train()}} for complete list of objectives. -\item \code{eta}: Step size of each boosting step -\item \code{max_depth}: Maximum depth of the tree -\item \code{nthread}: Number of threads used in training. If not set, all threads are used -} +\item{params}{List of XGBoost parameters which control the model building process. +See the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation} +and the documentation for \code{\link[=xgb.params]{xgb.params()}} for details. -See \code{\link[=xgb.train]{xgb.train()}} for further details. -See also demo for walkthrough example in R. +Should be passed as list with named entries. Parameters that are not specified in this +list will use their default values. Alternatively, parameters may be passed directly +as function arguments (accepted through \code{...}). 
-Note that, while \code{params} accepts a \code{seed} entry and will use such parameter for model training if -supplied, this seed is not used for creation of train-test splits, which instead rely on R's own RNG -system - thus, for reproducible results, one needs to call the \code{\link[=set.seed]{set.seed()}} function beforehand.} +A list of named parameters can be created through the function \code{\link[=xgb.params]{xgb.params()}}, which +accepts all valid parameters as function arguments.} \item{data}{An \code{xgb.DMatrix} object, with corresponding fields like \code{label} or bounds as required for model training by the objective. @@ -131,7 +119,7 @@ See \code{\link[=xgb.Callback]{xgb.Callback()}}. Some of the callbacks are autom parameters' values. User can provide either existing or their own callback methods in order to customize the training process.} -\item{...}{Other parameters to pass to \code{params}.} +\item{...}{Other parameters to pass to \code{params}. See \code{\link[=xgb.params]{xgb.params()}} for more details.} } \value{ An object of class 'xgb.cv.synchronous' with the following elements: @@ -185,7 +173,8 @@ cv <- xgb.cv( nfold = 5, metrics = list("rmse","auc"), max_depth = 3, - eta = 1,objective = "binary:logistic" + eta = 1, + objective = "binary:logistic" ) print(cv) print(cv, verbose = TRUE) diff --git a/R-package/man/xgb.get.DMatrix.qcut.Rd b/R-package/man/xgb.get.DMatrix.qcut.Rd index daa8edf71093..d9d21b1912a8 100644 --- a/R-package/man/xgb.get.DMatrix.qcut.Rd +++ b/R-package/man/xgb.get.DMatrix.qcut.Rd @@ -44,7 +44,7 @@ dm <- xgb.DMatrix(x, label = y, nthread = 1) # DMatrix is not quantized right away, but will be once a hist model is generated model <- xgb.train( data = dm, - params = list(tree_method = "hist", max_bin = 8, nthread = 1), + params = xgb.params(tree_method = "hist", max_bin = 8, nthread = 1), nrounds = 3 ) diff --git a/R-package/man/xgb.get.num.boosted.rounds.Rd b/R-package/man/xgb.get.num.boosted.rounds.Rd index 
ba1c5e11a96b..039c0fe5c1c7 100644 --- a/R-package/man/xgb.get.num.boosted.rounds.Rd +++ b/R-package/man/xgb.get.num.boosted.rounds.Rd @@ -20,6 +20,6 @@ Get number of boosting in a fitted booster } \details{ Note that setting booster parameters related to training -continuation / updates through \code{\link[=xgb.parameters<-]{xgb.parameters<-()}} will reset the +continuation / updates through \code{\link[=xgb.model.parameters<-]{xgb.model.parameters<-()}} will reset the number of rounds to zero. } diff --git a/R-package/man/xgb.is.same.Booster.Rd b/R-package/man/xgb.is.same.Booster.Rd index 4ef0182077ca..ff5b4b0c31ab 100644 --- a/R-package/man/xgb.is.same.Booster.Rd +++ b/R-package/man/xgb.is.same.Booster.Rd @@ -40,7 +40,7 @@ y <- mtcars$mpg x <- as.matrix(mtcars[, -1]) model <- xgb.train( - params = list(nthread = 1), + params = xgb.params(nthread = 1), data = xgb.DMatrix(x, label = y, nthread = 1), nround = 3 ) diff --git a/R-package/man/xgb.parameters.Rd b/R-package/man/xgb.model.parameters.Rd similarity index 90% rename from R-package/man/xgb.parameters.Rd rename to R-package/man/xgb.model.parameters.Rd index 65426792e0dd..33537c0514fa 100644 --- a/R-package/man/xgb.parameters.Rd +++ b/R-package/man/xgb.model.parameters.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/xgb.Booster.R -\name{xgb.parameters<-} -\alias{xgb.parameters<-} +\name{xgb.model.parameters<-} +\alias{xgb.model.parameters<-} \title{Accessors for model parameters} \usage{ -xgb.parameters(object) <- value +xgb.model.parameters(object) <- value } \arguments{ \item{object}{Object of class \code{xgb.Booster}. 
\strong{Will be modified in-place}.} @@ -43,6 +43,6 @@ bst <- xgb.train( objective = "binary:logistic" ) -xgb.parameters(bst) <- list(eta = 0.1) +xgb.model.parameters(bst) <- list(eta = 0.1) } diff --git a/R-package/man/xgb.params.Rd b/R-package/man/xgb.params.Rd new file mode 100644 index 000000000000..051fba6c8bd0 --- /dev/null +++ b/R-package/man/xgb.params.Rd @@ -0,0 +1,539 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.train.R +\name{xgb.params} +\alias{xgb.params} +\title{XGBoost Parameters} +\usage{ +xgb.params( + objective = NULL, + verbosity = NULL, + nthread = NULL, + seed = NULL, + booster = NULL, + eta = NULL, + learning_rate = NULL, + gamma = NULL, + min_split_loss = NULL, + max_depth = NULL, + min_child_weight = NULL, + max_delta_step = NULL, + subsample = NULL, + sampling_method = NULL, + colsample_bytree = NULL, + colsample_bylevel = NULL, + colsample_bynode = NULL, + lambda = NULL, + reg_lambda = NULL, + alpha = NULL, + reg_alpha = NULL, + tree_method = NULL, + scale_pos_weight = NULL, + updater = NULL, + refresh_leaf = NULL, + grow_policy = NULL, + max_leaves = NULL, + max_bin = NULL, + num_parallel_tree = NULL, + monotone_constraints = NULL, + interaction_constraints = NULL, + multi_strategy = NULL, + base_score = NULL, + eval_metric = NULL, + seed_per_iteration = NULL, + device = NULL, + disable_default_eval_metric = NULL, + use_rmm = NULL, + max_cached_hist_node = NULL, + extmem_single_page = NULL, + max_cat_to_onehot = NULL, + max_cat_threshold = NULL, + sample_type = NULL, + normalize_type = NULL, + rate_drop = NULL, + one_drop = NULL, + skip_drop = NULL, + feature_selector = NULL, + top_k = NULL, + num_class = NULL, + tweedie_variance_power = NULL, + huber_slope = NULL, + quantile_alpha = NULL, + aft_loss_distribution = NULL, + lambdarank_pair_method = NULL, + lambdarank_num_pair_per_sample = NULL, + lambdarank_normalization = NULL, + lambdarank_unbiased = NULL, + lambdarank_bias_norm = NULL, + 
ndcg_exp_gain = NULL +) +} +\arguments{ +\item{objective}{(default=\code{"reg:squarederror"}) +Specify the learning task and the corresponding learning objective or a custom objective function to be used. + +For custom objective, see \href{https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html}{Custom Objective and Evaluation Metric} +and \href{https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html#custom-obj-metric}{Custom objective and metric} for more information, +along with the end note for function signatures. + +Supported values are: +\itemize{ +\item \code{"reg:squarederror"}: regression with squared loss. +\item \code{"reg:squaredlogerror"}: regression with squared log loss \eqn{\frac{1}{2}[log(pred + 1) - log(label + 1)]^2}. All input labels are required to be greater than -1. Also, see metric \code{rmsle} for possible issue with this objective. +\item \code{"reg:logistic"}: logistic regression, output probability +\item \code{"reg:pseudohubererror"}: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss. +\item \code{"reg:absoluteerror"}: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal. + +Version added: 1.7.0 +\item \code{"reg:quantileerror"}: Quantile loss, also known as "pinball loss". See later sections for its parameter and \href{https://xgboost.readthedocs.io/en/latest/python/examples/quantile_regression.html#sphx-glr-python-examples-quantile-regression-py}{Quantile Regression} for a worked example. 
+ +Version added: 2.0.0 +\item \code{"binary:logistic"}: logistic regression for binary classification, output probability +\item \code{"binary:logitraw"}: logistic regression for binary classification, output score before logistic transformation +\item \code{"binary:hinge"}: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. +\item \code{"count:poisson"}: Poisson regression for count data, output mean of Poisson distribution. \code{"max_delta_step"} is set to 0.7 by default in Poisson regression (used to safeguard optimization) +\item \code{"survival:cox"}: Cox regression for right censored survival time data (negative values are considered right censored). + +Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function \code{h(t) = h0(t) * HR}). +\item \code{"survival:aft"}: Accelerated failure time model for censored survival time data. +See \href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details. +\item \code{"multi:softmax"}: set XGBoost to do multiclass classification using the softmax objective, you also need to set num_class(number of classes) +\item \code{"multi:softprob"}: same as softmax, but output a vector of \code{ndata * nclass}, which can be further reshaped to \code{ndata * nclass} matrix. The result contains predicted probability of each data point belonging to each class. +\item \code{"rank:ndcg"}: Use LambdaMART to perform pair-wise ranking where \href{http://en.wikipedia.org/wiki/NDCG}{Normalized Discounted Cumulative Gain (NDCG)} is maximized. This objective supports position debiasing for click data.
+\item \code{"rank:map"}: Use LambdaMART to perform pair-wise ranking where \href{http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision}{Mean Average Precision (MAP)} is maximized +\item \code{"rank:pairwise"}: Use LambdaRank to perform pair-wise ranking using the \code{ranknet} objective. +\item \code{"reg:gamma"}: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be \href{https://en.wikipedia.org/wiki/Gamma_distribution#Occurrence_and_applications}{gamma-distributed}. +\item \code{"reg:tweedie"}: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be \href{https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications}{Tweedie-distributed}. +}} + +\item{verbosity}{(default=1) +Verbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), 3 +(debug). Sometimes XGBoost tries to change configurations based on heuristics, which +is displayed as warning message. If there's unexpected behaviour, please try to +increase value of verbosity.} + +\item{nthread}{(default to maximum number of threads available if not set) +Number of parallel threads used to run XGBoost. When choosing it, please keep thread +contention and hyperthreading in mind.} + +\item{seed}{Random number seed. If not specified, will take a random seed through R's own RNG engine.} + +\item{booster}{(default= \code{"gbtree"}) +Which booster to use. Can be \code{"gbtree"}, \code{"gblinear"} or \code{"dart"}; \code{"gbtree"} and \code{"dart"} use tree based models while \code{"gblinear"} uses linear functions.} + +\item{eta, learning_rate}{(two aliases for the same parameter) (for Tree Booster) (default=0.3) +Step size shrinkage used in update to prevent overfitting. 
After each boosting step, we can directly get the weights of new features, and \code{eta} shrinks the feature weights to make the boosting process more conservative. + +range: \eqn{[0,1]} + +Note: should only pass one of \code{eta} or \code{learning_rate}. Both refer to the same parameter and there's thus no difference between one or the other.} + +\item{gamma, min_split_loss}{(two aliases for the same parameter) (for Tree Booster) (default=0, alias: \code{gamma}) +Minimum loss reduction required to make a further partition on a leaf node of the tree. The larger \code{min_split_loss} is, the more conservative the algorithm will be. Note that a tree where no splits were made might still contain a single terminal node with a non-zero score. + +range: \eqn{[0, \infty)} + +Note: should only pass one of \code{gamma} or \code{min_split_loss}. Both refer to the same parameter and there's thus no difference between one or the other.} + +\item{max_depth}{(for Tree Booster) (default=6) +Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. \code{"exact"} tree method requires non-zero value. + +range: \eqn{[0, \infty)}} + +\item{min_child_weight}{(for Tree Booster) (default=1) +Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than \code{min_child_weight}, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger \code{min_child_weight} is, the more conservative the algorithm will be. + +range: \eqn{[0, \infty)}} + +\item{max_delta_step}{(for Tree Booster) (default=0) +Maximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. 
If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Setting it to a value of 1-10 might help control the update. + +range: \eqn{[0, \infty)}} + +\item{subsample}{(for Tree Booster) (default=1) +Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees, and this will prevent overfitting. Subsampling will occur once in every boosting iteration. + +range: \eqn{(0,1]}} + +\item{sampling_method}{(for Tree Booster) (default= \code{"uniform"}) +The method to use to sample the training instances. +\itemize{ +\item \code{"uniform"}: each training instance has an equal probability of being selected. Typically set +\code{"subsample"} >= 0.5 for good results. +\item \code{"gradient_based"}: the selection probability for each training instance is proportional to the +\bold{regularized absolute value} of gradients (more specifically, \eqn{\sqrt{g^2+\lambda h^2}}). +\code{"subsample"} may be set to as low as 0.1 without loss of model accuracy. Note that this +sampling method is only supported when \code{"tree_method"} is set to \code{"hist"} and the device is \code{"cuda"}; other tree +methods only support \code{"uniform"} sampling. +}} + +\item{colsample_bytree, colsample_bylevel, colsample_bynode}{(for Tree Booster) (default=1) +This is a family of parameters for subsampling of columns. +\itemize{ +\item All \code{"colsample_by*"} parameters have a range of \eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled. +\item \code{"colsample_bytree"} is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed. +\item \code{"colsample_bylevel"} is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree.
Columns are subsampled from the set of columns chosen for the current tree. +\item \code{"colsample_bynode"} is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method. +\item \code{"colsample_by*"} parameters work cumulatively. For instance, +the combination \verb{\{'colsample_bytree'=0.5, 'colsample_bylevel'=0.5, 'colsample_bynode'=0.5\}} with 64 features will leave 8 features to choose from at +each split. +} + +One can set the \code{"feature_weights"} for DMatrix to +define the probability of each feature being selected when using column sampling.} + +\item{lambda, reg_lambda}{(two aliases for the same parameter) +\itemize{ +\item For tree-based boosters: +\itemize{ +\item L2 regularization term on weights. Increasing this value will make model more conservative. +\item default: 1 +\item range: \eqn{[0, \infty)} +} +\item For linear booster: +\itemize{ +\item L2 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples. +\item default: 0 +\item range: \eqn{[0, \infty)} +} +} + +Note: should only pass one of \code{lambda} or \code{reg_lambda}. Both refer to the same parameter and there's thus no difference between one or the other.} + +\item{alpha, reg_alpha}{(two aliases for the same parameter) +\itemize{ +\item L1 regularization term on weights. Increasing this value will make model more conservative. +\item For the linear booster, it's normalised to number of training examples. +\item default: 0 +\item range: \eqn{[0, \infty)} +} + +Note: should only pass one of \code{alpha} or \code{reg_alpha}. Both refer to the same parameter and there's thus no difference between one or the other.} + +\item{tree_method}{(for Tree Booster) (default= \code{"auto"}) +The tree construction algorithm used in XGBoost.
See description in the \href{http://arxiv.org/abs/1603.02754}{reference paper} and \href{https://xgboost.readthedocs.io/en/latest/treemethod.html}{Tree Methods}. + +Choices: \code{"auto"}, \code{"exact"}, \code{"approx"}, \code{"hist"}, this is a combination of commonly +used updaters. For other updaters like \code{"refresh"}, set the parameter \code{updater} +directly. +\itemize{ +\item \code{"auto"}: Same as the \code{"hist"} tree method. +\item \code{"exact"}: Exact greedy algorithm. Enumerates all split candidates. +\item \code{"approx"}: Approximate greedy algorithm using quantile sketch and gradient histogram. +\item \code{"hist"}: Faster histogram optimized approximate greedy algorithm. +}} + +\item{scale_pos_weight}{(for Tree Booster) (default=1) +Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: \verb{sum(negative instances) / sum(positive instances)}. See \href{https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html}{Parameters Tuning} for more discussion. Also, see Higgs Kaggle competition demo for examples: \href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R}{R}, \href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py}{py1}, \href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py}{py2}, \href{https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py}{py3}.} + +\item{updater}{Has different meanings depending on the type of booster. +\itemize{ +\item For tree-based boosters: +A comma separated string defining the sequence of tree updaters to run, providing a modular way to construct and to modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. However, it could be also set explicitly by a user. 
The following updaters exist: +\itemize{ +\item \code{"grow_colmaker"}: non-distributed column-based construction of trees. +\item \code{"grow_histmaker"}: distributed tree construction with row-based data splitting based on global proposal of histogram counting. +\item \code{"grow_quantile_histmaker"}: Grow tree using quantized histogram. +\item \code{"grow_gpu_hist"}: Enabled when \code{tree_method} is set to \code{"hist"} along with \code{device="cuda"}. +\item \code{"grow_gpu_approx"}: Enabled when \code{tree_method} is set to \code{"approx"} along with \code{device="cuda"}. +\item \code{"sync"}: synchronizes trees in all distributed nodes. +\item \code{"refresh"}: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed. +\item \code{"prune"}: prunes the splits where loss < \code{min_split_loss} (or \code{gamma}) and nodes that have depth greater than \code{max_depth}. +} +\item For \code{booster="gblinear"}: +(default= \code{"shotgun"}) Choice of algorithm to fit linear model +\itemize{ +\item \code{"shotgun"}: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run. +\item \code{"coord_descent"}: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the \code{device} parameter is set to \code{"cuda"} or \code{"gpu"}, a GPU variant would be used. +} +}} + +\item{refresh_leaf}{(for Tree Booster) (default=1) +This is a parameter of the \code{"refresh"} updater. When this flag is 1, tree leafs as well as tree nodes' stats are updated. When it is 0, only node stats are updated.} + +\item{grow_policy}{(for Tree Booster) (default= \code{"depthwise"}) +\itemize{ +\item Controls a way new nodes are added to the tree. +\item Currently supported only if \code{tree_method} is set to \code{"hist"} or \code{"approx"}. 
+\item Choices: \code{"depthwise"}, \code{"lossguide"} +\itemize{ +\item \code{"depthwise"}: split at nodes closest to the root. +\item \code{"lossguide"}: split at nodes with highest loss change. +} +}} + +\item{max_leaves}{(for Tree Booster) (default=0) +Maximum number of nodes to be added. Not used by \code{"exact"} tree method.} + +\item{max_bin}{(for Tree Booster) (default=256) +\itemize{ +\item Only used if \code{tree_method} is set to \code{"hist"} or \code{"approx"}. +\item Maximum number of discrete bins to bucket continuous features. +\item Increasing this number improves the optimality of splits at the cost of higher computation time. +}} + +\item{num_parallel_tree}{(for Tree Booster) (default=1) +Number of parallel trees constructed during each iteration. This option is used to support boosted random forest.} + +\item{monotone_constraints}{(for Tree Booster) +Constraint of variable monotonicity. See \href{https://xgboost.readthedocs.io/en/latest/tutorials/monotonic.html}{Monotonic Constraints} for more information.} + +\item{interaction_constraints}{(for Tree Booster) +Constraints for interaction representing permitted interactions. The constraints must +be specified in the form of a nested list, e.g. \code{list(c(0, 1), c(2, 3, 4))}, where each inner +list is a group of indices of features (base-0 numeration) that are allowed to interact with each other. +See \href{https://xgboost.readthedocs.io/en/latest/tutorials/feature_interaction_constraint.html}{Feature Interaction Constraints} for more information.} + +\item{multi_strategy}{(for Tree Booster) (default = \code{"one_output_per_tree"}) +The strategy used for training multi-target models, including multi-target regression +and multi-class classification. See \href{https://xgboost.readthedocs.io/en/latest/tutorials/multioutput.html}{Multiple Outputs} for more information. +\itemize{ +\item \code{"one_output_per_tree"}: One model for each target.
+\item \code{"multi_output_tree"}: Use multi-target trees. +} + +Version added: 2.0.0 + +Note: This parameter is working-in-progress.} + +\item{base_score}{\itemize{ +\item The initial prediction score of all instances, global bias +\item The parameter is automatically estimated for selected objectives before training. To +disable the estimation, specify a real number argument. +\item If \code{base_margin} is supplied, \code{base_score} will not be added. +\item For sufficient number of iterations, changing this value will not have too much effect. +}} + +\item{eval_metric}{(default according to objective) +\itemize{ +\item Evaluation metrics for validation data, a default metric will be assigned according to objective (rmse for regression, and logloss for classification, \verb{mean average precision} for \code{rank:map}, etc.) +\item User can add multiple evaluation metrics. +\item The choices are listed below: +\itemize{ +\item \code{"rmse"}: \href{http://en.wikipedia.org/wiki/Root_mean_square_error}{root mean square error} +\item \code{"rmsle"}: root mean square log error: \eqn{\sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}}. Default metric of \code{"reg:squaredlogerror"} objective. This metric reduces errors generated by outliers in dataset. But because \code{log} function is employed, \code{"rmsle"} might output \code{nan} when prediction value is less than -1. See \code{"reg:squaredlogerror"} for other requirements. +\item \code{"mae"}: \href{https://en.wikipedia.org/wiki/Mean_absolute_error}{mean absolute error} +\item \code{"mape"}: \href{https://en.wikipedia.org/wiki/Mean_absolute_percentage_error}{mean absolute percentage error} +\item \code{"mphe"}: \href{https://en.wikipedia.org/wiki/Huber_loss}{mean Pseudo Huber error}. Default metric of \code{"reg:pseudohubererror"} objective. +\item \code{"logloss"}: \href{http://en.wikipedia.org/wiki/Log-likelihood}{negative log-likelihood} +\item \code{"error"}: Binary classification error rate. 
It is calculated as \verb{#(wrong cases)/#(all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances. +\item \code{"error@t"}: a different than 0.5 binary classification threshold value could be specified by providing a numerical value through 't'. +\item \code{"merror"}: Multiclass classification error rate. It is calculated as \verb{#(wrong cases)/#(all cases)}. +\item \code{"mlogloss"}: \href{http://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}{Multiclass logloss}. +\item \code{"auc"}: \href{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve}{Receiver Operating Characteristic Area under the Curve}. +Available for classification and learning-to-rank tasks. +\itemize{ +\item When used with binary classification, the objective should be \code{"binary:logistic"} or similar functions that work on probability. +\item When used with multi-class classification, objective should be \code{"multi:softprob"} instead of \code{"multi:softmax"}, as the latter doesn't output probability. Also the AUC is calculated by 1-vs-rest with reference class weighted by class prevalence. +\item When used with LTR task, the AUC is computed by comparing pairs of documents to count correctly sorted pairs. This corresponds to pairwise learning to rank. The implementation has some issues with average AUC around groups and distributed workers not being well-defined. +\item On a single machine the AUC calculation is exact. In a distributed environment the AUC is a weighted average over the AUC of training rows on each node - therefore, distributed AUC is an approximation sensitive to the distribution of data across workers. Use another metric in distributed environments if precision and reproducibility are important. +\item When input dataset contains only negative or positive samples, the output is \code{NaN}. 
The behavior is implementation defined, for instance, \code{scikit-learn} returns \eqn{0.5} instead. +} +\item \code{"aucpr"}: \href{https://en.wikipedia.org/wiki/Precision_and_recall}{Area under the PR curve}. +Available for classification and learning-to-rank tasks. + +After XGBoost 1.6, both of the requirements and restrictions for using \code{"aucpr"} in classification problem are similar to \code{"auc"}. For ranking task, only binary relevance label \eqn{y \in [0, 1]} is supported. Different from \code{"map"} (mean average precision), \code{"aucpr"} calculates the \emph{interpolated} area under precision recall curve using continuous interpolation. +\item \code{"pre"}: Precision at \eqn{k}. Supports only learning to rank task. +\item \code{"ndcg"}: \href{http://en.wikipedia.org/wiki/NDCG}{Normalized Discounted Cumulative Gain} +\item \code{"map"}: \href{http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision}{Mean Average Precision} + +The \verb{average precision} is defined as: + +\eqn{AP@l = \frac{1}{min{(l, N)}}\sum^l_{k=1}P@k \cdot I_{(k)}} + +where \eqn{I_{(k)}} is an indicator function that equals to \eqn{1} when the document at \eqn{k} is relevant and \eqn{0} otherwise. The \eqn{P@k} is the precision at \eqn{k}, and \eqn{N} is the total number of relevant documents. Lastly, the \verb{mean average precision} is defined as the weighted average across all queries. +\item \code{"ndcg@n"}, \code{"map@n"}, \code{"pre@n"}: \eqn{n} can be assigned as an integer to cut off the top positions in the lists for evaluation. +\item \code{"ndcg-"}, \code{"map-"}, \code{"ndcg@n-"}, \code{"map@n-"}: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as \eqn{1}. By appending "-" to the evaluation metric name, we can ask XGBoost to evaluate these scores as \eqn{0} to be consistent under some conditions. 
+\item \code{"poisson-nloglik"}: negative log-likelihood for Poisson regression +\item \code{"gamma-nloglik"}: negative log-likelihood for gamma regression +\item \code{"cox-nloglik"}: negative partial log-likelihood for Cox proportional hazards regression +\item \code{"gamma-deviance"}: residual deviance for gamma regression +\item \code{"tweedie-nloglik"}: negative log-likelihood for Tweedie regression (at a specified value of the \code{tweedie_variance_power} parameter) +\item \code{"aft-nloglik"}: Negative log likelihood of Accelerated Failure Time model. +See \href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details. +\item \code{"interval-regression-accuracy"}: Fraction of data points whose predicted labels fall in the interval-censored labels. +Only applicable for interval-censored data. See \href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details. +} +}} + +\item{seed_per_iteration}{(default= \code{FALSE}) +Seed PRNG deterministically via iteration number.} + +\item{device}{(default= \code{"cpu"}) +Device for XGBoost to run. User can set it to one of the following values: +\itemize{ +\item \code{"cpu"}: Use CPU. +\item \code{"cuda"}: Use a GPU (CUDA device). +\item \code{"cuda:<ordinal>"}: \verb{<ordinal>} is an integer that specifies the ordinal of the GPU (which GPU do you want to use if you have more than one device). +\item \code{"gpu"}: Default GPU device selection from the list of available and supported devices. Only \code{"cuda"} devices are supported currently. +\item \code{"gpu:<ordinal>"}: Default GPU device selection from the list of available and supported devices. Only \code{"cuda"} devices are supported currently. +} + +For more information about GPU acceleration, see \href{https://xgboost.readthedocs.io/en/latest/gpu/index.html}{XGBoost GPU Support}.
In distributed environments, ordinal selection is handled by distributed frameworks instead of XGBoost. As a result, using \code{"cuda:<ordinal>"} will result in an error. Use \code{"cuda"} instead. + +Version added: 2.0.0 + +Note: if XGBoost was installed from CRAN, it won't have GPU support enabled, thus only \code{"cpu"} will be available. +To get GPU support, the R package for XGBoost must be installed from source or from the GitHub releases - see +\href{https://xgboost.readthedocs.io/en/latest/install.html#r}{instructions}.} + +\item{disable_default_eval_metric}{(default= \code{FALSE}) +Flag to disable default metric. Set to 1 or \code{TRUE} to disable.} + +\item{use_rmm}{Whether to use RAPIDS Memory Manager (RMM) to allocate cache GPU +memory. The primary memory is always allocated on the RMM pool when XGBoost is built +(compiled) with the RMM plugin enabled. Valid values are \code{TRUE} and \code{FALSE}. See +\href{https://xgboost.readthedocs.io/en/latest/python/rmm-examples/index.html}{Using XGBoost with RAPIDS Memory Manager (RMM) plugin} for details.} + +\item{max_cached_hist_node}{(for Non-Exact Tree Methods) (default = 65536) +Maximum number of cached nodes for histogram. This can be used with the \code{"hist"} and the +\code{"approx"} tree methods. + +Version added: 2.0.0 +\itemize{ +\item For most of the cases this parameter should not be set except for growing deep +trees. After 3.0, this parameter affects GPU algorithms as well. +}} + +\item{extmem_single_page}{(for Non-Exact Tree Methods) (default = \code{FALSE}) +This parameter is only used for the \code{"hist"} tree method with \code{device="cuda"} and +\code{subsample != 1.0}. Before 3.0, pages were always concatenated. + +Version added: 3.0.0 + +Whether the GPU-based \code{"hist"} tree method should concatenate the training data into a +single batch instead of fetching data on-demand when external memory is used.
For GPU +devices that don't support address translation services, external memory training is +expensive. This parameter can be used in combination with subsampling to reduce overall +memory usage without significant overhead. See \href{https://xgboost.readthedocs.io/en/latest/tutorials/external_memory.html}{Using XGBoost External Memory Version} for +more information.} + +\item{max_cat_to_onehot}{(for Non-Exact Tree Methods) +A threshold for deciding whether XGBoost should use one-hot encoding based split for +categorical data. When number of categories is lesser than the threshold then one-hot +encoding is chosen, otherwise the categories will be partitioned into children nodes. + +Version added: 1.6.0} + +\item{max_cat_threshold}{(for Non-Exact Tree Methods) +Maximum number of categories considered for each split. Used only by partition-based +splits for preventing over-fitting. + +Version added: 1.7.0} + +\item{sample_type}{(for Dart Booster) (default= \code{"uniform"}) +Type of sampling algorithm. +\itemize{ +\item \code{"uniform"}: dropped trees are selected uniformly. +\item \code{"weighted"}: dropped trees are selected in proportion to weight. +}} + +\item{normalize_type}{(for Dart Booster) (default= \code{"tree"}) +Type of normalization algorithm. +\itemize{ +\item \code{"tree"}: new trees have the same weight of each of dropped trees. +\itemize{ +\item Weight of new trees are \code{1 / (k + learning_rate)}. +\item Dropped trees are scaled by a factor of \code{k / (k + learning_rate)}. +} +\item \code{"forest"}: new trees have the same weight of sum of dropped trees (forest). +\itemize{ +\item Weight of new trees are \code{1 / (1 + learning_rate)}. +\item Dropped trees are scaled by a factor of \code{1 / (1 + learning_rate)}. +} +}} + +\item{rate_drop}{(for Dart Booster) (default=0.0) +Dropout rate (a fraction of previous trees to drop during the dropout). 
+ +range: \eqn{[0.0, 1.0]}} + +\item{one_drop}{(for Dart Booster) (default=0) +When this flag is enabled, at least one tree is always dropped during the dropout (allows Binomial-plus-one or epsilon-dropout from the original DART paper).} + +\item{skip_drop}{(for Dart Booster) (default=0.0) +Probability of skipping the dropout procedure during a boosting iteration. +\itemize{ +\item If a dropout is skipped, new trees are added in the same manner as \code{"gbtree"}. +\item Note that non-zero \code{skip_drop} has higher priority than \code{rate_drop} or \code{one_drop}. +} + +range: \eqn{[0.0, 1.0]}} + +\item{feature_selector}{(for Linear Booster) (default= \code{"cyclic"}) +Feature selection and ordering method +\itemize{ +\item \code{"cyclic"}: Deterministic selection by cycling through features one at a time. +\item \code{"shuffle"}: Similar to \code{"cyclic"} but with random feature shuffling prior to each update. +\item \code{"random"}: A random (with replacement) coordinate selector. +\item \code{"greedy"}: Select coordinate with the greatest gradient magnitude. It has \code{O(num_feature^2)} complexity. It is fully deterministic. It allows restricting the selection to \code{top_k} features per group with the largest magnitude of univariate weight change, by setting the \code{top_k} parameter. Doing so would reduce the complexity to \code{O(num_feature*top_k)}. +\item \code{"thrifty"}: Thrifty, approximately-greedy feature selector. Prior to cyclic updates, reorders features in descending magnitude of their univariate weight changes. This operation is multithreaded and is a linear complexity approximation of the quadratic greedy selection. It allows restricting the selection to \code{top_k} features per group with the largest magnitude of univariate weight change, by setting the \code{top_k} parameter. +}} + +\item{top_k}{(for Linear Booster) (default=0) +The number of top features to select in \code{greedy} and \code{thrifty} feature selector. 
The value of 0 means using all the features.} + +\item{num_class}{Number of classes when using multi-class classification objectives (e.g. \code{objective="multi:softprob"})} + +\item{tweedie_variance_power}{(for Tweedie Regression (\code{objective="reg:tweedie"})) (default=1.5) +\itemize{ +\item Parameter that controls the variance of the Tweedie distribution \code{var(y) ~ E(y)^tweedie_variance_power} +\item range: \eqn{(1,2)} +\item Set closer to 2 to shift towards a gamma distribution +\item Set closer to 1 to shift towards a Poisson distribution. +}} + +\item{huber_slope}{(for using Pseudo-Huber (\code{"reg:pseudohubererror"})) (default = 1.0) +A parameter used for Pseudo-Huber loss to define the \eqn{\delta} term.} + +\item{quantile_alpha}{(for using Quantile Loss (\code{"reg:quantileerror"})) +A scalar or a list of targeted quantiles (passed as a numeric vector). + +Version added: 2.0.0} + +\item{aft_loss_distribution}{(for using AFT Survival Loss (\code{"survival:aft"}) and Negative Log Likelihood of AFT metric (\code{"aft-nloglik"})) +Probability Density Function, \code{"normal"}, \code{"logistic"}, or \code{"extreme"}.} + +\item{lambdarank_pair_method}{(for learning to rank (\code{"rank:ndcg"}, \code{"rank:map"}, \code{"rank:pairwise"})) (default = \code{"topk"}) +How to construct pairs for pair-wise learning. +\itemize{ +\item \code{"mean"}: Sample \code{lambdarank_num_pair_per_sample} pairs for each document in the query list. +\item \code{"topk"}: Focus on top-\code{lambdarank_num_pair_per_sample} documents. Construct \eqn{|query|} pairs for each document at the top-\code{lambdarank_num_pair_per_sample} ranked by the model. +}} + +\item{lambdarank_num_pair_per_sample}{(for learning to rank (\code{"rank:ndcg"}, \code{"rank:map"}, \code{"rank:pairwise"})) +It specifies the number of pairs sampled for each document when pair method is \code{"mean"}, or the truncation level for queries when the pair method is \code{"topk"}.
For example, to train with \verb{ndcg@6}, set \code{lambdarank_num_pair_per_sample} to \eqn{6} and \code{lambdarank_pair_method} to \code{"topk"}. + +range = \eqn{[1, \infty)}} + +\item{lambdarank_normalization}{(for learning to rank (\code{"rank:ndcg"}, \code{"rank:map"}, \code{"rank:pairwise"})) (default = \code{TRUE}) +Whether to normalize the leaf value by lambda gradient. This can sometimes stagnate the training progress. + +Version added: 2.1.0} + +\item{lambdarank_unbiased}{(for learning to rank (\code{"rank:ndcg"}, \code{"rank:map"}, \code{"rank:pairwise"})) (default = \code{FALSE}) +Specify whether we need to debias input click data.} + +\item{lambdarank_bias_norm}{(for learning to rank (\code{"rank:ndcg"}, \code{"rank:map"}, \code{"rank:pairwise"})) (default = 2.0) +\eqn{L_p} normalization for position debiasing, default is \eqn{L_2}. Only relevant when \code{lambdarank_unbiased} is set to \code{TRUE}.} + +\item{ndcg_exp_gain}{(for learning to rank (\code{"rank:ndcg"}, \code{"rank:map"}, \code{"rank:pairwise"})) (default = \code{TRUE}) +Whether we should use exponential gain function for \code{NDCG}. There are two forms of gain function for \code{NDCG}, one is using relevance value directly while the other is using \eqn{2^{rel} - 1} to emphasize retrieving relevant documents. When \code{ndcg_exp_gain} is \code{TRUE} (the default), relevance degree cannot be greater than 31.} +} +\value{ +A list with the entries that were passed non-NULL values. It is intended to +be passed as argument \code{params} to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}. +} +\description{ +Convenience function to generate a list of named XGBoost parameters, which +can be passed as argument \code{params} to \code{\link[=xgb.train]{xgb.train()}}. See the \href{https://xgboost.readthedocs.io/en/stable/parameter.html}{online documentation} for more details.
+ +The purpose of this function is to enable IDE autocompletions and to provide in-package +documentation for all the possible parameters that XGBoost accepts. The output from this +function is just a regular R list containing the parameters that were set to non-default +values. Note that this function will not perform any validation on the supplied arguments. + +If passing \code{NULL} for a given parameter (the default for all of them), then the default +value for that parameter will be used. Default values are automatically determined by the +XGBoost core library upon calls to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}, and are subject to change +over XGBoost library versions. +} diff --git a/R-package/man/xgb.slice.Booster.Rd b/R-package/man/xgb.slice.Booster.Rd index d245ced1bccf..294a51b5aa32 100644 --- a/R-package/man/xgb.slice.Booster.Rd +++ b/R-package/man/xgb.slice.Booster.Rd @@ -47,7 +47,7 @@ y <- mtcars$mpg x <- as.matrix(mtcars[, -1]) dm <- xgb.DMatrix(x, label = y, nthread = 1) -model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5) +model <- xgb.train(data = dm, params = xgb.params(nthread = 1), nrounds = 5) model_slice <- xgb.slice.Booster(model, 1, 3) # Prediction for first three rounds predict(model, x, predleaf = TRUE)[, 1:3] diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index be4290d9806d..c46ebe8fb9f6 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -5,7 +5,7 @@ \title{eXtreme Gradient Boosting Training} \usage{ xgb.train( - params = list(), + params = xgb.params(), data, nrounds, evals = list(), @@ -23,124 +23,21 @@ xgb.train( ) } \arguments{ -\item{params}{the list of parameters. The complete list of parameters is -available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. -Below is a shorter summary: +\item{params}{List of XGBoost parameters which control the model building process. 
+See the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation} +and the documentation for \code{\link[=xgb.params]{xgb.params()}} for details. -\strong{1. General Parameters} -\itemize{ -\item \code{booster}: Which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}. -} - -\strong{2. Booster Parameters} - -\strong{2.1. Parameters for Tree Booster} -\itemize{ -\item \code{eta}: The learning rate: scale the contribution of each tree by a factor of \verb{0 < eta < 1} -when it is added to the current approximation. -Used to prevent overfitting by making the boosting process more conservative. -Lower value for \code{eta} implies larger value for \code{nrounds}: low \code{eta} value means model -more robust to overfitting but slower to compute. Default: 0.3. -\item \code{gamma}: Minimum loss reduction required to make a further partition on a leaf node of the tree. -the larger, the more conservative the algorithm will be. -\item \code{max_depth}: Maximum depth of a tree. Default: 6. -\item \code{min_child_weight}: Minimum sum of instance weight (hessian) needed in a child. -If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, -then the building process will give up further partitioning. -In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. -The larger, the more conservative the algorithm will be. Default: 1. -\item \code{subsample}: Subsample ratio of the training instance. -Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees -and this will prevent overfitting. It makes computation shorter (because less data to analyse). -It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1. -\item \code{colsample_bytree}: Subsample ratio of columns when constructing each tree. Default: 1. 
-\item \code{lambda}: L2 regularization term on weights. Default: 1. -\item \code{alpha}: L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0. -\item \code{num_parallel_tree}: Experimental parameter. number of trees to grow per round. -Useful to test Random Forest through XGBoost. -(set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. -Default: 1. -\item \code{monotone_constraints}: A numerical vector consists of \code{1}, \code{0} and \code{-1} with its length -equals to the number of features in the training data. -\code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint. -\item \code{interaction_constraints}: A list of vectors specifying feature indices of permitted interactions. -Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. -Feature index values should start from \code{0} (\code{0} references the first column). -Leave argument unspecified for no interaction constraints. -} +Should be passed as list with named entries. Parameters that are not specified in this +list will use their default values. Alternatively, parameters may be passed directly +as function arguments (accepted through \code{...}). -\strong{2.2. Parameters for Linear Booster} -\itemize{ -\item \code{lambda}: L2 regularization term on weights. Default: 0. -\item \code{lambda_bias}: L2 regularization term on bias. Default: 0. -\item \code{alpha}: L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0. -} - -\strong{3. Task Parameters} -\itemize{ -\item \code{objective}: Specifies the learning task and the corresponding learning objective. -users can pass a self-defined function to it. The default objective options are below: -\itemize{ -\item \code{reg:squarederror}: Regression with squared loss (default). 
-\item \code{reg:squaredlogerror}: Regression with squared log loss \eqn{1/2 \cdot (\log(pred + 1) - \log(label + 1))^2}. -All inputs are required to be greater than -1. -Also, see metric rmsle for possible issue with this objective. -\item \code{reg:logistic}: Logistic regression. -\item \code{reg:pseudohubererror}: Regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss. -\item \code{binary:logistic}: Logistic regression for binary classification. Output probability. -\item \code{binary:logitraw}: Logistic regression for binary classification, output score before logistic transformation. -\item \code{binary:hinge}: Hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. -\item \code{count:poisson}: Poisson regression for count data, output mean of Poisson distribution. -The parameter \code{max_delta_step} is set to 0.7 by default in poisson regression -(used to safeguard optimization). -\item \code{survival:cox}: Cox regression for right censored survival time data (negative values are considered right censored). -Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional -hazard function \eqn{h(t) = h_0(t) \cdot HR}. -\item \code{survival:aft}: Accelerated failure time model for censored survival time data. See -\href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} -for details. -The parameter \code{aft_loss_distribution} specifies the Probability Density Function -used by \code{survival:aft} and the \code{aft-nloglik} metric. -\item \code{multi:softmax}: Set xgboost to do multiclass classification using the softmax objective. -Class is represented by a number and should be from 0 to \code{num_class - 1}. 
-\item \code{multi:softprob}: Same as softmax, but prediction outputs a vector of ndata * nclass elements, which can be -further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging -to each class. -\item \code{rank:pairwise}: Set XGBoost to do ranking task by minimizing the pairwise loss. -\item \code{rank:ndcg}: Use LambdaMART to perform list-wise ranking where -\href{https://en.wikipedia.org/wiki/Discounted_cumulative_gain}{Normalized Discounted Cumulative Gain (NDCG)} is maximized. -\item \code{rank:map}: Use LambdaMART to perform list-wise ranking where -\href{https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Mean_average_precision}{Mean Average Precision (MAP)} -is maximized. -\item \code{reg:gamma}: Gamma regression with log-link. Output is a mean of gamma distribution. -It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be -\href{https://en.wikipedia.org/wiki/Gamma_distribution#Applications}{gamma-distributed}. -\item \code{reg:tweedie}: Tweedie regression with log-link. -It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be -\href{https://en.wikipedia.org/wiki/Tweedie_distribution#Applications}{Tweedie-distributed}. -} - -For custom objectives, one should pass a function taking as input the current predictions (as a numeric -vector or matrix) and the training data (as an \code{xgb.DMatrix} object) that will return a list with elements -\code{grad} and \code{hess}, which should be numeric vectors or matrices with number of rows matching to the numbers -of rows in the training data (same shape as the predictions that are passed as input to the function). -For multi-valued custom objectives, should have shape \verb{[nrows, ntargets]}. Note that negative values of -the Hessian will be clipped, so one might consider using the expected Hessian (Fisher information) if the -objective is non-convex. 
- -See the tutorials \href{https://xgboost.readthedocs.io/en/stable/tutorials/custom_metric_obj.html}{Custom Objective and Evaluation Metric} -and \href{https://xgboost.readthedocs.io/en/latest/tutorials/advanced_custom_obj.html}{Advanced Usage of Custom Objectives} -for more information about custom objectives. -\item \code{base_score}: The initial prediction score of all instances, global bias. Default: 0.5. -\item \code{eval_metric}: Evaluation metrics for validation data. -Users can pass a self-defined function to it. -Default: metric will be assigned according to objective -(rmse for regression, and error for classification, mean average precision for ranking). -List is provided in detail section. -}} +A list of named parameters can be created through the function \code{\link[=xgb.params]{xgb.params()}}, which +accepts all valid parameters as function arguments.} \item{data}{Training dataset. \code{xgb.train()} accepts only an \code{xgb.DMatrix} as the input. -\code{\link[=xgboost]{xgboost()}}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.} + +Note that there is a function \code{\link[=xgboost]{xgboost()}} which is meant to accept R data objects +as inputs, such as data frames and matrices.} \item{nrounds}{Max number of boosting iterations.} @@ -208,7 +105,7 @@ such as an evaluation log (a \code{data.table} object) - be aware that these obj as R attributes, and thus do not get saved when using XGBoost's own serializaters like \code{\link[=xgb.save]{xgb.save()}} (but are kept when using R serializers like \code{\link[=saveRDS]{saveRDS()}}).} -\item{...}{other parameters to pass to \code{params}.} +\item{...}{Other parameters to pass to \code{params}. See \code{\link[=xgb.params]{xgb.params()}} for more details.} } \value{ An object of class \code{xgb.Booster}. @@ -218,41 +115,18 @@ An object of class \code{xgb.Booster}. The \code{\link[=xgboost]{xgboost()}} function is a simpler wrapper for \code{xgb.train()}. 
} \details{ -These are the training functions for \code{\link[=xgboost]{xgboost()}}. - -The \code{xgb.train()} interface supports advanced features such as \code{evals}, -customized objective and evaluation metric functions, therefore it is more flexible -than the \code{\link[=xgboost]{xgboost()}} interface. +Compared to \code{\link[=xgboost]{xgboost()}}, the \code{xgb.train()} interface supports advanced features such as +\code{evals}, customized objective and evaluation metric functions, among others, with the +difference that these work on \code{xgb.DMatrix} objects and do not follow typical R idioms. Parallelization is automatically enabled if OpenMP is present. Number of threads can also be manually specified via the \code{nthread} parameter. -While in other interfaces, the default random seed defaults to zero, in R, if a parameter \code{seed} +While in other XGBoost language bindings, the random seed defaults to zero, in R, if a parameter \code{seed} is not manually supplied, it will generate a random seed through R's own random number generator, whose seed in turn is controllable through \code{set.seed}. If \code{seed} is passed, it will override the RNG from R. -The evaluation metric is chosen automatically by XGBoost (according to the objective) -when the \code{eval_metric} parameter is not provided. -User may set one or several \code{eval_metric} parameters. -Note that when using a customized metric, only this single metric can be used. -The following is the list of built-in metrics for which XGBoost provides optimized implementation: -\itemize{ -\item \code{rmse}: Root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error} -\item \code{logloss}: Negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood} -\item \code{mlogloss}: Multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html} -\item \code{error}: Binary classification error rate.
It is calculated as \verb{(# wrong cases) / (# all cases)}. -By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. -Different threshold (e.g., 0.) could be specified as \verb{error@0}. -\item \code{merror}: Multiclass classification error rate. It is calculated as \verb{(# wrong cases) / (# all cases)}. -\item \code{mae}: Mean absolute error. -\item \code{mape}: Mean absolute percentage error. -\item \code{auc}: Area under the curve. -\url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. -\item \code{aucpr}: Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. -\item \code{ndcg}: Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG} -} - The following callbacks are automatically created when certain parameters are set: \itemize{ \item \code{\link[=xgb.cb.print.evaluation]{xgb.cb.print.evaluation()}} is turned on when \code{verbose > 0} and the \code{print_every_n} @@ -275,7 +149,7 @@ Be aware that one such R attribute that is automatically added is \code{params} is assigned from the \code{params} argument to this function, and is only meant to serve as a reference for what went into the booster, but is not used in other methods that take a booster object - so for example, changing the booster's configuration requires calling \verb{xgb.config<-} -or \verb{xgb.parameters<-}, while simply modifying \verb{attributes(model)$params$<...>} will have no +or \verb{xgb.model.parameters<-}, while simply modifying \verb{attributes(model)$params$<...>} will have no effect elsewhere. 
} \examples{ @@ -349,7 +223,7 @@ bst <- xgb.train( ## An xgb.train example of using variable learning rates at each iteration: -param <- list( +param <- xgb.params( max_depth = 2, eta = 1, nthread = nthread, @@ -371,17 +245,6 @@ bst <- xgb.train( bst <- xgb.train( param, dtrain, nrounds = 25, evals = evals, early_stopping_rounds = 3 ) - -## An 'xgboost' interface example: -bst <- xgboost( - x = agaricus.train$data, - y = factor(agaricus.train$label), - params = list(max_depth = 2, eta = 1), - nthread = nthread, - nrounds = 2 -) -pred <- predict(bst, agaricus.test$data) - } \references{ Tianqi Chen and Carlos Guestrin, "XGBoost: A Scalable Tree Boosting System", diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd index ab6c9ac1a8ef..81386825a6a3 100644 --- a/R-package/man/xgboost.Rd +++ b/R-package/man/xgboost.Rd @@ -66,7 +66,7 @@ set as the last level.} \item{objective}{Optimization objective to minimize based on the supplied data, to be passed by name as a string / character (e.g. \code{reg:absoluteerror}). See the \href{https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters}{Learning Task Parameters} -page for more detailed information on allowed values. +page and the \code{\link[=xgb.params]{xgb.params()}} documentation for more detailed information on allowed values. 
If \code{NULL} (the default), will be automatically determined from \code{y} according to the following logic: diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index bf95a170dcfc..24df4794b30a 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -228,10 +228,10 @@ test_that("xgb.cb.save.model works as expected", { expect_true(file.exists(files[1])) expect_true(file.exists(files[2])) b1 <- xgb.load(files[1]) - xgb.parameters(b1) <- list(nthread = 2) + xgb.model.parameters(b1) <- list(nthread = 2) expect_equal(xgb.get.num.boosted.rounds(b1), 1) b2 <- xgb.load(files[2]) - xgb.parameters(b2) <- list(nthread = 2) + xgb.model.parameters(b2) <- list(nthread = 2) expect_equal(xgb.get.num.boosted.rounds(b2), 2) xgb.config(b2) <- xgb.config(bst) diff --git a/R-package/tests/testthat/test_model_compatibility.R b/R-package/tests/testthat/test_model_compatibility.R index 613ba066f459..9bab6e0c91a7 100644 --- a/R-package/tests/testthat/test_model_compatibility.R +++ b/R-package/tests/testthat/test_model_compatibility.R @@ -87,7 +87,7 @@ test_that("Models from previous versions of XGBoost can be loaded", { booster <- readRDS(model_file) } else { booster <- xgb.load(model_file) - xgb.parameters(booster) <- list(nthread = 2) + xgb.model.parameters(booster) <- list(nthread = 2) } predict(booster, newdata = pred_data) run_booster_check(booster, name) diff --git a/R-package/tests/testthat/test_unicode.R b/R-package/tests/testthat/test_unicode.R index 718d58109163..5c8acc1b0c92 100644 --- a/R-package/tests/testthat/test_unicode.R +++ b/R-package/tests/testthat/test_unicode.R @@ -16,7 +16,7 @@ test_that("Can save and load models with Unicode paths", { path <- file.path(tmpdir, x) xgb.save(bst, path) bst2 <- xgb.load(path) - xgb.parameters(bst2) <- list(nthread = 2) + xgb.model.parameters(bst2) <- list(nthread = 2) expect_equal(predict(bst, test$data), predict(bst2, test$data)) }) }) 
diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd index 911234b3da70..6d800e63dcd5 100644 --- a/R-package/vignettes/xgboostPresentation.Rmd +++ b/R-package/vignettes/xgboostPresentation.Rmd @@ -475,7 +475,7 @@ An interesting test to see how identical our saved model is to the original one # can be modified like this: RhpcBLASctl::omp_set_num_threads(1) bst2 <- xgb.load(fname) -xgb.parameters(bst2) <- list(nthread = 2) +xgb.model.parameters(bst2) <- list(nthread = 2) pred2 <- predict(bst2, test$data) # And now the test @@ -500,7 +500,7 @@ print(class(rawVec)) # load binary model to R bst3 <- xgb.load.raw(rawVec) -xgb.parameters(bst3) <- list(nthread = 2) +xgb.model.parameters(bst3) <- list(nthread = 2) pred3 <- predict(bst3, test$data) # pred2 should be identical to pred