From 805cde3a95d8a03f3fcc219da1c495aac91aeee1 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 20 Dec 2024 08:38:09 +0100 Subject: [PATCH 01/15] [R] Add prefetcher args if supported (#11124) --- R-package/configure | 70 ++++++++++++++++++++++++++++++++++++--- R-package/configure.ac | 32 ++++++++++++++++++ R-package/src/Makevars.in | 2 ++ 3 files changed, 100 insertions(+), 4 deletions(-) diff --git a/R-package/configure b/R-package/configure index d34a29286a42..9e2367edb736 100755 --- a/R-package/configure +++ b/R-package/configure @@ -646,6 +646,8 @@ ac_includes_default="\ ac_header_cxx_list= ac_subst_vars='LTLIBOBJS LIBOBJS +XGBOOST_MM_PREFETCH_PRESENT +XGBOOST_BUILTIN_PREFETCH_PRESENT BACKTRACE_LIB DMLC_DEFS ENDIAN_FLAG @@ -2794,11 +2796,11 @@ if test x$ac_prog_cxx_stdcxx = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5 printf %s "checking for $CXX option to enable C++11 features... " >&6; } -if test ${ac_cv_prog_cxx_11+y} +if test ${ac_cv_prog_cxx_cxx11+y} then : printf %s "(cached) " >&6 else $as_nop - ac_cv_prog_cxx_11=no + ac_cv_prog_cxx_cxx11=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -2840,11 +2842,11 @@ if test x$ac_prog_cxx_stdcxx = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5 printf %s "checking for $CXX option to enable C++98 features... " >&6; } -if test ${ac_cv_prog_cxx_98+y} +if test ${ac_cv_prog_cxx_cxx98+y} then : printf %s "(cached) " >&6 else $as_nop - ac_cv_prog_cxx_98=no + ac_cv_prog_cxx_cxx98=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -3238,6 +3240,64 @@ printf "%s\n" "$as_me:${as_lineno-$LINENO}: Forcing endianness to: ${USE_LITTLE_ENDIAN}" >&6;} fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: Checking for prefetch builtin" >&5 +printf "%s\n" "$as_me: Checking for prefetch builtin" >&6;} +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +__builtin_prefetch + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + XGBOOST_BUILTIN_PREFETCH_PRESENT="-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1" +else $as_nop + XGBOOST_BUILTIN_PREFETCH_PRESENT="" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +if [ "$XGBOOST_BUILTIN_PREFETCH_PRESENT" != "" ]; then + echo "Has __builtin_prefetch" +else + echo "Doesn't have __builtin_prefetch" +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: Checking for mm_prefetch" >&5 +printf "%s\n" "$as_me: Checking for mm_prefetch" >&6;} +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <xmmintrin.h> +int +main (void) +{ +_mm_prefetch + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + XGBOOST_MM_PREFETCH_PRESENT="-DXGBOOST_MM_PREFETCH_PRESENT=1" +else $as_nop + XGBOOST_MM_PREFETCH_PRESENT="" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +if [ "$XGBOOST_MM_PREFETCH_PRESENT" != "" ]; then + echo "Has _mm_prefetch" +else + echo "Doesn't have _mm_prefetch" +fi + OPENMP_CXXFLAGS="" if test `uname -s` = "Linux" @@ -3289,6 +3349,8 @@ fi + + ac_config_files="$ac_config_files src/Makevars" ac_config_headers="$ac_config_headers config.h" diff --git a/R-package/configure.ac b/R-package/configure.ac index e9d8cf113fd2..8fe010070a74 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -50,6 +50,36 @@ AS_IF([test -z "${USE_LITTLE_ENDIAN+x}"], [ ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=${USE_LITTLE_ENDIAN}" ]) +AC_MSG_NOTICE([Checking for prefetch builtin]) +AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + 
[], + [__builtin_prefetch] + )], + [XGBOOST_BUILTIN_PREFETCH_PRESENT="-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1"], + [XGBOOST_BUILTIN_PREFETCH_PRESENT=""] +) +if [[ "$XGBOOST_BUILTIN_PREFETCH_PRESENT" != "" ]]; then + echo "Has __builtin_prefetch" +else + echo "Doesn't have __builtin_prefetch" +fi + +AC_MSG_NOTICE([Checking for mm_prefetch]) +AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [#include <xmmintrin.h>], + [_mm_prefetch] + )], + [XGBOOST_MM_PREFETCH_PRESENT="-DXGBOOST_MM_PREFETCH_PRESENT=1"], + [XGBOOST_MM_PREFETCH_PRESENT=""] +) +if [[ "$XGBOOST_MM_PREFETCH_PRESENT" != "" ]]; then + echo "Has _mm_prefetch" +else + echo "Doesn't have _mm_prefetch" +fi + OPENMP_CXXFLAGS="" if test `uname -s` = "Linux" @@ -89,6 +119,8 @@ AC_SUBST(OPENMP_LIB) AC_SUBST(ENDIAN_FLAG) AC_SUBST(DMLC_DEFS) AC_SUBST(BACKTRACE_LIB) +AC_SUBST(XGBOOST_BUILTIN_PREFETCH_PRESENT) +AC_SUBST(XGBOOST_MM_PREFETCH_PRESENT) AC_CONFIG_FILES([src/Makevars]) AC_CONFIG_HEADERS([config.h]) AC_OUTPUT diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 712eb8ba8d7e..5bd8f6f9e775 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -7,6 +7,8 @@ CXX_STD = CXX17 XGB_RFLAGS = \ @DMLC_DEFS@ \ + @XGBOOST_BUILTIN_PREFETCH_PRESENT@ \ + @XGBOOST_MM_PREFETCH_PRESENT@ \ -DXGBOOST_STRICT_R_MODE=1 \ -DDMLC_LOG_BEFORE_THROW=0 \ -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) \ From 95f57766265ba495c34f76af930819c377034545 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 20 Dec 2024 08:39:03 +0100 Subject: [PATCH 02/15] [R] update note about qs (#11125) --- R-package/R/utils.R | 5 ++--- R-package/man/a-compatibility-note-for-saveRDS-save.Rd | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index ab88ef5d58c1..1055121299d7 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -462,9 +462,8 @@ NULL #' could in theory change again in the future, so XGBoost's serializers should be #' preferred for long-term storage. 
#' -#' Furthermore, note that using the package `qs` for serialization will require -#' version 0.26 or higher of said package, and will have the same compatibility -#' restrictions as R serializers. +#' Furthermore, note that model objects from XGBoost might not be serializable with third-party +#' R packages like `qs` or `qs2`. #' #' @details #' Use [xgb.save()] to save the XGBoost model as a stand-alone file. You may opt into diff --git a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd index 5820cd3acf5e..7eb0e0520ea0 100644 --- a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd +++ b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd @@ -54,9 +54,8 @@ like \code{\link[=saveRDS]{saveRDS()}} or \code{\link[=save]{save()}} before ver could in theory change again in the future, so XGBoost's serializers should be preferred for long-term storage. -Furthermore, note that using the package \code{qs} for serialization will require -version 0.26 or higher of said package, and will have the same compatibility -restrictions as R serializers. +Furthermore, note that model objects from XGBoost might not be serializable with third-party +R packages like \code{qs} or \code{qs2}. } \details{ Use \code{\link[=xgb.save]{xgb.save()}} to save the XGBoost model as a stand-alone file. You may opt into From 027eb7bed5efb95f1cb1abd3d86d2fb227695a6c Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 21 Dec 2024 11:39:15 +0800 Subject: [PATCH 03/15] Warning for polars lazy frame. (#11126) --- doc/python/python_intro.rst | 14 +++++++++++++- python-package/xgboost/data.py | 5 +++++ python-package/xgboost/sklearn.py | 8 ++++---- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/python/python_intro.rst b/doc/python/python_intro.rst index 0e2793cfae9f..5ded82922879 100644 --- a/doc/python/python_intro.rst +++ b/doc/python/python_intro.rst @@ -118,6 +118,7 @@ Markers - F: Not supported. 
- NE: Invalid type for the use case. For instance, `pd.Series` can not be multi-target label. - NPA: Support with the help of numpy array. +- AT: Support with the help of arrow table. - CPA: Support with the help of cupy array. - SciCSR: Support with the help of scripy sparse CSR. The conversion to scipy CSR may or may not be possible. Raise a type error if conversion fails. - FF: We can look forward to having its support in recent future if requested. @@ -170,13 +171,24 @@ Support Matrix +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ | modin.Series | NPA | FF | NPA | NPA | FF | | +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| pyarrow.Table | NPA | NPA | NPA | NPA | NPA | NPA | +| pyarrow.Table | T | T | T | T | T | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| polars.DataFrame | AT | AT | AT | AT | AT | AT | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| polars.LazyFrame (WARN) | AT | AT | AT | AT | AT | AT | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| polars.Series | AT | AT | AT | AT | AT | NE | +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ | _\_array\_\_ | NPA | F | NPA | NPA | H | | +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ | Others | SciCSR | F | | F | F | | +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +The polars ``LazyFrame.collect`` supports many configurations, ranging from the choice of +query engine to type coercion. 
XGBoost simply uses the default parameter. Please run +``collect`` to obtain the ``DataFrame`` before passing it into XGBoost for finer control +over the behaviour. + Setting Parameters ------------------ XGBoost can use either a list of pairs or a dictionary to set :doc:`parameters `. For instance: diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 1b9435188469..a89d41036e92 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -961,6 +961,11 @@ def _transform_polars_df( ) -> Tuple[ArrowTransformed, Optional[FeatureNames], Optional[FeatureTypes]]: if _is_polars_lazyframe(data): df = data.collect() + warnings.warn( + "Using the default parameters for the polars `LazyFrame.collect`. Consider" + " passing a realized `DataFrame` or `Series` instead.", + UserWarning, + ) else: df = data diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 1f233befd91a..53db87b381f7 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -1143,7 +1143,7 @@ def fit( Parameters ---------- X : - Feature matrix. See :ref:`py-data` for a list of supported types. + Input feature matrix. See :ref:`py-data` for a list of supported types. When the ``tree_method`` is set to ``hist``, internally, the :py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix` @@ -1267,7 +1267,7 @@ def predict( Parameters ---------- X : - Data to predict with. + Data to predict with. See :ref:`py-data` for a list of supported types. output_margin : Whether to output the raw untransformed margin value. validate_features : @@ -1334,8 +1334,8 @@ def apply( Parameters ---------- - X : array_like, shape=[n_samples, n_features] - Input features matrix. + X : + Input features matrix. See :ref:`py-data` for a list of supported types. iteration_range : See :py:meth:`predict`. 
From 198c3e1fe08e018084d3b8e498f895c162c95b8d Mon Sep 17 00:00:00 2001 From: david-cortes Date: Sat, 28 Dec 2024 14:57:45 +0100 Subject: [PATCH 04/15] [R] Change behavior around renamed / removed arguments (#11095) --- R-package/R/utils.R | 160 +++++++++++++++++++----- R-package/R/xgb.Booster.R | 4 +- R-package/R/xgb.plot.importance.R | 2 +- R-package/R/xgb.train.R | 13 +- R-package/man/xgb.cv.Rd | 13 +- R-package/man/xgb.dump.Rd | 13 +- R-package/man/xgb.model.dt.tree.Rd | 13 +- R-package/man/xgb.plot.multi.trees.Rd | 13 +- R-package/man/xgb.plot.tree.Rd | 13 +- R-package/man/xgb.train.Rd | 13 +- R-package/man/xgboost-options.Rd | 26 ++++ R-package/tests/testthat/test_helpers.R | 39 +++++- 12 files changed, 264 insertions(+), 58 deletions(-) create mode 100644 R-package/man/xgboost-options.Rd diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 1055121299d7..7f14413b76d1 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -526,32 +526,73 @@ NULL #' @name a-compatibility-note-for-saveRDS-save NULL +#' @name xgboost-options +#' @title XGBoost Options +#' @description XGBoost offers an \link[base:options]{option setting} for controlling the behavior +#' of deprecated and removed function arguments. +#' +#' Some of the arguments in functions like [xgb.train()] or [predict.xgb.Booster()] have been renamed +#' from how they were in previous versions, or have been removed. +#' +#' In order to make the transition to newer XGBoost versions easier, some of these parameters are +#' still accepted but issue a warning when using them. \bold{Note that these warnings will become +#' errors in the future!!} - this is just a temporary workaround to make the transition easier. +#' +#' One can optionally use 'strict mode' to turn these warnings into errors, in order to ensure +#' that code calling xgboost will still work once those are removed in future releases. 
+#' +#' Currently, the only supported option is `xgboost.strict_mode`, which can be set to `TRUE` or +#' `FALSE` (default). +#' @examples +#' options("xgboost.strict_mode" = FALSE) +#' options("xgboost.strict_mode" = TRUE) +NULL + # Lookup table for the deprecated parameters bookkeeping deprecated_train_params <- list( - 'print.every.n' = 'print_every_n', - 'early.stop.round' = 'early_stopping_rounds', - 'training.data' = 'data', - 'dtrain' = 'data', - 'watchlist' = 'evals', - 'feval' = 'custom_metric' + renamed = list( + 'print.every.n' = 'print_every_n', + 'early.stop.round' = 'early_stopping_rounds', + 'training.data' = 'data', + 'dtrain' = 'data', + 'watchlist' = 'evals', + 'feval' = 'custom_metric' + ), + removed = character() ) deprecated_dttree_params <- list( - 'n_first_tree' = 'trees' + renamed = list('n_first_tree' = 'trees'), + removed = c("feature_names", "text") ) -deprecated_plot_params <- list( - 'plot.height' = 'plot_height', - 'plot.width' = 'plot_width' +deprecated_plotimp_params <- list( + renamed = list( + 'plot.height' = 'plot_height', + 'plot.width' = 'plot_width' + ), + removed = character() ) -deprecated_multitrees_params <- c( - deprecated_plot_params, - list('features.keep' = 'features_keep') +deprecated_multitrees_params <- list( + renamed = c( + deprecated_plotimp_params$renamed, + list('features.keep' = 'features_keep') + ), + removed = "feature_names" ) deprecated_dump_params <- list( - 'with.stats' = 'with_stats' + renamed = list('with.stats' = 'with_stats'), + removed = character() ) -deprecated_plottree_params <- c( - deprecated_plot_params, - deprecated_dump_params +deprecated_plottree_params <- list( + renamed = c( + deprecated_plotimp_params$renamed, + deprecated_dump_params$renamed, + list('trees' = 'tree_idx') + ), + removed = c("show_node_id", "feature_names") +) +deprecated_predict_params <- list( + renamed = list("ntreelimit" = "iterationrange"), + removed = "reshape" ) # Checks the dot-parameters for deprecated names @@ -569,42 +610,99 @@ check.deprecation 
<- function( if (length(params) == 0) { return(NULL) } + error_on_deprecated <- getOption("xgboost.strict_mode", default = FALSE) + throw_err_or_depr_msg <- function(...) { + if (error_on_deprecated) { + stop(...) + } else { + warning(..., " This warning will become an error in a future version.") + } + } + if (is.null(names(params)) || min(nchar(names(params))) == 0L) { - stop("Passed invalid positional arguments") + throw_err_or_depr_msg("Passed invalid positional arguments") } - all_match <- pmatch(names(params), names(deprecated_list)) + list_renamed <- deprecated_list$renamed + list_removed <- deprecated_list$removed + has_params_arg <- list_renamed[[1L]] == deprecated_train_params$renamed[[1L]] + all_match <- pmatch(names(params), names(list_renamed)) # throw error on unrecognized parameters if (!allow_unrecognized && anyNA(all_match)) { + names_unrecognized <- names(params)[is.na(all_match)] # make it informative if they match something that goes under 'params' - if (deprecated_list[[1L]] == deprecated_train_params[[1L]]) { + if (has_params_arg) { names_params <- formalArgs(xgb.params) names_params <- c(names_params, gsub("_", ".", names_params, fixed = TRUE)) names_under_params <- intersect(names_unrecognized, names_params) if (length(names_under_params)) { - stop( - "Passed invalid function arguments: ", - paste(head(names_under_params), collapse = ", "), - ". These should be passed as a list to argument 'params'." - ) + if (error_on_deprecated) { + stop( + "Passed invalid function arguments: ", + paste(head(names_under_params), collapse = ", "), + ". These should be passed as a list to argument 'params'." + ) + } else { + warning( + "Passed invalid function arguments: ", + paste(head(names_under_params), collapse = ", "), + ". These should be passed as a list to argument 'params'.", + " Conversion from argument to 'params' entry will be done automatically, but this ", + "behavior will become an error in a future version." 
+ ) + if (any(names_under_params %in% names(env[["params"]]))) { + repeteated_params <- intersect(names_under_params, names(env[["params"]])) + stop( + "Passed entries as both function argument(s) and as elements under 'params': ", + paste(head(repeteated_params), collapse = ", ") + ) + } else { + env[["params"]] <- c(env[["params"]], params[names_under_params]) + } + } + names_unrecognized <- setdiff(names_unrecognized, names_under_params) } } + + # check for parameters that were removed from a previous version + names_removed <- intersect(names_unrecognized, list_removed) + if (length(names_removed)) { + throw_err_or_depr_msg( + "Parameter(s) have been removed from this function: ", + paste(names_removed, collapse = ", "), "." + ) + names_unrecognized <- setdiff(names_unrecognized, list_removed) + } + # otherwise throw a generic error - stop( - "Passed unrecognized parameters: ", - paste(head(names_unrecognized), collapse = ", ") - ) + if (length(names_unrecognized)) { + throw_err_or_depr_msg( + "Passed unrecognized parameters: ", + paste(head(names_unrecognized), collapse = ", ") + ) + } + + } else { + + names_removed <- intersect(names(params)[is.na(all_match)], list_removed) + if (length(names_removed)) { + throw_err_or_depr_msg( + "Parameter(s) have been removed from this function: ", + paste(names_removed, collapse = ", "), "." + ) + } + } - matched_params <- deprecated_list[all_match[!is.na(all_match)]] + matched_params <- list_renamed[all_match[!is.na(all_match)]] idx_orig <- seq_along(params)[!is.na(all_match)] function_args_passed <- names(as.list(fn_call))[-1L] for (idx in seq_along(matched_params)) { match_old <- names(matched_params)[[idx]] match_new <- matched_params[[idx]] - warning( + throw_err_or_depr_msg( "Parameter '", match_old, "' has been renamed to '", - match_new, "' and will be removed in a future version." + match_new, "'." 
) if (match_new %in% function_args_passed) { stop("Passed both '", match_new, "' and '", match_old, "'.") diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index c2e7072f0379..f04a4918dd92 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -354,9 +354,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, avoid_transpose = FALSE, validate_features = FALSE, base_margin = NULL, ...) { - if (NROW(list(...))) { - warning("Passed unused prediction arguments: ", paste(names(list(...)), collapse = ", "), ".") - } + check.deprecation(deprecated_predict_params, match.call(), ..., allow_unrecognized = TRUE) if (validate_features) { newdata <- validate.features(object, newdata) } diff --git a/R-package/R/xgb.plot.importance.R b/R-package/R/xgb.plot.importance.R index 8acec15a0dd5..993cd6a457f3 100644 --- a/R-package/R/xgb.plot.importance.R +++ b/R-package/R/xgb.plot.importance.R @@ -75,7 +75,7 @@ #' @export xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure = NULL, rel_to_first = FALSE, left_margin = 10, cex = NULL, plot = TRUE, ...) { - check.deprecation(deprecated_plot_params, match.call(), ..., allow_unrecognized = TRUE) + check.deprecation(deprecated_plotimp_params, match.call(), ..., allow_unrecognized = TRUE) if (!is.data.table(importance_matrix)) { stop("importance_matrix: must be a data.table") } diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index b20ab018b43f..77aa5eb8ccae 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -83,11 +83,18 @@ #' [xgb.save()] (but are kept when using R serializers like [saveRDS()]). #' @param ... Not used. #' -#' Some arguments are currently deprecated or have been renamed. 
If a deprecated argument -#' is passed, will throw a warning and use its current equivalent. +#' Some arguments that were part of this function in previous XGBoost versions are currently +#' deprecated or have been renamed. If a deprecated or renamed argument is passed, will throw +#' a warning (by default) and use its current equivalent instead. This warning will become an +#' error if using the \link[=xgboost-options]{'strict mode' option}. #' #' If some additional argument is passed that is neither a current function argument nor -#' a deprecated argument, an error will be thrown. +#' a deprecated or renamed argument, a warning or error will be thrown depending on the +#' 'strict mode' option. +#' +#' \bold{Important:} `...` will be removed in a future version, and all the current +#' deprecation warnings will become errors. Please use only arguments that form part of +#' the function signature. #' @return An object of class `xgb.Booster`. #' @details #' Compared to [xgboost()], the `xgb.train()` interface supports advanced features such as diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 855781c0e53a..6a9f74b8ba92 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -143,11 +143,18 @@ to customize the training process.} \item{...}{Not used. -Some arguments are currently deprecated or have been renamed. If a deprecated argument -is passed, will throw a warning and use its current equivalent. +Some arguments that were part of this function in previous XGBoost versions are currently +deprecated or have been renamed. If a deprecated or renamed argument is passed, will throw +a warning (by default) and use its current equivalent instead. This warning will become an +error if using the \link[=xgboost-options]{'strict mode' option}. 
If some additional argument is passed that is neither a current function argument nor -a deprecated argument, an error will be thrown.} +a deprecated or renamed argument, a warning or error will be thrown depending on the +'strict mode' option. + +\bold{Important:} \code{...} will be removed in a future version, and all the current +deprecation warnings will become errors. Please use only arguments that form part of +the function signature.} } \value{ An object of class 'xgb.cv.synchronous' with the following elements: diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index d10c61c46777..84de7ffb0951 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -35,11 +35,18 @@ for graph visualization, such as function \code{DiagrammeR::grViz()}} \item{...}{Not used. -Some arguments are currently deprecated or have been renamed. If a deprecated argument -is passed, will throw a warning and use its current equivalent. +Some arguments that were part of this function in previous XGBoost versions are currently +deprecated or have been renamed. If a deprecated or renamed argument is passed, will throw +a warning (by default) and use its current equivalent instead. This warning will become an +error if using the \link[=xgboost-options]{'strict mode' option}. If some additional argument is passed that is neither a current function argument nor -a deprecated argument, an error will be thrown.} +a deprecated or renamed argument, a warning or error will be thrown depending on the +'strict mode' option. + +\bold{Important:} \code{...} will be removed in a future version, and all the current +deprecation warnings will become errors. 
Please use only arguments that form part of +the function signature.} } \value{ If fname is not provided or set to \code{NULL} the function will return the model diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd index 8564ffec6eec..be520dfbf4e3 100644 --- a/R-package/man/xgb.model.dt.tree.Rd +++ b/R-package/man/xgb.model.dt.tree.Rd @@ -22,11 +22,18 @@ character strings (when \code{FALSE}, default).} \item{...}{Not used. -Some arguments are currently deprecated or have been renamed. If a deprecated argument -is passed, will throw a warning and use its current equivalent. +Some arguments that were part of this function in previous XGBoost versions are currently +deprecated or have been renamed. If a deprecated or renamed argument is passed, will throw +a warning (by default) and use its current equivalent instead. This warning will become an +error if using the \link[=xgboost-options]{'strict mode' option}. If some additional argument is passed that is neither a current function argument nor -a deprecated argument, an error will be thrown.} +a deprecated or renamed argument, a warning or error will be thrown depending on the +'strict mode' option. + +\bold{Important:} \code{...} will be removed in a future version, and all the current +deprecation warnings will become errors. Please use only arguments that form part of +the function signature.} } \value{ A \code{data.table} with detailed information about tree nodes. It has the following columns: diff --git a/R-package/man/xgb.plot.multi.trees.Rd b/R-package/man/xgb.plot.multi.trees.Rd index a4421d239fdd..dae4d3953a73 100644 --- a/R-package/man/xgb.plot.multi.trees.Rd +++ b/R-package/man/xgb.plot.multi.trees.Rd @@ -28,11 +28,18 @@ The values are passed to \code{DiagrammeR::render_graph()}.} \item{...}{Not used. -Some arguments are currently deprecated or have been renamed. If a deprecated argument -is passed, will throw a warning and use its current equivalent. 
+Some arguments that were part of this function in previous XGBoost versions are currently +deprecated or have been renamed. If a deprecated or renamed argument is passed, will throw +a warning (by default) and use its current equivalent instead. This warning will become an +error if using the \link[=xgboost-options]{'strict mode' option}. If some additional argument is passed that is neither a current function argument nor -a deprecated argument, an error will be thrown.} +a deprecated or renamed argument, a warning or error will be thrown depending on the +'strict mode' option. + +\bold{Important:} \code{...} will be removed in a future version, and all the current +deprecation warnings will become errors. Please use only arguments that form part of +the function signature.} } \value{ Rendered graph object which is an htmlwidget of ' class \code{grViz}. Similar to diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index 00ed6d24864e..99445b291680 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -31,11 +31,18 @@ cover is the sum of second order gradient in each node.} \item{...}{Not used. -Some arguments are currently deprecated or have been renamed. If a deprecated argument -is passed, will throw a warning and use its current equivalent. +Some arguments that were part of this function in previous XGBoost versions are currently +deprecated or have been renamed. If a deprecated or renamed argument is passed, will throw +a warning (by default) and use its current equivalent instead. This warning will become an +error if using the \link[=xgboost-options]{'strict mode' option}. If some additional argument is passed that is neither a current function argument nor -a deprecated argument, an error will be thrown.} +a deprecated or renamed argument, a warning or error will be thrown depending on the +'strict mode' option. 
+ +\bold{Important:} \code{...} will be removed in a future version, and all the current +deprecation warnings will become errors. Please use only arguments that form part of +the function signature.} } \value{ Rendered graph object which is an htmlwidget of ' class \code{grViz}. Similar to diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index ba2f4a9d884e..8cd5c96d309a 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -117,11 +117,18 @@ as R attributes, and thus do not get saved when using XGBoost's own serializater \item{...}{Not used. -Some arguments are currently deprecated or have been renamed. If a deprecated argument -is passed, will throw a warning and use its current equivalent. +Some arguments that were part of this function in previous XGBoost versions are currently +deprecated or have been renamed. If a deprecated or renamed argument is passed, will throw +a warning (by default) and use its current equivalent instead. This warning will become an +error if using the \link[=xgboost-options]{'strict mode' option}. If some additional argument is passed that is neither a current function argument nor -a deprecated argument, an error will be thrown.} +a deprecated or renamed argument, a warning or error will be thrown depending on the +'strict mode' option. + +\bold{Important:} \code{...} will be removed in a future version, and all the current +deprecation warnings will become errors. Please use only arguments that form part of +the function signature.} } \value{ An object of class \code{xgb.Booster}. 
diff --git a/R-package/man/xgboost-options.Rd b/R-package/man/xgboost-options.Rd new file mode 100644 index 000000000000..2515fa7e8092 --- /dev/null +++ b/R-package/man/xgboost-options.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{xgboost-options} +\alias{xgboost-options} +\title{XGBoost Options} +\description{ +XGBoost offers an \link[base:options]{option setting} for controlling the behavior +of deprecated and removed function arguments. + +Some of the arguments in functions like \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=predict.xgb.Booster]{predict.xgb.Booster()}} have been renamed +from how they were in previous versions, or have been removed. + +In order to make the transition to newer XGBoost versions easier, some of these parameters are +still accepted but issue a warning when using them. \bold{Note that these warnings will become +errors in the future!!} - this is just a temporary workaround to make the transition easier. + +One can optionally use 'strict mode' to turn these warnings into errors, in order to ensure +that code calling xgboost will still work once those are removed in future releases. + +Currently, the only supported option is \code{xgboost.strict_mode}, which can be set to \code{TRUE} or +\code{FALSE} (default). 
+} +\examples{ +options("xgboost.strict_mode" = FALSE) +options("xgboost.strict_mode" = TRUE) +} diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index e2235fd7098f..9082d7069146 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -599,6 +599,17 @@ test_that("check.deprecation works", { ) # with exact name + options("xgboost.strict_mode" = TRUE) + expect_error({ + model <- xgb.train( + data = dm, + params = params, + nrounds = 10, + watchlist = list(tr = dm), + verbose = 0 + ) + }, regexp = "watchlist") + options("xgboost.strict_mode" = FALSE) expect_warning({ model <- xgb.train( data = dm, @@ -624,7 +635,8 @@ test_that("check.deprecation works", { expect_true(hasName(attributes(model), "evaluation_log")) expect_equal(names(attributes(model)$evaluation_log), c("iter", "train_rmse")) - # error is thrown if argument cannot be matched + # error/warning is thrown if argument cannot be matched + options("xgboost.strict_mode" = TRUE) expect_error({ model <- xgb.train( data = dm, @@ -634,17 +646,40 @@ test_that("check.deprecation works", { verbose = 0 ) }, regexp = "unrecognized") + options("xgboost.strict_mode" = FALSE) + expect_warning({ + model <- xgb.train( + data = dm, + params = params, + nrounds = 10, + watchlistt = list(train = dm), + verbose = 0 + ) + }, regexp = "unrecognized") # error should suggest to put under 'params' if it goes there + options("xgboost.strict_mode" = TRUE) expect_error({ model <- xgb.train( data = dm, nthread = 1, max_depth = 2, eval_metric = "rmse", nrounds = 10, - watchlistt = list(train = dm), + evals = list(train = dm), verbose = 0 ) }, regexp = "should be passed as a list to argument 'params'") + options("xgboost.strict_mode" = FALSE) + expect_warning({ + model <- xgb.train( + data = dm, + nthread = 1, max_depth = 2, eval_metric = "mae", + nrounds = 10, + evals = list(train = dm), + verbose = 0 + ) + }, regexp = "should be passed as a list 
to argument 'params'") + expect_true(hasName(attributes(model), "evaluation_log")) + expect_equal(names(attributes(model)$evaluation_log), c("iter", "train_mae")) # can take more than one deprecated parameter expect_warning({ From bec2d3236081dd7b429ddb791ac1e6df202e27f3 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 28 Dec 2024 23:19:11 +0800 Subject: [PATCH 05/15] Increase timeout for Multi-GPU approx tests. (#11111) --- tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py index 100c7861d55f..794f1405d7d0 100644 --- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py +++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py @@ -36,7 +36,7 @@ pytestmark = [ pytest.mark.skipif(**tm.no_dask()), pytest.mark.skipif(**tm.no_dask_cuda()), - tm.timeout(60), + tm.timeout(120), ] try: From 15a1b2ffc4bdf46f04acd9334867117993863219 Mon Sep 17 00:00:00 2001 From: Ayoub Cherkaoui <128156731+ayoub317@users.noreply.github.com> Date: Tue, 31 Dec 2024 16:53:42 +0100 Subject: [PATCH 06/15] Update C API tutorial (#11131) Co-authored-by: a.cherkaoui --- doc/tutorials/c_api_tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tutorials/c_api_tutorial.rst b/doc/tutorials/c_api_tutorial.rst index 2346ff9ac9d8..45956c38fd17 100644 --- a/doc/tutorials/c_api_tutorial.rst +++ b/doc/tutorials/c_api_tutorial.rst @@ -176,7 +176,7 @@ Sample examples along with Code snippet to use C API functions const int data1[] = { 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }; // 2D matrix - const int ROWS = 5, COLS = 3; + const int ROWS = 6, COLS = 3; const int data2[ROWS][COLS] = { {1, 2, 3}, {2, 4, 6}, {3, -1, 9}, {4, 8, -1}, {2, 5, 1}, 
{0, 1, 5} }; DMatrixHandle dmatrix1, dmatrix2; // Pass the matrix, no of rows & columns contained in the matrix variable From 57ce0626184531cca7f093793366cb9f5632f414 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 2 Jan 2025 11:56:22 +0800 Subject: [PATCH 07/15] Remove redundant quote in type chec. (#11130) --- src/common/json_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/json_utils.h b/src/common/json_utils.h index a2a8a3cae88c..812aa1f2e57a 100644 --- a/src/common/json_utils.h +++ b/src/common/json_utils.h @@ -43,7 +43,7 @@ std::enable_if_t TypeCheckError() { template void TypeCheck(Json const &value, StringView name) { if (!detail::TypeCheckImpl(value)) { - LOG(FATAL) << "Invalid type for: `" << name << "`, expecting one of the: {`" + LOG(FATAL) << "Invalid type for: `" << name << "`, expecting one of the: {" << detail::TypeCheckError() << "}, got: `" << value.GetValue().TypeStr() << "`"; } From 92d1bfe6239e6a4d9d06d253615f7ea65b5ae1cc Mon Sep 17 00:00:00 2001 From: david-cortes Date: Thu, 2 Jan 2025 04:59:12 +0100 Subject: [PATCH 08/15] [R] Move gc data protection to R side (#11104) --- R-package/R/xgb.DMatrix.R | 33 ++++++++++++++++++++++++++++----- R-package/src/xgboost_R.cc | 12 +++--------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index f64a5439d639..9a753097e02b 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -353,6 +353,9 @@ xgb.QuantileDMatrix <- function( ) data_iterator <- .single.data.iterator(iterator_env) + env_keep_alive <- new.env() + env_keep_alive$keepalive <- NULL + # Note: the ProxyDMatrix has its finalizer assigned in the R externalptr # object, but that finalizer will only be called once the object is # garbage-collected, which doesn't happen immediately after it goes out @@ -363,9 +366,10 @@ xgb.QuantileDMatrix <- function( .Call(XGDMatrixFree_R, proxy_handle) }) iterator_next <- function() { 
- return(xgb.ProxyDMatrix(proxy_handle, data_iterator)) + return(xgb.ProxyDMatrix(proxy_handle, data_iterator, env_keep_alive)) } iterator_reset <- function() { + env_keep_alive$keepalive <- NULL return(data_iterator$f_reset(iterator_env)) } calling_env <- environment() @@ -553,7 +557,8 @@ xgb.DataBatch <- function( } # This is only for internal usage, class is not exposed to the user. -xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) { +xgb.ProxyDMatrix <- function(proxy_handle, data_iterator, env_keep_alive) { + env_keep_alive$keepalive <- NULL lst <- data_iterator$f_next(data_iterator$env) if (is.null(lst)) { return(0L) @@ -566,13 +571,19 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) { stop("Either one of 'group' or 'qid' should be NULL") } if (is.data.frame(lst$data)) { - tmp <- .process.df.for.dmatrix(lst$data, lst$feature_types) + data <- lst$data + lst$data <- NULL + tmp <- .process.df.for.dmatrix(data, lst$feature_types) lst$feature_types <- tmp$feature_types + data <- NULL + env_keep_alive$keepalive <- tmp .Call(XGProxyDMatrixSetDataColumnar_R, proxy_handle, tmp$lst) } else if (is.matrix(lst$data)) { + env_keep_alive$keepalive <- lst .Call(XGProxyDMatrixSetDataDense_R, proxy_handle, lst$data) } else if (inherits(lst$data, "dgRMatrix")) { tmp <- list(p = lst$data@p, j = lst$data@j, x = lst$data@x, ncol = ncol(lst$data)) + env_keep_alive$keepalive <- tmp .Call(XGProxyDMatrixSetDataCSR_R, proxy_handle, tmp) } else { stop("'data' has unsupported type.") @@ -712,14 +723,23 @@ xgb.ExtMemDMatrix <- function( cache_prefix <- path.expand(cache_prefix) nthread <- as.integer(NVL(nthread, -1L)) + # The purpose of this environment is to keep data alive (protected from the + # garbage collector) after setting the data in the proxy dmatrix. The data + # held here (under name 'keepalive') should be unset (leaving it unprotected + # for garbage collection) before the start of each data iteration batch and + # during each iterator reset. 
+ env_keep_alive <- new.env() + env_keep_alive$keepalive <- NULL + proxy_handle <- .make.proxy.handle() on.exit({ .Call(XGDMatrixFree_R, proxy_handle) }) iterator_next <- function() { - return(xgb.ProxyDMatrix(proxy_handle, data_iterator)) + return(xgb.ProxyDMatrix(proxy_handle, data_iterator, env_keep_alive)) } iterator_reset <- function() { + env_keep_alive$keepalive <- NULL return(data_iterator$f_reset(data_iterator$env)) } calling_env <- environment() @@ -779,14 +799,17 @@ xgb.QuantileDMatrix.from_iterator <- function( # nolint nthread <- as.integer(NVL(nthread, -1L)) + env_keep_alive <- new.env() + env_keep_alive$keepalive <- NULL proxy_handle <- .make.proxy.handle() on.exit({ .Call(XGDMatrixFree_R, proxy_handle) }) iterator_next <- function() { - return(xgb.ProxyDMatrix(proxy_handle, data_iterator)) + return(xgb.ProxyDMatrix(proxy_handle, data_iterator, env_keep_alive)) } iterator_reset <- function() { + env_keep_alive$keepalive <- NULL return(data_iterator$f_reset(data_iterator$env)) } calling_env <- environment() diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index adb9649bf33d..0e7234a18708 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -687,7 +687,6 @@ XGB_DLL SEXP XGProxyDMatrixSetDataDense_R(SEXP handle, SEXP R_mat) { { std::string array_str = MakeArrayInterfaceFromRMat(R_mat); res_code = XGProxyDMatrixSetDataDense(proxy_dmat, array_str.c_str()); - R_SetExternalPtrProtected(handle, R_mat); } CHECK_CALL(res_code); R_API_END(); @@ -708,7 +707,6 @@ XGB_DLL SEXP XGProxyDMatrixSetDataCSR_R(SEXP handle, SEXP lst) { array_str_indices.c_str(), array_str_data.c_str(), ncol); - R_SetExternalPtrProtected(handle, lst); } CHECK_CALL(res_code); R_API_END(); @@ -722,7 +720,6 @@ XGB_DLL SEXP XGProxyDMatrixSetDataColumnar_R(SEXP handle, SEXP lst) { { std::string sinterface = MakeArrayInterfaceFromRDataFrame(lst); res_code = XGProxyDMatrixSetDataColumnar(proxy_dmat, sinterface.c_str()); - 
R_SetExternalPtrProtected(handle, lst); } CHECK_CALL(res_code); R_API_END(); @@ -736,20 +733,17 @@ struct _RDataIterator { SEXP f_reset; SEXP calling_env; SEXP continuation_token; - SEXP proxy_dmat; _RDataIterator( - SEXP f_next, SEXP f_reset, SEXP calling_env, SEXP continuation_token, SEXP proxy_dmat) : + SEXP f_next, SEXP f_reset, SEXP calling_env, SEXP continuation_token) : f_next(f_next), f_reset(f_reset), calling_env(calling_env), - continuation_token(continuation_token), proxy_dmat(proxy_dmat) {} + continuation_token(continuation_token) {} void reset() { - R_SetExternalPtrProtected(this->proxy_dmat, R_NilValue); SafeExecFun(this->f_reset, this->calling_env, this->continuation_token); } int next() { - R_SetExternalPtrProtected(this->proxy_dmat, R_NilValue); SEXP R_res = Rf_protect( SafeExecFun(this->f_next, this->calling_env, this->continuation_token)); int res = Rf_asInteger(R_res); @@ -777,7 +771,7 @@ SEXP XGDMatrixCreateFromCallbackGeneric_R( int res_code; try { - _RDataIterator data_iterator(f_next, f_reset, calling_env, continuation_token, proxy_dmat); + _RDataIterator data_iterator(f_next, f_reset, calling_env, continuation_token); std::string str_cache_prefix; xgboost::Json jconfig{xgboost::Object{}}; From 8377973d42bcb98c23f016827425851e09d9f8c3 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Thu, 2 Jan 2025 05:08:05 +0100 Subject: [PATCH 09/15] [R] Add docs about maintenance of R interface (#11117) --- doc/R-package/adding_parameters.rst | 28 ++++++++++++++++++++++++++++ doc/R-package/index.rst | 1 + 2 files changed, 29 insertions(+) create mode 100644 doc/R-package/adding_parameters.rst diff --git a/doc/R-package/adding_parameters.rst b/doc/R-package/adding_parameters.rst new file mode 100644 index 000000000000..ed10f17f7c2e --- /dev/null +++ b/doc/R-package/adding_parameters.rst @@ -0,0 +1,28 @@ +.. 
_index_base: + +Developer guide: parameters from core library +============================================= + +The XGBoost core library accepts a long list of input parameters (e.g. ``max_depth`` for decision trees, regularization, ``device`` where compute happens, etc.). New parameters are constantly being added as XGBoost is developed further, and their language bindings should allow passing to the core library everything that it accepts. + +In the case of R, these parameters are passed as an R ``list`` object to function ``xgb.train``, but the R interface aims at providing a better, more idiomatic user experience by offering a parameters constructor with full in-package documentation. This requires keeping the list of parameters and their documentation up to date **in the R package** too, in addition to the general online documentation for XGBoost. + +In more detail, there is a function ``xgb.params`` which allows the user to construct such a ``list`` object to pass to ``xgb.train`` while getting full IDE autocompletion on it. This function should accept all possible XGBoost parameters as arguments, listing them in the same order as they appear in the online documentation. + +In order to add a new parameter from the core library to ``xgb.params``: + +- Add the parameter at the right location, according to the order in which it appears in the .rst file listing the parameters for the core library. If the parameter appears more than once (e.g. because it applies to more than one type of booster), then add it in a position according to the first occurrence. +- Copy-paste the docs from the .rst file as another ``@param`` entry for ``xgb.train``. Some easy substitutions might be needed, such as changing double-backticks to single-backticks, enquoting variables that need to be passed as strings, and replacing ``:math:`` calls with their roxygen equivalent ``\eqn{}``, among others.
+- If needed, make minimal modifications for the R interface - for example, since parameters are only listed once, should add at the beginning a note about which type of booster they apply to if they are only applicable for one type, or list default values by booster type if they are different. + +After adding the parameter to ``xgb.params``, it will also need to be added to the function ``xgboost`` if that function can use it. The function ``xgboost`` is not meant to support everything that the core library offers - currently parameters related to learning-to-rank are not listed there for example as they are unusable for it (but can be used for ``xgb.train``). + +In order to add the parameter to ``xgboost``: + +- Add it to the function signature. The position here differs though: there are a few selected parameters whose positions have been moved closer to the top of the signature. New parameters should not be placed within those "top" positions - instead, place it after parameter ``tree_method``, in the most similar place among the remaining parameters according to how it was inserted in ``xgb.params``. Note that the rest of the parameters that come after ``tree_method`` are still meant to follow the same relative order as in ``xgb.params``. +- If the parameter applies exactly in the same way as in ``xgb.train``, then no additional documentation is needed for ``xgboost``, because it inherits parameters from ``xgb.params`` by default. However, some parameters might need slight modifications - for example, not all objectives are supported by ``xgboost``, so modifications are needed for that parameter. +- If the parameter allows aliases, use only one alias, and prefer the most descriptive nomenclature (e.g. "learning_rate" instead of "eta"). These also need a doc entry ``@param`` in ``xgboost``, as the one in ``xgb.params`` will have the unsupported alias. 
+ +As new objectives and evaluation metrics are added, be mindful that they need to be added to the docs of both ``xgb.params`` and ``xgboost``. Documentation for objectives in both functions was originally copied from the same .rst file for the core library, but for ``xgboost`` it undergoes additional modifications in order to list what is and isn't supported, and to refer only to the parameter aliases that are accepted by ``xgboost``. + +Keep in mind also that objectives that are a variant of one another but with a different prediction mode, are not meant to be allowed in ``xgboost`` as they'd break its intended interface - therefore, such objectives are not described in the docs for ``xgboost`` (but there is a list at the end of what isn't supported by it) and are checked against in function ``prescreen.objective``. diff --git a/doc/R-package/index.rst b/doc/R-package/index.rst index 18de8d1c0902..55a804545b3e 100644 --- a/doc/R-package/index.rst +++ b/doc/R-package/index.rst @@ -35,3 +35,4 @@ Other topics :titlesonly: Handling of indexable elements + Developer guide: parameters from core library From fb7f85d5abfda6f995391edc2cf205e027bd00f2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 4 Jan 2025 02:25:21 +0800 Subject: [PATCH 10/15] [doc] Mention eta for glinear. [skip ci] (#11138) --- doc/parameter.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/parameter.rst b/doc/parameter.rst index 6eddde40910e..d4d8de702a8f 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -348,6 +348,11 @@ Parameters for Linear Booster (``booster=gblinear``) - L1 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples. +* ``eta`` [default=0.5, alias: ``learning_rate``] + + - Step size shrinkage used in update to prevent overfitting. 
After each boosting step, we can directly get the weights of new features, and ``eta`` shrinks the feature weights to make the boosting process more conservative. + - range: [0,1] + * ``updater`` [default= ``shotgun``] - Choice of algorithm to fit linear model From bd92b1c9c0db3e75ec3dfa513e1435d518bb535d Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 3 Jan 2025 19:26:47 +0100 Subject: [PATCH 11/15] [R] correct default values for learning rate (#11140) --- R-package/R/xgb.train.R | 6 +++--- R-package/man/xgb.params.Rd | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 77aa5eb8ccae..49726382c9fb 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -459,10 +459,10 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(), #' @param seed Random number seed. If not specified, will take a random seed through R's own RNG engine. #' @param booster (default= `"gbtree"`) #' Which booster to use. Can be `"gbtree"`, `"gblinear"` or `"dart"`; `"gbtree"` and `"dart"` use tree based models while `"gblinear"` uses linear functions. -#' @param eta,learning_rate (two aliases for the same parameter) (default=0.3) +#' @param eta,learning_rate (two aliases for the same parameter) #' Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and `eta` shrinks the feature weights to make the boosting process more conservative. -#' -#' range: \eqn{[0,1]} +#' - range: \eqn{[0,1]} +#' - default value: 0.3 for tree-based boosters, 0.5 for linear booster. #' #' Note: should only pass one of `eta` or `learning_rate`. Both refer to the same parameter and there's thus no difference between one or the other. 
#' @param gamma,min_split_loss (two aliases for the same parameter) (for Tree Booster) (default=0, alias: `gamma`) diff --git a/R-package/man/xgb.params.Rd b/R-package/man/xgb.params.Rd index e5c74d85f837..f51e764e84db 100644 --- a/R-package/man/xgb.params.Rd +++ b/R-package/man/xgb.params.Rd @@ -121,10 +121,12 @@ contention and hyperthreading in mind.} \item{booster}{(default= \code{"gbtree"}) Which booster to use. Can be \code{"gbtree"}, \code{"gblinear"} or \code{"dart"}; \code{"gbtree"} and \code{"dart"} use tree based models while \code{"gblinear"} uses linear functions.} -\item{eta, learning_rate}{(two aliases for the same parameter) (default=0.3) +\item{eta, learning_rate}{(two aliases for the same parameter) Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and \code{eta} shrinks the feature weights to make the boosting process more conservative. - -range: \eqn{[0,1]} +\itemize{ +\item range: \eqn{[0,1]} +\item default value: 0.3 for tree-based boosters, 0.5 for linear booster. +} Note: should only pass one of \code{eta} or \code{learning_rate}. 
Both refer to the same parameter and there's thus no difference between one or the other.} From a9505620711220dda21298d9de829b1318dcd6ef Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 6 Jan 2025 15:27:29 +0800 Subject: [PATCH 12/15] Enfore cmake configure file newline style (#11129) --- CMakeLists.txt | 3 ++- cmake/Version.cmake | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0abe69821d14..761aef49d4dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -497,7 +497,8 @@ if(GOOGLE_TEST) configure_file( ${xgboost_SOURCE_DIR}/tests/cli/machine.conf.in ${xgboost_BINARY_DIR}/tests/cli/machine.conf - @ONLY) + @ONLY + NEWLINE_STYLE UNIX) if(BUILD_DEPRECATED_CLI) add_test( NAME TestXGBoostCLI diff --git a/cmake/Version.cmake b/cmake/Version.cmake index 4af6b27d6720..1a7fae3d7645 100644 --- a/cmake/Version.cmake +++ b/cmake/Version.cmake @@ -2,5 +2,7 @@ function(write_version) message(STATUS "xgboost VERSION: ${xgboost_VERSION}") configure_file( ${xgboost_SOURCE_DIR}/cmake/version_config.h.in - ${xgboost_SOURCE_DIR}/include/xgboost/version_config.h @ONLY) + ${xgboost_SOURCE_DIR}/include/xgboost/version_config.h + @ONLY + NEWLINE_STYLE UNIX) endfunction() From 721c3890294629d8e4611e54e98e2166872b3828 Mon Sep 17 00:00:00 2001 From: Michal Golan Date: Mon, 6 Jan 2025 18:16:00 +0200 Subject: [PATCH 13/15] [doc] Update model.rst (#11137) --- doc/tutorials/model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tutorials/model.rst b/doc/tutorials/model.rst index 97171fc3c437..d4e04262d214 100644 --- a/doc/tutorials/model.rst +++ b/doc/tutorials/model.rst @@ -146,7 +146,7 @@ It remains to ask: which tree do we want at each step? A natural thing is to ad .. 
math:: - \text{obj}^{(t)} & = \sum_{i=1}^n l(y_i, \hat{y}_i^{(t)}) + \sum_{i=1}^t\omega(f_i) \\ + \text{obj}^{(t)} & = \sum_{i=1}^n l(y_i, \hat{y}_i^{(t)}) + \sum_{k=1}^t\omega(f_k) \\ & = \sum_{i=1}^n l(y_i, \hat{y}_i^{(t-1)} + f_t(x_i)) + \omega(f_t) + \mathrm{constant} If we consider using mean squared error (MSE) as our loss function, the objective becomes From 2183e4950bdc5fe492e9ae34f1342f39c4fa52fd Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 7 Jan 2025 01:16:44 +0800 Subject: [PATCH 14/15] [CI] Fix R error. (#11142) --- .github/workflows/r_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml index b3d5ad5b1f03..43ad372a1e84 100644 --- a/.github/workflows/r_tests.yml +++ b/.github/workflows/r_tests.yml @@ -46,8 +46,8 @@ jobs: uses: actions/cache@v4 with: path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-${{ matrix.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }} - restore-keys: ${{ runner.os }}-r-${{ matrix.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }} + key: ${{ runner.os }}-r-${{ matrix.r }}-8-${{ hashFiles('R-package/DESCRIPTION') }} + restore-keys: ${{ runner.os }}-r-${{ matrix.r }}-8-${{ hashFiles('R-package/DESCRIPTION') }} - uses: actions/setup-python@v5 with: python-version: "3.10" From a475327f02862de5c9a8d4710597a764152b9df1 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 7 Jan 2025 06:46:25 +0100 Subject: [PATCH 15/15] [R] Enable prefetch on windows configure script (#11141) --- R-package/configure.win | 21 +++++++++++++++++++ .../src/{Makevars.win => Makevars.win.in} | 2 ++ ops/script/test_r_package.py | 2 +- 3 files changed, 24 insertions(+), 1 deletion(-) rename R-package/src/{Makevars.win => Makevars.win.in} (98%) diff --git a/R-package/configure.win b/R-package/configure.win index e69de29bb2d1..3a01292ab6a3 100644 --- a/R-package/configure.win +++ b/R-package/configure.win @@ -0,0 +1,21 @@ +R_EXE="${R_HOME}/bin${R_ARCH_BIN}/R.exe" 
+CXX=`"${R_EXE}" CMD config CXX` + +cat > test.cpp <<EOL +#include <xmmintrin.h> +int main() { + char data = 0; + const char* address = &data; + _mm_prefetch(address, _MM_HINT_NTA); + return 0; +} +EOL + +XGBOOST_MM_PREFETCH_PRESENT="" +${CXX} -o test test.cpp 2>/dev/null && ./test && XGBOOST_MM_PREFETCH_PRESENT="-DXGBOOST_MM_PREFETCH_PRESENT=1" +rm -f ./test +rm -f ./test.cpp + +sed \ + -e "s/@XGBOOST_MM_PREFETCH_PRESENT@/$XGBOOST_MM_PREFETCH_PRESENT/" \ + < src/Makevars.win.in > src/Makevars.win diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win.in similarity index 98% rename from R-package/src/Makevars.win rename to R-package/src/Makevars.win.in index 875b3aaca630..8a86ba97c34d 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win.in @@ -22,6 +22,8 @@ PKG_CPPFLAGS = \ -I$(PKGROOT)/include \ -I$(PKGROOT)/dmlc-core/include \ -I$(PKGROOT) \ + -DXGBOOST_BUILTIN_PREFETCH_PRESENT=1 \ + @XGBOOST_MM_PREFETCH_PRESENT@ \ $(XGB_RFLAGS) PKG_CXXFLAGS = \ diff --git a/ops/script/test_r_package.py b/ops/script/test_r_package.py index 3ce886c1bc41..ed0d6b1d9faa 100644 --- a/ops/script/test_r_package.py +++ b/ops/script/test_r_package.py @@ -63,7 +63,7 @@ def pkgroot(path: str) -> None: if os.path.exists(osxmakef): os.remove(osxmakef) pkgroot("Makevars.in") - pkgroot("Makevars.win") + pkgroot("Makevars.win.in") # misc rwsp = Path("R-package") / "remove_warning_suppression_pragma.sh" if system() != "Windows":