From b0327574fec24513dc308390640d051981647020 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 6 Dec 2021 17:23:11 -0600 Subject: [PATCH] [R-package] remove Dataset `setinfo()` (#4854) * [R-package] remove Dataset setinfo() * revert unintended docs changes * fix examples * revert FAQ changes * empty commit --- R-package/NAMESPACE | 2 - R-package/R/lgb.Dataset.R | 78 +------------------------ R-package/man/getinfo.Rd | 2 +- R-package/man/setinfo.Rd | 54 ----------------- R-package/tests/testthat/test_dataset.R | 19 +----- docs/FAQ.rst | 7 ++- 6 files changed, 10 insertions(+), 152 deletions(-) delete mode 100644 R-package/man/setinfo.Rd diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 91f752e36dc7..c31c8bb0e74b 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -8,7 +8,6 @@ S3method(getinfo,lgb.Dataset) S3method(predict,lgb.Booster) S3method(print,lgb.Booster) S3method(set_field,lgb.Dataset) -S3method(setinfo,lgb.Dataset) S3method(slice,lgb.Dataset) S3method(summary,lgb.Booster) export(get_field) @@ -39,7 +38,6 @@ export(lightgbm) export(readRDS.lgb.Booster) export(saveRDS.lgb.Booster) export(set_field) -export(setinfo) export(slice) import(methods) importFrom(Matrix,Matrix) diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index f6e66908bba3..077e27a97460 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -529,19 +529,6 @@ Dataset <- R6::R6Class( }, - setinfo = function(name, info) { - warning(paste0( - "Dataset$setinfo() is deprecated and will be removed in a future release. " - , "Use Dataset$set_field() instead." - )) - return( - self$set_field( - field_name = name - , data = info - ) - ) - }, - set_field = function(field_name, data) { # Check if attribute key is in the known attribute list @@ -1200,7 +1187,7 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) { #' lgb.Dataset.construct(dtrain) #' #' labels <- lightgbm::getinfo(dtrain, "label") -#' lightgbm::setinfo(dtrain, "label", 1 - labels) +#' lightgbm::set_field(dtrain, "label", 1 - labels) #' #' labels2 <- lightgbm::getinfo(dtrain, "label") #' stopifnot(all(labels2 == 1 - labels)) @@ -1234,69 +1221,6 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { } -#' @name setinfo -#' @title Set information of an \code{lgb.Dataset} object -#' @description Set one attribute of a \code{lgb.Dataset} -#' @param dataset Object of class \code{lgb.Dataset} -#' @param name the name of the field to get -#' @param info the specific field of information to set -#' @param ... other parameters (ignored) -#' @return the dataset you passed in -#' -#' @details -#' The \code{name} field can be one of the following: -#' -#' \itemize{ -#' \item{\code{label}: vector of labels to use as the target variable} -#' \item{\code{weight}: to do a weight rescale} -#' \item{\code{init_score}: initial score is the base prediction lightgbm will boost from} -#' \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to -#' group rows together as ordered results from the same set of candidate results to be ranked. -#' For example, if you have a 100-document dataset with \code{group = c(10, 20, 40, 10, 10, 10)}, -#' that means that you have 6 groups, where the first 10 records are in the first group, -#' records 11-30 are in the second group, etc.} -#' } -#' -#' @examples -#' \donttest{ -#' data(agaricus.train, package = "lightgbm") -#' train <- agaricus.train -#' dtrain <- lgb.Dataset(train$data, label = train$label) -#' lgb.Dataset.construct(dtrain) -#' -#' labels <- lightgbm::getinfo(dtrain, "label") -#' lightgbm::setinfo(dtrain, "label", 1 - labels) -#' -#' labels2 <- lightgbm::getinfo(dtrain, "label") -#' stopifnot(all.equal(labels2, 1 - labels)) -#' } -#' @export -setinfo <- function(dataset, ...) { - UseMethod("setinfo") -} - -#' @rdname setinfo -#' @export -setinfo.lgb.Dataset <- function(dataset, name, info, ...) { - - warning("Calling setinfo() on a lgb.Dataset is deprecated. Use set_field() instead.") - - additional_args <- list(...) - if (length(additional_args) > 0L) { - warning(paste0( - "setinfo.lgb.Dataset: Found the following passed through '...': " - , paste(names(additional_args), collapse = ", ") - , ". These are ignored. In future releases of lightgbm, this warning will become an error. " - , "See ?setinfo.lgb.Dataset for documentation on how to call this function." - )) - } - - if (!lgb.is.Dataset(x = dataset)) { - stop("setinfo.lgb.Dataset: input dataset should be an lgb.Dataset object") - } - - return(invisible(dataset$set_field(field_name = name, data = info))) -} #' @name get_field #' @title Get one attribute of a \code{lgb.Dataset} diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 33c75e92da21..da73193ecf40 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -44,7 +44,7 @@ dtrain <- lgb.Dataset(train$data, label = train$label) lgb.Dataset.construct(dtrain) labels <- lightgbm::getinfo(dtrain, "label") -lightgbm::setinfo(dtrain, "label", 1 - labels) +lightgbm::set_field(dtrain, "label", 1 - labels) labels2 <- lightgbm::getinfo(dtrain, "label") stopifnot(all(labels2 == 1 - labels)) diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd deleted file mode 100644 index a6918eb6a13a..000000000000 --- a/R-package/man/setinfo.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lgb.Dataset.R -\name{setinfo} -\alias{setinfo} -\alias{setinfo.lgb.Dataset} -\title{Set information of an \code{lgb.Dataset} object} -\usage{ -setinfo(dataset, ...) - -\method{setinfo}{lgb.Dataset}(dataset, name, info, ...) -} -\arguments{ -\item{dataset}{Object of class \code{lgb.Dataset}} - -\item{...}{other parameters (ignored)} - -\item{name}{the name of the field to get} - -\item{info}{the specific field of information to set} -} -\value{ -the dataset you passed in -} -\description{ -Set one attribute of a \code{lgb.Dataset} -} -\details{ -The \code{name} field can be one of the following: - -\itemize{ - \item{\code{label}: vector of labels to use as the target variable} - \item{\code{weight}: to do a weight rescale} - \item{\code{init_score}: initial score is the base prediction lightgbm will boost from} - \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to - group rows together as ordered results from the same set of candidate results to be ranked. - For example, if you have a 100-document dataset with \code{group = c(10, 20, 40, 10, 10, 10)}, - that means that you have 6 groups, where the first 10 records are in the first group, - records 11-30 are in the second group, etc.} -} -} -\examples{ -\donttest{ -data(agaricus.train, package = "lightgbm") -train <- agaricus.train -dtrain <- lgb.Dataset(train$data, label = train$label) -lgb.Dataset.construct(dtrain) - -labels <- lightgbm::getinfo(dtrain, "label") -lightgbm::setinfo(dtrain, "label", 1 - labels) - -labels2 <- lightgbm::getinfo(dtrain, "label") -stopifnot(all.equal(labels2, 1 - labels)) -} -} diff --git a/R-package/tests/testthat/test_dataset.R b/R-package/tests/testthat/test_dataset.R index 4207e4f241ca..fc8f5e45f840 100644 --- a/R-package/tests/testthat/test_dataset.R +++ b/R-package/tests/testthat/test_dataset.R @@ -27,11 +27,11 @@ test_that("lgb.Dataset: basic construction, saving, loading", { expect_equal(get_field(dtest1, "label"), get_field(dtest3, "label")) }) -test_that("lgb.Dataset: getinfo & setinfo", { +test_that("lgb.Dataset: getinfo", { dtest <- lgb.Dataset(test_data) dtest$construct() - setinfo(dtest, "label", test_label) + set_field(dtest, "label", test_label) labels <- getinfo(dtest, "label") expect_equal(test_label, getinfo(dtest, "label")) @@ -39,7 +39,7 @@ test_that("lgb.Dataset: getinfo & setinfo", { expect_true(length(getinfo(dtest, "init_score")) == 0L) # any other label should error - expect_error(setinfo(dtest, "asdf", test_label)) + expect_error(set_field(dtest, "asdf", test_label)) }) test_that("lgb.Dataset: get_field & set_field", { @@ -259,19 +259,6 @@ test_that("cpp errors should be raised as proper R errors", { }, regexp = "Initial score size doesn't match data size") }) -test_that("lgb.Dataset$setinfo() should convert 'group' to integer", { - ds <- lgb.Dataset( - data = matrix(rnorm(100L), nrow = 50L, ncol = 2L) - , label = sample(c(0L, 1L), size = 50L, replace = TRUE) - ) - ds$construct() - current_group <- ds$getinfo("group") - expect_null(current_group) - group_as_numeric <- rep(25.0, 2L) - ds$setinfo("group", group_as_numeric) - expect_identical(ds$getinfo("group"), as.integer(group_as_numeric)) -}) - test_that("lgb.Dataset$set_field() should convert 'group' to integer", { ds <- lgb.Dataset( data = matrix(rnorm(100L), nrow = 50L, ncol = 2L) diff --git a/docs/FAQ.rst b/docs/FAQ.rst index dec70eb45312..63c729b829b4 100644 --- a/docs/FAQ.rst +++ b/docs/FAQ.rst @@ -230,8 +230,11 @@ This is a known issue: `Microsoft/LightGBM#698 `__. +As of at least LightGBM v3.3.0, this issue has been resolved and printing a ``Dataset`` object does not cause the console to freeze. + +In older versions, avoid printing the ``Dataset`` after calling ``setinfo()``. + +As of LightGBM v4.0.0, ``setinfo()`` has been replaced by a new method, ``set_field()``. 3. ``error in data.table::data.table()...argument 2 is NULL`` -------------------------------------------------------------