diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R index ef54fa929fbb..ce4bad696dc6 100755 --- a/.ci/lint_r_code.R +++ b/.ci/lint_r_code.R @@ -16,28 +16,73 @@ FILES_TO_LINT <- list.files( , include.dirs = FALSE ) +# text to use for pipe operators from packages like 'magrittr' +pipe_text <- paste0( + "For consistency and the sake of being explicit, this project's code " + , "does not use the pipe operator." +) + +# text to use for functions that should only be called interactively +interactive_text <- paste0( + "Functions like '?', 'help', and 'install.packages()' should only be used " + , "interactively, not in package code." +) + LINTERS_TO_USE <- list( - "assignment" = lintr::assignment_linter - , "closed_curly" = lintr::closed_curly_linter - , "equals_na" = lintr::equals_na_linter - , "function_left" = lintr::function_left_parentheses_linter - , "commas" = lintr::commas_linter - , "concatenation" = lintr::unneeded_concatenation_linter - , "implicit_integers" = lintr::implicit_integer_linter - , "infix_spaces" = lintr::infix_spaces_linter - , "long_lines" = lintr::line_length_linter(length = 120L) - , "tabs" = lintr::no_tab_linter - , "open_curly" = lintr::open_curly_linter - , "paren_brace_linter" = lintr::paren_brace_linter - , "semicolon" = lintr::semicolon_terminator_linter - , "seq" = lintr::seq_linter - , "single_quotes" = lintr::single_quotes_linter - , "spaces_inside" = lintr::spaces_inside_linter - , "spaces_left_parens" = lintr::spaces_left_parentheses_linter - , "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do")) - , "trailing_blank" = lintr::trailing_blank_lines_linter - , "trailing_white" = lintr::trailing_whitespace_linter - , "true_false" = lintr::T_and_F_symbol_linter + "absolute_path" = lintr::absolute_path_linter + , "assignment" = lintr::assignment_linter + , "closed_curly" = lintr::closed_curly_linter + , "commas" = lintr::commas_linter + , "equals_na" = lintr::equals_na_linter + , "function_left" = lintr::function_left_parentheses_linter + , "implicit_integers" = lintr::implicit_integer_linter + , "infix_spaces" = lintr::infix_spaces_linter + , "long_lines" = lintr::line_length_linter(length = 120L) + , "no_tabs" = lintr::no_tab_linter + , "non_portable_path" = lintr::nonportable_path_linter + , "open_curly" = lintr::open_curly_linter + , "paren_brace_linter" = lintr::paren_brace_linter + , "semicolon" = lintr::semicolon_terminator_linter + , "seq" = lintr::seq_linter + , "single_quotes" = lintr::single_quotes_linter + , "spaces_inside" = lintr::spaces_inside_linter + , "spaces_left_parens" = lintr::spaces_left_parentheses_linter + , "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do")) + , "trailing_blank" = lintr::trailing_blank_lines_linter + , "trailing_white" = lintr::trailing_whitespace_linter + , "true_false" = lintr::T_and_F_symbol_linter + , "undesirable_function" = lintr::undesirable_function_linter( + fun = c( + "cbind" = paste0( + "cbind is an unsafe way to build up a data frame. merge() or direct " + , "column assignment is preferred." + ) + , "dyn.load" = "Directly loading/unloading .dll/.so files in package code should not be necessary." + , "dyn.unload" = "Directly loading/unloading .dll/.so files in package code should not be necessary." + , "help" = interactive_text + , "ifelse" = "The use of ifelse() is dangerous because it will silently allow mixing types." + , "install.packages" = interactive_text + , "is.list" = paste0( + "This project uses data.table, and is.list(x) is TRUE for a data.table. " + , "identical(class(x), 'list') is a safer way to check that something is an R list object." + ) + , "rbind" = "data.table::rbindlist() is faster and safer than rbind(), and is preferred in this project." + , "require" = paste0( + "library() is preferred to require() because it will raise an error immediately " + , "if a package is missing." + ) + ) + ) + , "undesirable_operator" = lintr::undesirable_operator_linter( + op = c( + "%>%" = pipe_text + , "%.%" = pipe_text + , "%..%" = pipe_text + , "?" = interactive_text + , "??" = interactive_text + ) + ) + , "unneeded_concatenation" = lintr::unneeded_concatenation_linter ) cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT))) @@ -52,11 +97,14 @@ for (r_file in FILES_TO_LINT) { , cache = FALSE ) - cat(sprintf( - "Found %i linting errors in %s\n" - , length(this_result) - , r_file - )) + print( + sprintf( + "Found %i linting errors in %s" + , length(this_result) + , r_file + ) + , quote = FALSE + ) results <- c(results, this_result) @@ -65,7 +113,6 @@ for (r_file in FILES_TO_LINT) { issues_found <- length(results) if (issues_found > 0L) { - cat("\n") print(results) } diff --git a/R-package/R/callback.R b/R-package/R/callback.R index c159732032d4..3c8bb243783b 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -18,7 +18,7 @@ CB_ENV <- R6::R6Class( cb.reset.parameters <- function(new_params) { # Check for parameter list - if (!is.list(new_params)) { + if (!identical(class(new_params), "list")) { stop(sQuote("new_params"), " must be a list") } diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index 964f1a8007be..c361a6c423c3 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -892,7 +892,7 @@ dimnames.lgb.Dataset <- function(x) { `dimnames<-.lgb.Dataset` <- function(x, value) { # Check if invalid element list - if (!is.list(value) || length(value) != 2L) { + if (!identical(class(value), "list") || length(value) != 2L) { stop("invalid ", sQuote("value"), " given: must be a list of two elements") } diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 7561cb05a866..3433aade6594 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -178,7 +178,7 @@ lgb.cv <- function(params = list() if (!is.null(folds)) { # Check for list of folds or for single value - if (!is.list(folds) || length(folds) < 2L) { + if (!identical(class(folds), "list") || length(folds) < 2L) { stop(sQuote("folds"), " must be a list with 2 or more elements that are vectors of indices for each CV-fold") } diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index 9e239192a8fe..2914ddf94f97 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -125,13 +125,15 @@ multiple.tree.plot.interpretation <- function(tree_interpretation, cex <- 2.5 / log2(1.0 + top_n) } - # Do plot + # create plot + tree_interpretation[Contribution > 0.0, bar_color := "firebrick"] + tree_interpretation[Contribution == 0.0, bar_color := "steelblue"] tree_interpretation[.N:1L, graphics::barplot( height = Contribution , names.arg = Feature , horiz = TRUE - , col = ifelse(Contribution > 0L, "firebrick", "steelblue") + , col = bar_color , border = NA , main = title , cex.names = cex diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 73b6f19b160e..d0dacecc0bd1 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -73,7 +73,7 @@ lgb.train <- function(params = list(), stop("lgb.train: data must be an lgb.Dataset instance") } if (length(valids) > 0L) { - if (!is.list(valids) || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) { + if (!identical(class(valids), "list") || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) { stop("lgb.train: valids must be a list of lgb.Dataset elements") } evnames <- names(valids) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index b7312da8661f..b5a0b73109ac 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -105,7 +105,7 @@ lgb.call.return.str <- function(fun_name, ...) { lgb.params2str <- function(params, ...) { # Check for a list as input - if (!is.list(params)) { + if (!identical(class(params), "list")) { stop("params must be a list") } @@ -254,7 +254,7 @@ lgb.check.eval <- function(params, eval) { } # If 'eval' is a list or character vector, store it in 'metric' - if (is.character(eval) || is.list(eval)) { + if (is.character(eval) || identical(class(eval), "list")) { params$metric <- append(params$metric, eval) } diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R index d241bd858f5d..6716bb894840 100644 --- a/R-package/demo/basic_walkthrough.R +++ b/R-package/demo/basic_walkthrough.R @@ -1,5 +1,5 @@ -require(lightgbm) -require(methods) +library(lightgbm) +library(methods) # We load in the agaricus dataset # In this example, we are aiming to predict whether a mushroom is edible diff --git a/R-package/demo/boost_from_prediction.R b/R-package/demo/boost_from_prediction.R index d96113b12861..457561cd5f70 100644 --- a/R-package/demo/boost_from_prediction.R +++ b/R-package/demo/boost_from_prediction.R @@ -1,5 +1,5 @@ -require(lightgbm) -require(methods) +library(lightgbm) +library(methods) # Load in the agaricus dataset data(agaricus.train, package = "lightgbm") diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R index 5ae0503038f4..f685b520822d 100644 --- a/R-package/demo/cross_validation.R +++ b/R-package/demo/cross_validation.R @@ -1,4 +1,5 @@ -require(lightgbm) +library(lightgbm) + # load in the agaricus dataset data(agaricus.train, package = "lightgbm") data(agaricus.test, package = "lightgbm") diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R index 4108c51103b2..f68b82dc3dcb 100644 --- a/R-package/demo/early_stopping.R +++ b/R-package/demo/early_stopping.R @@ -1,5 +1,5 @@ -require(lightgbm) -require(methods) +library(lightgbm) +library(methods) # Load in the agaricus dataset data(agaricus.train, package = "lightgbm") diff --git a/R-package/demo/multiclass.R b/R-package/demo/multiclass.R index 6aeff9b95787..00b49e83f6de 100644 --- a/R-package/demo/multiclass.R +++ b/R-package/demo/multiclass.R @@ -1,4 +1,4 @@ -require(lightgbm) +library(lightgbm) # We load the default iris dataset shipped with R data(iris) diff --git a/R-package/demo/multiclass_custom_objective.R b/R-package/demo/multiclass_custom_objective.R index fee8f42ef28d..ec2ed90cdf64 100644 --- a/R-package/demo/multiclass_custom_objective.R +++ b/R-package/demo/multiclass_custom_objective.R @@ -1,4 +1,4 @@ -require(lightgbm) +library(lightgbm) # We load the default iris dataset shipped with R data(iris) @@ -43,16 +43,25 @@ probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin)) custom_multiclass_obj <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") - # preds is a matrix with rows corresponding to samples and colums corresponding to choices + # preds is a matrix with rows corresponding to samples and columns corresponding to choices preds <- matrix(preds, nrow = length(labels)) # to prevent overflow, normalize preds by row - preds <- preds - apply(preds, 1L, max) + preds <- preds - apply(preds, MARGIN = 1L, max) prob <- exp(preds) / rowSums(exp(preds)) # compute gradient grad <- prob - grad[cbind(seq_len(length(labels)), labels + 1L)] <- grad[cbind(seq_len(length(labels)), labels + 1L)] - 1L + subset_index <- as.matrix( + data.frame( + seq_len(length(labels)) + , labels + 1L + , fix.empty.names = FALSE + ) + , nrow = length(labels) + , dimnames = NULL + ) + grad[subset_index] <- grad[subset_index] - 1L # compute hessian (approximation) hess <- 2.0 * prob * (1.0 - prob) @@ -67,9 +76,18 @@ custom_multiclass_metric <- function(preds, dtrain) { preds <- preds - apply(preds, 1L, max) prob <- exp(preds) / rowSums(exp(preds)) + subset_index <- as.matrix( + data.frame( + seq_len(length(labels)) + , labels + 1L + , fix.empty.names = FALSE + ) + , nrow = length(labels) + , dimnames = NULL + ) return(list( name = "error" - , value = -mean(log(prob[cbind(seq_len(length(labels)), labels + 1L)])) + , value = -mean(log(prob[subset_index])) , higher_better = FALSE )) } diff --git a/R-package/tests/testthat/test_dataset.R b/R-package/tests/testthat/test_dataset.R index 7e8d4b79344a..f8e26b269d1e 100644 --- a/R-package/tests/testthat/test_dataset.R +++ b/R-package/tests/testthat/test_dataset.R @@ -1,5 +1,5 @@ -require(lightgbm) -require(Matrix) +library(lightgbm) +library(Matrix) context("testing lgb.Dataset functionality") @@ -140,7 +140,7 @@ test_that("Dataset$get_params() successfully returns parameters if you passed th , params = params ) returned_params <- ds$get_params() - expect_true(methods::is(returned_params, "list")) + expect_identical(class(returned_params), "list") expect_identical(length(params), length(returned_params)) expect_identical(sort(names(params)), sort(names(returned_params))) for (param_name in names(params)) { diff --git a/R-package/tests/testthat/test_lgb.interprete.R b/R-package/tests/testthat/test_lgb.interprete.R index e4664710d9cb..b73cfdb8bf9e 100644 --- a/R-package/tests/testthat/test_lgb.interprete.R +++ b/R-package/tests/testthat/test_lgb.interprete.R @@ -40,7 +40,7 @@ test_that("lgb.intereprete works as expected for binary classification", { , data = test$data , idxset = seq_len(num_trees) ) - expect_true(methods::is(tree_interpretation, "list")) + expect_identical(class(tree_interpretation), "list") expect_true(length(tree_interpretation) == num_trees) expect_null(names(tree_interpretation)) expect_true(all( @@ -91,7 +91,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", { , data = test[, 1L:4L] , idxset = seq_len(num_trees) ) - expect_true(methods::is(tree_interpretation, "list")) + expect_identical(class(tree_interpretation), "list") expect_true(length(tree_interpretation) == num_trees) expect_null(names(tree_interpretation)) expect_true(all( diff --git a/R-package/tests/testthat/test_parameters.R b/R-package/tests/testthat/test_parameters.R index 39a13440c4d4..dc14f9a99fce 100644 --- a/R-package/tests/testthat/test_parameters.R +++ b/R-package/tests/testthat/test_parameters.R @@ -48,7 +48,7 @@ context("parameter aliases") test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", { param_aliases <- .PARAMETER_ALIASES() - expect_true(is.list(param_aliases)) + expect_identical(class(param_aliases), "list") expect_true(is.character(names(param_aliases))) expect_true(is.character(param_aliases[["boosting"]])) expect_true(is.character(param_aliases[["early_stopping_round"]]))