microsoft · StrikerRUS · Mar 17, 2020 · Feb 22, 2020 · Feb 23, 2020 · Mar 10, 2020
@@ -16,28 +16,73 @@ FILES_TO_LINT <- list.files(
     , include.dirs = FALSE
 )
 
+# text to use for pipe operators from packages like 'magrittr'
+pipe_text <- paste0(
+    "For consistency and the sake of being explicit, this project's code "
+    , "does not use the pipe operator."
+)
+
+# text to use for functions that should only be called interactively
+interactive_text <- paste0(
+    "Functions like '?', 'help', and 'install.packages()' should only be used "
+    , "interactively, not in package code."
+)
+
 LINTERS_TO_USE <- list(
-    "assignment" = lintr::assignment_linter
-    , "closed_curly" = lintr::closed_curly_linter
-    , "equals_na" = lintr::equals_na_linter
-    , "function_left" = lintr::function_left_parentheses_linter
-    , "commas" = lintr::commas_linter
-    , "concatenation" = lintr::unneeded_concatenation_linter
-    , "implicit_integers" = lintr::implicit_integer_linter
-    , "infix_spaces" = lintr::infix_spaces_linter
-    , "long_lines" = lintr::line_length_linter(length = 120L)
-    , "tabs" = lintr::no_tab_linter
-    , "open_curly" = lintr::open_curly_linter
-    , "paren_brace_linter" = lintr::paren_brace_linter
-    , "semicolon" = lintr::semicolon_terminator_linter
-    , "seq" = lintr::seq_linter
-    , "single_quotes" = lintr::single_quotes_linter
-    , "spaces_inside" = lintr::spaces_inside_linter
-    , "spaces_left_parens" = lintr::spaces_left_parentheses_linter
-    , "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do"))
-    , "trailing_blank" = lintr::trailing_blank_lines_linter
-    , "trailing_white" = lintr::trailing_whitespace_linter
-    , "true_false" = lintr::T_and_F_symbol_linter
+    "absolute_path"          = lintr::absolute_path_linter
+    , "assignment"           = lintr::assignment_linter
+    , "closed_curly"         = lintr::closed_curly_linter
+    , "commas"               = lintr::commas_linter
+    , "equals_na"            = lintr::equals_na_linter
+    , "function_left"        = lintr::function_left_parentheses_linter
+    , "implicit_integers"    = lintr::implicit_integer_linter
+    , "infix_spaces"         = lintr::infix_spaces_linter
+    , "long_lines"           = lintr::line_length_linter(length = 120L)
+    , "no_tabs"              = lintr::no_tab_linter
+    , "non_portable_path"    = lintr::nonportable_path_linter
+    , "open_curly"           = lintr::open_curly_linter
+    , "paren_brace_linter"   = lintr::paren_brace_linter
+    , "semicolon"            = lintr::semicolon_terminator_linter
+    , "seq"                  = lintr::seq_linter
+    , "single_quotes"        = lintr::single_quotes_linter
+    , "spaces_inside"        = lintr::spaces_inside_linter
+    , "spaces_left_parens"   = lintr::spaces_left_parentheses_linter
+    , "todo_comments"        = lintr::todo_comment_linter(c("todo", "fixme", "to-do"))
+    , "trailing_blank"       = lintr::trailing_blank_lines_linter
+    , "trailing_white"       = lintr::trailing_whitespace_linter
+    , "true_false"           = lintr::T_and_F_symbol_linter
+    , "undesirable_function" = lintr::undesirable_function_linter(
+        fun = c(
+            "cbind" = paste0(
+                "cbind is an unsafe way to build up a data frame. merge() or direct "
+                , "column assignment is preferred."
+            )
+            , "dyn.load" = "Directly loading/unloading .dll/.so files in package code should not be necessary."
+            , "dyn.unload" = "Directly loading/unloading .dll/.so files in package code should not be necessary."
+            , "help" = interactive_text
+            , "ifelse" = "The use of ifelse() is dangerous because it will silently allow mixing types."
+            , "install.packages" = interactive_text
+            , "is.list" = paste0(
+                "This project uses data.table, and is.list(x) is TRUE for a data.table. "
+                , "identical(class(x), 'list') is a safer way to check that something is an R list object."
+            )
+            , "rbind" = "data.table::rbindlist() is faster and safer than rbind(), and is preferred in this project."
+            , "require" = paste0(
+                "library() is preferred to require() because it will raise an error immediately "
+                , "if a package is missing."
+            )
+        )
+    )
+    , "undesirable_operator" = lintr::undesirable_operator_linter(
+        op = c(
+            "%>%" = pipe_text
+            , "%.%" = pipe_text
+            , "%..%" = pipe_text
+            , "?" = interactive_text
+            , "??" = interactive_text
+        )
+    )
+    , "unneeded_concatenation" = lintr::unneeded_concatenation_linter
 )
 
 cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
@@ -52,11 +97,14 @@ for (r_file in FILES_TO_LINT) {
         , cache = FALSE
     )
 
-    cat(sprintf(
-        "Found %i linting errors in %s\n"
-        , length(this_result)
-        , r_file
-    ))
+    print(
+        sprintf(
+            "Found %i linting errors in %s"
+            , length(this_result)
+            , r_file
+        )
+        , quote = FALSE
+    )
 
     results <- c(results, this_result)
 
@@ -65,7 +113,6 @@ for (r_file in FILES_TO_LINT) {
 issues_found <- length(results)
 
 if (issues_found > 0L) {
-    cat("\n")
     print(results)
 }
 

@@ -18,7 +18,7 @@ CB_ENV <- R6::R6Class(
 cb.reset.parameters <- function(new_params) {
 
   # Check for parameter list
-  if (!is.list(new_params)) {
+  if (!identical(class(new_params), "list")) {
     stop(sQuote("new_params"), " must be a list")
   }
 

@@ -892,7 +892,7 @@ dimnames.lgb.Dataset <- function(x) {
 `dimnames<-.lgb.Dataset` <- function(x, value) {
 
   # Check if invalid element list
-  if (!is.list(value) || length(value) != 2L) {
+  if (!identical(class(value), "list") || length(value) != 2L) {
     stop("invalid ", sQuote("value"), " given: must be a list of two elements")
   }
 

@@ -178,7 +178,7 @@ lgb.cv <- function(params = list()
   if (!is.null(folds)) {
 
     # Check for list of folds or for single value
-    if (!is.list(folds) || length(folds) < 2L) {
+    if (!identical(class(folds), "list") || length(folds) < 2L) {
       stop(sQuote("folds"), " must be a list with 2 or more elements that are vectors of indices for each CV-fold")
     }
 

@@ -125,13 +125,15 @@ multiple.tree.plot.interpretation <- function(tree_interpretation,
     cex <- 2.5 / log2(1.0 + top_n)
   }
 
-  # Do plot
+  # create plot
+  tree_interpretation[Contribution > 0.0, bar_color := "firebrick"]
+  tree_interpretation[Contribution == 0.0, bar_color := "steelblue"]
   tree_interpretation[.N:1L,
                       graphics::barplot(
                           height = Contribution
                           , names.arg = Feature
                           , horiz = TRUE
-                          , col = ifelse(Contribution > 0L, "firebrick", "steelblue")
+                          , col = bar_color
                           , border = NA
                           , main = title
                           , cex.names = cex

@@ -73,7 +73,7 @@ lgb.train <- function(params = list(),
     stop("lgb.train: data must be an lgb.Dataset instance")
   }
   if (length(valids) > 0L) {
-    if (!is.list(valids) || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) {
+    if (!identical(class(valids), "list") || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) {
       stop("lgb.train: valids must be a list of lgb.Dataset elements")
     }
     evnames <- names(valids)

@@ -105,7 +105,7 @@ lgb.call.return.str <- function(fun_name, ...) {
 lgb.params2str <- function(params, ...) {
 
   # Check for a list as input
-  if (!is.list(params)) {
+  if (!identical(class(params), "list")) {
     stop("params must be a list")
   }
 
@@ -254,7 +254,7 @@ lgb.check.eval <- function(params, eval) {
   }
 
   # If 'eval' is a list or character vector, store it in 'metric'
-  if (is.character(eval) || is.list(eval)) {
+  if (is.character(eval) || identical(class(eval), "list")) {
     params$metric <- append(params$metric, eval)
   }
 

@@ -1,5 +1,5 @@
-require(lightgbm)
-require(methods)
+library(lightgbm)
+library(methods)
 
 # We load in the agaricus dataset
 # In this example, we are aiming to predict whether a mushroom is edible

@@ -1,5 +1,5 @@
-require(lightgbm)
-require(methods)
+library(lightgbm)
+library(methods)
 
 # Load in the agaricus dataset
 data(agaricus.train, package = "lightgbm")

@@ -1,4 +1,5 @@
-require(lightgbm)
+library(lightgbm)
+
 # load in the agaricus dataset
 data(agaricus.train, package = "lightgbm")
 data(agaricus.test, package = "lightgbm")

@@ -1,5 +1,5 @@
-require(lightgbm)
-require(methods)
+library(lightgbm)
+library(methods)
 
 # Load in the agaricus dataset
 data(agaricus.train, package = "lightgbm")

@@ -1,4 +1,4 @@
-require(lightgbm)
+library(lightgbm)
 
 # We load the default iris dataset shipped with R
 data(iris)

@@ -1,4 +1,4 @@
-require(lightgbm)
+library(lightgbm)
 
 # We load the default iris dataset shipped with R
 data(iris)
@@ -43,16 +43,25 @@ probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin))
 custom_multiclass_obj <- function(preds, dtrain) {
     labels <- getinfo(dtrain, "label")
 
-    # preds is a matrix with rows corresponding to samples and colums corresponding to choices
+    # preds is a matrix with rows corresponding to samples and columns corresponding to choices
     preds <- matrix(preds, nrow = length(labels))
 
     # to prevent overflow, normalize preds by row
-    preds <- preds - apply(preds, 1L, max)
+    preds <- preds - apply(preds, MARGIN = 1L, max)
     prob <- exp(preds) / rowSums(exp(preds))
 
     # compute gradient
     grad <- prob
-    grad[cbind(seq_len(length(labels)), labels + 1L)] <- grad[cbind(seq_len(length(labels)), labels + 1L)] - 1L
+    subset_index <- as.matrix(
+        data.frame(
+            seq_len(length(labels))
+            , labels + 1L
+            , fix.empty.names = FALSE
+        )
+        , nrow = length(labels)
+        , dimnames = NULL
+    )
+    grad[subset_index] <- grad[subset_index] - 1L
 
     # compute hessian (approximation)
     hess <- 2.0 * prob * (1.0 - prob)
@@ -67,9 +76,18 @@ custom_multiclass_metric <- function(preds, dtrain) {
     preds <- preds - apply(preds, 1L, max)
     prob <- exp(preds) / rowSums(exp(preds))
 
+    subset_index <- as.matrix(
+        data.frame(
+            seq_len(length(labels))
+            , labels + 1L
+            , fix.empty.names = FALSE
+        )
+        , nrow = length(labels)
+        , dimnames = NULL
+    )
     return(list(
         name = "error"
-        , value = -mean(log(prob[cbind(seq_len(length(labels)), labels + 1L)]))
+        , value = -mean(log(prob[subset_index]))
         , higher_better = FALSE
     ))
 }

@@ -1,5 +1,5 @@
-require(lightgbm)
-require(Matrix)
+library(lightgbm)
+library(Matrix)
 
 context("testing lgb.Dataset functionality")
 
@@ -140,7 +140,7 @@ test_that("Dataset$get_params() successfully returns parameters if you passed th
     , params = params
   )
   returned_params <- ds$get_params()
-  expect_true(methods::is(returned_params, "list"))
+  expect_identical(class(returned_params), "list")
   expect_identical(length(params), length(returned_params))
   expect_identical(sort(names(params)), sort(names(returned_params)))
   for (param_name in names(params)) {

@@ -40,7 +40,7 @@ test_that("lgb.intereprete works as expected for binary classification", {
         , data = test$data
         , idxset = seq_len(num_trees)
     )
-    expect_true(methods::is(tree_interpretation, "list"))
+    expect_identical(class(tree_interpretation), "list")
     expect_true(length(tree_interpretation) == num_trees)
     expect_null(names(tree_interpretation))
     expect_true(all(
@@ -91,7 +91,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
         , data = test[, 1L:4L]
         , idxset = seq_len(num_trees)
     )
-    expect_true(methods::is(tree_interpretation, "list"))
+    expect_identical(class(tree_interpretation), "list")
     expect_true(length(tree_interpretation) == num_trees)
     expect_null(names(tree_interpretation))
     expect_true(all(

@@ -48,7 +48,7 @@ context("parameter aliases")
 
 test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
   param_aliases <- .PARAMETER_ALIASES()
-  expect_true(is.list(param_aliases))
+  expect_identical(class(param_aliases), "list")
   expect_true(is.character(names(param_aliases)))
   expect_true(is.character(param_aliases[["boosting"]]))
   expect_true(is.character(param_aliases[["early_stopping_round"]]))