Merge branch 'master' into qemu_aarch64

microsoft · Feb 16, 2021 · f4d1b0d · f4d1b0d
2 parents 9c3c640 + 4ae5949
commit f4d1b0d
Show file tree

Hide file tree

Showing 34 changed files with 131 additions and 107 deletions.
diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R
@@ -908,7 +908,6 @@ dimnames.lgb.Dataset <- function(x) {
 }
 
 #' @rdname dimnames.lgb.Dataset
-#' @return A list with the dimension names of the dataset
 #' @export
 `dimnames<-.lgb.Dataset` <- function(x, value) {
 
@@ -1024,7 +1023,6 @@ getinfo <- function(dataset, ...) {
 }
 
 #' @rdname getinfo
-#' @return info data
 #' @export
 getinfo.lgb.Dataset <- function(dataset, name, ...) {
 
@@ -1079,7 +1077,6 @@ setinfo <- function(dataset, ...) {
 }
 
 #' @rdname setinfo
-#' @return the dataset you passed in
 #' @export
 setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
 

diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd
diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd
diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd
diff --git a/README.md b/README.md
@@ -93,6 +93,8 @@ Ruby gem: https://github.com/ankane/lightgbm
 
 LightGBM4j (Java high-level binding): https://github.com/metarank/lightgbm4j
 
+lightgbm-rs (Rust binding): https://github.com/vaaaaanquish/lightgbm-rs
+
 MLflow (experiment tracking, model monitoring framework): https://github.com/mlflow/mlflow
 
 `{treesnip}` (R `{parsnip}`-compliant interface): https://github.com/curso-r/treesnip

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
@@ -139,6 +139,8 @@ Core Parameters
 
       -  **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
 
+      -  **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
+
 -  ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
 
    -  path of training data, LightGBM will train from this data

diff --git a/docs/conf.py b/docs/conf.py
@@ -20,16 +20,16 @@
 import datetime
 import os
 import sys
-import sphinx
-
 from distutils.dir_util import copy_tree
+from re import compile
+from subprocess import PIPE, Popen
+from unittest.mock import Mock
+
+import sphinx
 from docutils.nodes import reference
 from docutils.parsers.rst import Directive
 from docutils.transforms import Transform
-from re import compile
 from sphinx.errors import VersionRequirementError
-from subprocess import PIPE, Popen
-from unittest.mock import Mock
 
 CURR_PATH = os.path.abspath(os.path.dirname(__file__))
 LIB_PATH = os.path.join(CURR_PATH, os.path.pardir, 'python-package')

diff --git a/examples/python-guide/advanced_example.py b/examples/python-guide/advanced_example.py
@@ -1,11 +1,12 @@
 # coding: utf-8
 import json
 import pickle
-import lightgbm as lgb
-import pandas as pd
+
 import numpy as np
+import pandas as pd
 from sklearn.metrics import mean_squared_error
 
+import lightgbm as lgb
 
 print('Loading data...')
 # load or create your dataset

diff --git a/examples/python-guide/logistic_regression.py b/examples/python-guide/logistic_regression.py
@@ -13,11 +13,12 @@
 
 import time
 
-import lightgbm as lgb
 import numpy as np
 import pandas as pd
 from scipy.special import expit
 
+import lightgbm as lgb
+
 #################
 # Simulate some binary data with a single categorical and
 #   single continuous predictor

diff --git a/examples/python-guide/plot_example.py b/examples/python-guide/plot_example.py
@@ -1,7 +1,8 @@
 # coding: utf-8
-import lightgbm as lgb
 import pandas as pd
 
+import lightgbm as lgb
+
 if lgb.compat.MATPLOTLIB_INSTALLED:
     import matplotlib.pyplot as plt
 else:

diff --git a/examples/python-guide/simple_example.py b/examples/python-guide/simple_example.py
@@ -1,8 +1,9 @@
 # coding: utf-8
-import lightgbm as lgb
 import pandas as pd
 from sklearn.metrics import mean_squared_error
 
+import lightgbm as lgb
+
 print('Loading data...')
 # load or create your dataset
 df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')

diff --git a/examples/python-guide/sklearn_example.py b/examples/python-guide/sklearn_example.py
@@ -1,11 +1,11 @@
 # coding: utf-8
 import numpy as np
 import pandas as pd
-import lightgbm as lgb
-
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import GridSearchCV
 
+import lightgbm as lgb
+
 print('Loading data...')
 # load or create your dataset
 df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
@@ -158,6 +158,7 @@ struct Config {
   // descl2 = **Note**: only works with CPU and ``serial`` tree learner
   // descl2 = **Note**: ``regression_l1`` objective is not supported with linear tree boosting
   // descl2 = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
+  // descl2 = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
   bool linear_tree = false;
 
   // alias = train, train_data, train_data_file, data_filename

diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h
@@ -4,6 +4,7 @@
  */
 #ifndef LIGHTGBM_OPENMP_WRAPPER_H_
 #define LIGHTGBM_OPENMP_WRAPPER_H_
+
 #ifdef _OPENMP
 
 #include <LightGBM/utils/log.h>
@@ -66,6 +67,22 @@ class ThreadExceptionHelper {
 
 #else
 
+/*
+ * To be compatible with openmp, define a nothrow macro which is used by gcc
+ * openmp, but not by clang.
+ * See also https://github.com/dmlc/dmlc-core/blob/3106c1cbdcc9fc9ef3a2c1d2196a7a6f6616c13d/include/dmlc/omp.h#L14
+ */
+#if defined(__clang__)
+#undef __GOMP_NOTHROW
+#define __GOMP_NOTHROW
+#elif defined(__cplusplus)
+#undef __GOMP_NOTHROW
+#define __GOMP_NOTHROW throw()
+#else
+#undef __GOMP_NOTHROW
+#define __GOMP_NOTHROW __attribute__((__nothrow__))
+#endif
+
 #ifdef _MSC_VER
   #pragma warning(disable : 4068)  // disable unknown pragma warning
 #endif
@@ -76,11 +93,11 @@ class ThreadExceptionHelper {
   /** Fall here if no OPENMP support, so just
       simulate a single thread running.
       All #pragma omp should be ignored by the compiler **/
-  inline void omp_set_num_threads(int) {}
-  inline int omp_get_num_threads() {return 1;}
-  inline int omp_get_max_threads() {return 1;}
-  inline int omp_get_thread_num() {return 0;}
-  inline int OMP_NUM_THREADS() { return 1; }
+  inline void omp_set_num_threads(int) __GOMP_NOTHROW {}  // NOLINT (no cast done here)
+  inline int omp_get_num_threads() __GOMP_NOTHROW {return 1;}
+  inline int omp_get_max_threads() __GOMP_NOTHROW {return 1;}
+  inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;}
+  inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; }
 #ifdef __cplusplus
 }  // extern "C"
 #endif

diff --git a/include/LightGBM/utils/threading.h b/include/LightGBM/utils/threading.h
@@ -78,7 +78,9 @@ class Threading {
       OMP_LOOP_EX_BEGIN();
       INDEX_T inner_start = start + num_inner * i;
       INDEX_T inner_end = std::min(end, inner_start + num_inner);
-      inner_fun(i, inner_start, inner_end);
+      if (inner_start < inner_end) {
+          inner_fun(i, inner_start, inner_end);
+      }
       OMP_LOOP_EX_END();
     }
     OMP_THROW_EX();

diff --git a/python-package/lightgbm/__init__.py b/python-package/lightgbm/__init__.py
@@ -3,24 +3,24 @@
 
 Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
 """
+import os
+
 from .basic import Booster, Dataset, register_logger
 from .callback import (early_stopping, print_evaluation, record_evaluation,
                        reset_parameter)
-from .engine import cv, train, CVBooster
-
-import os
+from .engine import CVBooster, cv, train
 
 try:
-    from .sklearn import LGBMModel, LGBMRegressor, LGBMClassifier, LGBMRanker
+    from .sklearn import LGBMClassifier, LGBMModel, LGBMRanker, LGBMRegressor
 except ImportError:
     pass
 try:
-    from .plotting import (plot_importance, plot_split_value_histogram, plot_metric,
-                           plot_tree, create_tree_digraph)
+    from .plotting import (create_tree_digraph, plot_importance, plot_metric,
+                           plot_split_value_histogram, plot_tree)
 except ImportError:
     pass
 try:
-    from .dask import DaskLGBMRegressor, DaskLGBMClassifier, DaskLGBMRanker
+    from .dask import DaskLGBMClassifier, DaskLGBMRanker, DaskLGBMRegressor
 except ImportError:
     pass
 

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
@@ -14,7 +14,8 @@
 import numpy as np
 import scipy.sparse
 
-from .compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat, is_dtype_sparse, dt_DataTable
+from .compat import (PANDAS_INSTALLED, concat, dt_DataTable, is_dtype_sparse,
+                     pd_DataFrame, pd_Series)
 from .libpath import find_lib_path
 
 

diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py
@@ -3,9 +3,9 @@
 
 """pandas"""
 try:
-    from pandas import concat
-    from pandas import Series as pd_Series
     from pandas import DataFrame as pd_DataFrame
+    from pandas import Series as pd_Series
+    from pandas import concat
     from pandas.api.types import is_sparse as is_dtype_sparse
     PANDAS_INSTALLED = True
 except ImportError:
@@ -57,17 +57,17 @@ class dt_DataTable:
 
 """sklearn"""
 try:
-    from sklearn.base import BaseEstimator
-    from sklearn.base import RegressorMixin, ClassifierMixin
+    from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
     from sklearn.preprocessing import LabelEncoder
     from sklearn.utils.class_weight import compute_sample_weight
     from sklearn.utils.multiclass import check_classification_targets
-    from sklearn.utils.validation import assert_all_finite, check_X_y, check_array
+    from sklearn.utils.validation import (assert_all_finite, check_array,
+                                          check_X_y)
     try:
-        from sklearn.model_selection import StratifiedKFold, GroupKFold
         from sklearn.exceptions import NotFittedError
+        from sklearn.model_selection import GroupKFold, StratifiedKFold
     except ImportError:
-        from sklearn.cross_validation import StratifiedKFold, GroupKFold
+        from sklearn.cross_validation import GroupKFold, StratifiedKFold
         from sklearn.utils.validation import NotFittedError
     try:
         from sklearn.utils.validation import _check_sample_weight

diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py
@@ -9,24 +9,20 @@
 import socket
 from collections import defaultdict
 from copy import deepcopy
-from typing import Any, Callable, Dict, Iterable, List, Optional, Type, Union
+from typing import Any, Callable, Dict, Iterable, List, Optional, Type, Union, Set
 from urllib.parse import urlparse
 
 import numpy as np
 import scipy.sparse as ss
 
-from .basic import _choose_param_value, _ConfigAliases, _LIB, _log_warning, _safe_call, LightGBMError
-from .compat import (PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat,
-                     SKLEARN_INSTALLED, LGBMNotFittedError,
-                     DASK_INSTALLED, dask_DataFrame, dask_Array, dask_Series, delayed, Client, default_client, get_worker, wait)
-from .sklearn import (
-    _lgbmmodel_doc_fit,
-    _lgbmmodel_doc_predict,
-    LGBMClassifier,
-    LGBMModel,
-    LGBMRegressor,
-    LGBMRanker
-)
+from .basic import (_LIB, LightGBMError, _choose_param_value, _ConfigAliases,
+                    _log_warning, _safe_call)
+from .compat import (DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED,
+                     Client, LGBMNotFittedError, concat, dask_Array,
+                     dask_DataFrame, dask_Series, default_client, delayed,
+                     get_worker, pd_DataFrame, pd_Series, wait)
+from .sklearn import (LGBMClassifier, LGBMModel, LGBMRanker, LGBMRegressor,
+                      _lgbmmodel_doc_fit, _lgbmmodel_doc_predict)
 
 _DaskCollection = Union[dask_Array, dask_DataFrame, dask_Series]
 _DaskMatrixLike = Union[dask_Array, dask_DataFrame]
@@ -77,7 +73,6 @@ def _find_open_port(worker_ip: str, local_listen_port: int, ports_to_skip: Itera
         A free port on the machine referenced by ``worker_ip``.
     """
     max_tries = 1000
-    out_port = None
     found_port = False
     for i in range(max_tries):
         out_port = local_listen_port + i
@@ -117,7 +112,7 @@ def _find_ports_for_workers(client: Client, worker_addresses: Iterable[str], loc
     result : Dict[str, int]
         Dictionary where keys are worker addresses and values are an open port for LightGBM to use.
     """
-    lightgbm_ports = set()
+    lightgbm_ports: Set[int] = set()
     worker_ip_to_port = {}
     for worker_address in worker_addresses:
         port = client.submit(
@@ -289,15 +284,16 @@ def _train(
     data_parts = _split_to_parts(data=data, is_matrix=True)
     label_parts = _split_to_parts(data=label, is_matrix=False)
     parts = [{'data': x, 'label': y} for (x, y) in zip(data_parts, label_parts)]
+    n_parts = len(parts)
 
     if sample_weight is not None:
         weight_parts = _split_to_parts(data=sample_weight, is_matrix=False)
-        for i in range(len(parts)):
+        for i in range(n_parts):
             parts[i]['weight'] = weight_parts[i]
 
     if group is not None:
         group_parts = _split_to_parts(data=group, is_matrix=False)
-        for i in range(len(parts)):
+        for i in range(n_parts):
             parts[i]['group'] = group_parts[i]
 
     # Start computation in the background
@@ -306,11 +302,11 @@ def _train(
     wait(parts)
 
     for part in parts:
-        if part.status == 'error':
+        if part.status == 'error':  # type: ignore
             return part  # trigger error locally
 
     # Find locations of all parts and map them to particular Dask workers
-    key_to_part_dict = {part.key: part for part in parts}
+    key_to_part_dict = {part.key: part for part in parts}  # type: ignore
     who_has = client.who_has(parts)
     worker_map = defaultdict(list)
     for key, workers in who_has.items():

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
@@ -7,7 +7,8 @@
 import numpy as np
 
 from . import callback
-from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor, _log_warning
+from .basic import (Booster, Dataset, LightGBMError, _ConfigAliases,
+                    _InnerPredictor, _log_warning)
 from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold