Skip to content

Commit

Permalink
Merge branch 'master' into qemu_aarch64
Browse files (browse the repository at this point in the history)
  • Loading branch information
StrikerRUS authored Feb 16, 2021
2 parents 9c3c640 + 4ae5949 commit f4d1b0d
Show file tree
Hide file tree
Showing 34 changed files with 131 additions and 107 deletions.
3 changes: 0 additions & 3 deletions R-package/R/lgb.Dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,6 @@ dimnames.lgb.Dataset <- function(x) {
}

#' @rdname dimnames.lgb.Dataset
#' @return A list with the dimension names of the dataset
#' @export
`dimnames<-.lgb.Dataset` <- function(x, value) {

Expand Down Expand Up @@ -1024,7 +1023,6 @@ getinfo <- function(dataset, ...) {
}

#' @rdname getinfo
#' @return info data
#' @export
getinfo.lgb.Dataset <- function(dataset, name, ...) {

Expand Down Expand Up @@ -1079,7 +1077,6 @@ setinfo <- function(dataset, ...) {
}

#' @rdname setinfo
#' @return the dataset you passed in
#' @export
setinfo.lgb.Dataset <- function(dataset, name, info, ...) {

Expand Down
2 changes: 0 additions & 2 deletions R-package/man/dimnames.lgb.Dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions R-package/man/getinfo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions R-package/man/setinfo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ Ruby gem: https://github.com/ankane/lightgbm

LightGBM4j (Java high-level binding): https://github.com/metarank/lightgbm4j

lightgbm-rs (Rust binding): https://github.com/vaaaaanquish/lightgbm-rs

MLflow (experiment tracking, model monitoring framework): https://github.com/mlflow/mlflow

`{treesnip}` (R `{parsnip}`-compliant interface): https://github.com/curso-r/treesnip
Expand Down
2 changes: 2 additions & 0 deletions docs/Parameters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ Core Parameters

- **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM

- **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves

- ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``

- path of training data, LightGBM will train from this data
Expand Down
10 changes: 5 additions & 5 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@
import datetime
import os
import sys
import sphinx

from distutils.dir_util import copy_tree
from re import compile
from subprocess import PIPE, Popen
from unittest.mock import Mock

import sphinx
from docutils.nodes import reference
from docutils.parsers.rst import Directive
from docutils.transforms import Transform
from re import compile
from sphinx.errors import VersionRequirementError
from subprocess import PIPE, Popen
from unittest.mock import Mock

CURR_PATH = os.path.abspath(os.path.dirname(__file__))
LIB_PATH = os.path.join(CURR_PATH, os.path.pardir, 'python-package')
Expand Down
5 changes: 3 additions & 2 deletions examples/python-guide/advanced_example.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# coding: utf-8
import json
import pickle
import lightgbm as lgb
import pandas as pd

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

import lightgbm as lgb

print('Loading data...')
# load or create your dataset
Expand Down
3 changes: 2 additions & 1 deletion examples/python-guide/logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@

import time

import lightgbm as lgb
import numpy as np
import pandas as pd
from scipy.special import expit

import lightgbm as lgb

#################
# Simulate some binary data with a single categorical and
# single continuous predictor
Expand Down
3 changes: 2 additions & 1 deletion examples/python-guide/plot_example.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# coding: utf-8
import lightgbm as lgb
import pandas as pd

import lightgbm as lgb

if lgb.compat.MATPLOTLIB_INSTALLED:
import matplotlib.pyplot as plt
else:
Expand Down
3 changes: 2 additions & 1 deletion examples/python-guide/simple_example.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# coding: utf-8
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error

import lightgbm as lgb

print('Loading data...')
# load or create your dataset
df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
Expand Down
4 changes: 2 additions & 2 deletions examples/python-guide/sklearn_example.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# coding: utf-8
import numpy as np
import pandas as pd
import lightgbm as lgb

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

import lightgbm as lgb

print('Loading data...')
# load or create your dataset
df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
Expand Down
1 change: 1 addition & 0 deletions include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ struct Config {
// descl2 = **Note**: only works with CPU and ``serial`` tree learner
// descl2 = **Note**: ``regression_l1`` objective is not supported with linear tree boosting
// descl2 = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
// descl2 = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
bool linear_tree = false;

// alias = train, train_data, train_data_file, data_filename
Expand Down
27 changes: 22 additions & 5 deletions include/LightGBM/utils/openmp_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/
#ifndef LIGHTGBM_OPENMP_WRAPPER_H_
#define LIGHTGBM_OPENMP_WRAPPER_H_

#ifdef _OPENMP

#include <LightGBM/utils/log.h>
Expand Down Expand Up @@ -66,6 +67,22 @@ class ThreadExceptionHelper {

#else

/*
* To be compatible with openmp, define a nothrow macro which is used by gcc
* openmp, but not by clang.
* See also https://github.com/dmlc/dmlc-core/blob/3106c1cbdcc9fc9ef3a2c1d2196a7a6f6616c13d/include/dmlc/omp.h#L14
*/
#if defined(__clang__)
#undef __GOMP_NOTHROW
#define __GOMP_NOTHROW
#elif defined(__cplusplus)
#undef __GOMP_NOTHROW
#define __GOMP_NOTHROW throw()
#else
#undef __GOMP_NOTHROW
#define __GOMP_NOTHROW __attribute__((__nothrow__))
#endif

#ifdef _MSC_VER
#pragma warning(disable : 4068) // disable unknown pragma warning
#endif
Expand All @@ -76,11 +93,11 @@ class ThreadExceptionHelper {
/** Fall here if no OPENMP support, so just
simulate a single thread running.
All #pragma omp should be ignored by the compiler **/
inline void omp_set_num_threads(int) {}
inline int omp_get_num_threads() {return 1;}
inline int omp_get_max_threads() {return 1;}
inline int omp_get_thread_num() {return 0;}
inline int OMP_NUM_THREADS() { return 1; }
inline void omp_set_num_threads(int) __GOMP_NOTHROW {} // NOLINT (no cast done here)
inline int omp_get_num_threads() __GOMP_NOTHROW {return 1;}
inline int omp_get_max_threads() __GOMP_NOTHROW {return 1;}
inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;}
inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; }
#ifdef __cplusplus
} // extern "C"
#endif
Expand Down
4 changes: 3 additions & 1 deletion include/LightGBM/utils/threading.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ class Threading {
OMP_LOOP_EX_BEGIN();
INDEX_T inner_start = start + num_inner * i;
INDEX_T inner_end = std::min(end, inner_start + num_inner);
inner_fun(i, inner_start, inner_end);
if (inner_start < inner_end) {
inner_fun(i, inner_start, inner_end);
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
Expand Down
14 changes: 7 additions & 7 deletions python-package/lightgbm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,24 @@
Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
"""
import os

from .basic import Booster, Dataset, register_logger
from .callback import (early_stopping, print_evaluation, record_evaluation,
reset_parameter)
from .engine import cv, train, CVBooster

import os
from .engine import CVBooster, cv, train

try:
from .sklearn import LGBMModel, LGBMRegressor, LGBMClassifier, LGBMRanker
from .sklearn import LGBMClassifier, LGBMModel, LGBMRanker, LGBMRegressor
except ImportError:
pass
try:
from .plotting import (plot_importance, plot_split_value_histogram, plot_metric,
plot_tree, create_tree_digraph)
from .plotting import (create_tree_digraph, plot_importance, plot_metric,
plot_split_value_histogram, plot_tree)
except ImportError:
pass
try:
from .dask import DaskLGBMRegressor, DaskLGBMClassifier, DaskLGBMRanker
from .dask import DaskLGBMClassifier, DaskLGBMRanker, DaskLGBMRegressor
except ImportError:
pass

Expand Down
3 changes: 2 additions & 1 deletion python-package/lightgbm/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
import numpy as np
import scipy.sparse

from .compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat, is_dtype_sparse, dt_DataTable
from .compat import (PANDAS_INSTALLED, concat, dt_DataTable, is_dtype_sparse,
pd_DataFrame, pd_Series)
from .libpath import find_lib_path


Expand Down
14 changes: 7 additions & 7 deletions python-package/lightgbm/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

"""pandas"""
try:
from pandas import concat
from pandas import Series as pd_Series
from pandas import DataFrame as pd_DataFrame
from pandas import Series as pd_Series
from pandas import concat
from pandas.api.types import is_sparse as is_dtype_sparse
PANDAS_INSTALLED = True
except ImportError:
Expand Down Expand Up @@ -57,17 +57,17 @@ class dt_DataTable:

"""sklearn"""
try:
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin, ClassifierMixin
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import assert_all_finite, check_X_y, check_array
from sklearn.utils.validation import (assert_all_finite, check_array,
check_X_y)
try:
from sklearn.model_selection import StratifiedKFold, GroupKFold
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import GroupKFold, StratifiedKFold
except ImportError:
from sklearn.cross_validation import StratifiedKFold, GroupKFold
from sklearn.cross_validation import GroupKFold, StratifiedKFold
from sklearn.utils.validation import NotFittedError
try:
from sklearn.utils.validation import _check_sample_weight
Expand Down
34 changes: 15 additions & 19 deletions python-package/lightgbm/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,20 @@
import socket
from collections import defaultdict
from copy import deepcopy
from typing import Any, Callable, Dict, Iterable, List, Optional, Type, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Type, Union, Set
from urllib.parse import urlparse

import numpy as np
import scipy.sparse as ss

from .basic import _choose_param_value, _ConfigAliases, _LIB, _log_warning, _safe_call, LightGBMError
from .compat import (PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat,
SKLEARN_INSTALLED, LGBMNotFittedError,
DASK_INSTALLED, dask_DataFrame, dask_Array, dask_Series, delayed, Client, default_client, get_worker, wait)
from .sklearn import (
_lgbmmodel_doc_fit,
_lgbmmodel_doc_predict,
LGBMClassifier,
LGBMModel,
LGBMRegressor,
LGBMRanker
)
from .basic import (_LIB, LightGBMError, _choose_param_value, _ConfigAliases,
_log_warning, _safe_call)
from .compat import (DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED,
Client, LGBMNotFittedError, concat, dask_Array,
dask_DataFrame, dask_Series, default_client, delayed,
get_worker, pd_DataFrame, pd_Series, wait)
from .sklearn import (LGBMClassifier, LGBMModel, LGBMRanker, LGBMRegressor,
_lgbmmodel_doc_fit, _lgbmmodel_doc_predict)

_DaskCollection = Union[dask_Array, dask_DataFrame, dask_Series]
_DaskMatrixLike = Union[dask_Array, dask_DataFrame]
Expand Down Expand Up @@ -77,7 +73,6 @@ def _find_open_port(worker_ip: str, local_listen_port: int, ports_to_skip: Itera
A free port on the machine referenced by ``worker_ip``.
"""
max_tries = 1000
out_port = None
found_port = False
for i in range(max_tries):
out_port = local_listen_port + i
Expand Down Expand Up @@ -117,7 +112,7 @@ def _find_ports_for_workers(client: Client, worker_addresses: Iterable[str], loc
result : Dict[str, int]
Dictionary where keys are worker addresses and values are an open port for LightGBM to use.
"""
lightgbm_ports = set()
lightgbm_ports: Set[int] = set()
worker_ip_to_port = {}
for worker_address in worker_addresses:
port = client.submit(
Expand Down Expand Up @@ -289,15 +284,16 @@ def _train(
data_parts = _split_to_parts(data=data, is_matrix=True)
label_parts = _split_to_parts(data=label, is_matrix=False)
parts = [{'data': x, 'label': y} for (x, y) in zip(data_parts, label_parts)]
n_parts = len(parts)

if sample_weight is not None:
weight_parts = _split_to_parts(data=sample_weight, is_matrix=False)
for i in range(len(parts)):
for i in range(n_parts):
parts[i]['weight'] = weight_parts[i]

if group is not None:
group_parts = _split_to_parts(data=group, is_matrix=False)
for i in range(len(parts)):
for i in range(n_parts):
parts[i]['group'] = group_parts[i]

# Start computation in the background
Expand All @@ -306,11 +302,11 @@ def _train(
wait(parts)

for part in parts:
if part.status == 'error':
if part.status == 'error': # type: ignore
return part # trigger error locally

# Find locations of all parts and map them to particular Dask workers
key_to_part_dict = {part.key: part for part in parts}
key_to_part_dict = {part.key: part for part in parts} # type: ignore
who_has = client.who_has(parts)
worker_map = defaultdict(list)
for key, workers in who_has.items():
Expand Down
3 changes: 2 additions & 1 deletion python-package/lightgbm/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import numpy as np

from . import callback
from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor, _log_warning
from .basic import (Booster, Dataset, LightGBMError, _ConfigAliases,
_InnerPredictor, _log_warning)
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold


Expand Down
Loading

0 comments on commit f4d1b0d

Please sign in to comment.