From a70e7799ed6210e7b7de9f727a8ea845bebf4860 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Thu, 3 Mar 2022 16:42:37 -0600 Subject: [PATCH 01/37] feat: support custom metrics in params --- python-package/lightgbm/basic.py | 27 +++++++++++++++++++++++++++ python-package/lightgbm/engine.py | 10 +++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 7aa1d85a069c..d7125a60081e 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -19,6 +19,7 @@ from .compat import PANDAS_INSTALLED, concat, dt_DataTable, pd_CategoricalDtype, pd_DataFrame, pd_Series from .libpath import find_lib_path +from .sklearn import _EvalFunctionWrapper ZERO_THRESHOLD = 1e-35 @@ -296,6 +297,32 @@ def to_string(x): return ' '.join(pairs) +def _separate_metrics_list(metrics_list): + """Separate built-in from callable evaluation metrics.""" + metrics_callable = [_EvalFunctionWrapper(f) for f in metrics_list if callable(f)] + metrics_builtin = [m for m in metrics_list if isinstance(m, str)] + return metrics_callable, metrics_builtin + + +def _concat_params_metrics(params, metrics_builtin): + """Concatenate metric from params (or default if not provided in params) and eval_metric.""" + params = deepcopy(params) + params_metric = deepcopy(params['metric']) + params_metric = [params_metric] if isinstance(params_metric, (str, type(None))) else params_metric + params_metric = [e for e in metrics_builtin if e not in params_metric] + params_metric + params_metric = [metric for metric in params_metric if metric is not None] + params['metric'] = params_metric + return params + + +def _concat_metric_feval_callables(metrics_callable, feval_callable): + """Concatenate evaluation metric from params and feval.""" + feval_callable = [feval_callable] if (isinstance(feval_callable, type(None)) or callable(feval_callable)) else feval_callable + feval_callable = [e for e in metrics_callable if e not in feval_callable] + feval_callable + feval_callable = [metric for metric in feval_callable if metric is not None] + return feval_callable + + class _TempFile: """Proxy class to workaround errors on Windows.""" diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 9b4ff70c217c..8ac34506ab8f 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -9,7 +9,7 @@ import numpy as np from . 
import callback -from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning +from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning, _separate_metrics_list, _concat_params_metrics, _concat_metric_feval_callables from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold _LGBM_CustomObjectiveFunction = Callable[ @@ -125,6 +125,14 @@ def train( """ # create predictor first params = copy.deepcopy(params) + eval_metric = params['metric'] + if eval_metric is not None: + eval_metric_list = copy.deepcopy(eval_metric) + if not isinstance(eval_metric_list, list): + eval_metric_list = [eval_metric_list] + eval_metrics_callable, eval_metrics_builtin = _separate_metrics_list(eval_metric_list) + params = _concat_params_metrics(params, eval_metrics_builtin) + feval = _concat_metric_feval_callables(feval, eval_metrics_callable) if fobj is not None: for obj_alias in _ConfigAliases.get("objective"): params.pop(obj_alias, None) From 9c41c6b9e2fc42e0972c5e661bc557ccf5a27073 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Thu, 3 Mar 2022 16:56:38 -0600 Subject: [PATCH 02/37] feat: support objective in params --- python-package/lightgbm/basic.py | 10 ++++++++++ python-package/lightgbm/engine.py | 5 ++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index d7125a60081e..9ab8804711db 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -323,6 +323,16 @@ def _concat_metric_feval_callables(metrics_callable, feval_callable): return feval_callable +def _objective_is_callable(params, fobj_callable): + """Check if objective function from params or from fobj is callable.""" + params_objective = deepcopy(params['objective']) + # if objective in params is callable ignore the callable from fobj + if callable(params_objective): + return params_objective + elif callable(fobj_callable): + return fobj_callable + + class _TempFile: """Proxy class to workaround errors on Windows.""" diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 8ac34506ab8f..4085df974841 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -9,7 +9,7 @@ import numpy as np from . 
import callback -from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning, _separate_metrics_list, _concat_params_metrics, _concat_metric_feval_callables +from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning, _separate_metrics_list, _concat_params_metrics, _concat_metric_feval_callables, _objective_is_callable from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold _LGBM_CustomObjectiveFunction = Callable[ @@ -126,6 +126,7 @@ def train( # create predictor first params = copy.deepcopy(params) eval_metric = params['metric'] + # merge metrics through params and feval if eval_metric is not None: eval_metric_list = copy.deepcopy(eval_metric) if not isinstance(eval_metric_list, list): @@ -133,6 +134,8 @@ def train( eval_metrics_callable, eval_metrics_builtin = _separate_metrics_list(eval_metric_list) params = _concat_params_metrics(params, eval_metrics_builtin) feval = _concat_metric_feval_callables(feval, eval_metrics_callable) + # objective can be passed either through params or fobj + fobj = _objective_is_callable(params, fobj) if fobj is not None: for obj_alias in _ConfigAliases.get("objective"): params.pop(obj_alias, None) From 8ba0b7281ce1d6dcc2d771970f5df5f8528605c6 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Thu, 3 Mar 2022 20:34:10 -0600 Subject: [PATCH 03/37] test: custom objective and metric --- python-package/lightgbm/basic.py | 9 +-- python-package/lightgbm/engine.py | 4 +- tests/python_package_test/test_engine.py | 86 ++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 6 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 9ab8804711db..1b57687de1c9 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -19,7 +19,6 @@ from .compat import PANDAS_INSTALLED, concat, dt_DataTable, pd_CategoricalDtype, pd_DataFrame, pd_Series from .libpath import find_lib_path -from .sklearn import _EvalFunctionWrapper ZERO_THRESHOLD = 1e-35 @@ -299,7 +298,7 @@ def to_string(x): def _separate_metrics_list(metrics_list): """Separate built-in from callable evaluation metrics.""" - metrics_callable = [_EvalFunctionWrapper(f) for f in metrics_list if callable(f)] + metrics_callable = [f for f in metrics_list if callable(f)] metrics_builtin = [m for m in metrics_list if isinstance(m, str)] return metrics_callable, metrics_builtin @@ -307,7 +306,7 @@ def _separate_metrics_list(metrics_list): def _concat_params_metrics(params, metrics_builtin): """Concatenate metric from params (or default if not provided in params) and eval_metric.""" params = deepcopy(params) - params_metric = deepcopy(params['metric']) + params_metric = deepcopy(params.get('metric')) params_metric = [params_metric] if isinstance(params_metric, (str, type(None))) else params_metric params_metric = [e for e in metrics_builtin if e not in params_metric] + params_metric params_metric = [metric for metric in params_metric if metric is not None] @@ -325,12 +324,14 @@ def _concat_metric_feval_callables(metrics_callable, feval_callable): def _objective_is_callable(params, fobj_callable): """Check if objective function from params or from fobj is callable.""" - params_objective = deepcopy(params['objective']) + params_objective = deepcopy(params.get('objective')) # if objective in params is callable ignore the callable from fobj if callable(params_objective): return params_objective elif 
callable(fobj_callable): return fobj_callable + else: + return None class _TempFile: diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 4085df974841..b0c976117bbf 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -125,7 +125,7 @@ def train( """ # create predictor first params = copy.deepcopy(params) - eval_metric = params['metric'] + eval_metric = params.get('metric') # merge metrics through params and feval if eval_metric is not None: eval_metric_list = copy.deepcopy(eval_metric) @@ -133,7 +133,7 @@ def train( eval_metric_list = [eval_metric_list] eval_metrics_callable, eval_metrics_builtin = _separate_metrics_list(eval_metric_list) params = _concat_params_metrics(params, eval_metrics_builtin) - feval = _concat_metric_feval_callables(feval, eval_metrics_callable) + feval = _concat_metric_feval_callables(eval_metrics_callable, feval) # objective can be passed either through params or fobj fobj = _objective_is_callable(params, fobj) if fobj is not None: diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index df0a8c407dc0..d969073b7153 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2267,6 +2267,92 @@ def test_multiple_feval_train(): assert 'decreasing_metric' in evals_result['valid_0'] +def test_params_metric_feval_callable_train(): + # Test classification + X, y = load_breast_cancer(return_X_y=True) + params = {'verbose': -1, 'objective': 'binary', 'metric': ['binary_logloss', constant_metric]} + X_train, X_validation, y_train, y_validation = train_test_split(X, y, test_size=0.2) + train_dataset = lgb.Dataset(data=X_train, label=y_train) + validation_dataset = lgb.Dataset(data=X_validation, label=y_validation, reference=train_dataset) + evals_result = {} + lgb.train( + params=params, + train_set=train_dataset, + valid_sets=validation_dataset, + num_boost_round=5, + feval=decreasing_metric, + callbacks=[lgb.record_evaluation(evals_result)] + ) + assert len(evals_result['valid_0']) == 3 + assert 'binary_logloss' in evals_result['valid_0'] + assert 'error' in evals_result['valid_0'] + assert 'decreasing_metric' in evals_result['valid_0'] + + # Test regression + X, y = load_boston(return_X_y=True) + params = {'verbose': -1, 'metric': ['l2', constant_metric]} + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) + lgb_train = lgb.Dataset(X_train, y_train) + lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) + evals_result = {} + gbm = lgb.train( + params, + lgb_train, + num_boost_round=50, + valid_sets=lgb_eval, + feval=(lambda p, d: ('custom_mae', mean_absolute_error(p, d.get_label()), False)), + callbacks=[lgb.record_evaluation(evals_result)] + ) + assert len(evals_result['valid_0']) == 3 + assert 'l2' in evals_result['valid_0'] + assert 'error' in evals_result['valid_0'] + assert 'custom_mae' in evals_result['valid_0'] + + +def test_objective_callable_train(): + # Test classification + X, y = load_breast_cancer(return_X_y=True) + params = {'verbose': -1, 'objective': dummy_obj, 'metric': 'binary_logloss'} + X_train, X_validation, y_train, y_validation = train_test_split(X, y, test_size=0.2) + train_dataset = lgb.Dataset(data=X_train, label=y_train) + validation_dataset = lgb.Dataset(data=X_validation, label=y_validation, reference=train_dataset) + evals_result = {} + lgb.train( + params=params, + train_set=train_dataset, + valid_sets=validation_dataset, + 
num_boost_round=5, + feval=decreasing_metric, + callbacks=[lgb.record_evaluation(evals_result)] + ) + assert len(evals_result['valid_0']) == 2 + assert 'binary_logloss' in evals_result['valid_0'] + assert 'decreasing_metric' in evals_result['valid_0'] + + # Test regression + def mse_obj(y_pred, dtrain): + y_true = dtrain.get_label() + grad = (y_pred - y_true) + hess = np.ones(len(grad)) + return grad, hess + X, y = load_boston(return_X_y=True) + params = {'verbose': -1, 'objective': mse_obj} + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) + lgb_train = lgb.Dataset(X_train, y_train) + lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) + evals_result = {} + lgb.train( + params, + lgb_train, + num_boost_round=50, + valid_sets=lgb_eval, + feval=decreasing_metric, + callbacks=[lgb.record_evaluation(evals_result)] + ) + assert len(evals_result['valid_0']) == 1 + assert 'decreasing_metric' in evals_result['valid_0'] + + def test_multiple_feval_cv(): X, y = load_breast_cancer(return_X_y=True) From 055ab28cfa4660526c0d197a0991a56df27359e6 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Thu, 3 Mar 2022 20:51:16 -0600 Subject: [PATCH 04/37] fix: imports are incorrectly sorted --- python-package/lightgbm/engine.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index b0c976117bbf..829a0002c2b5 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -9,7 +9,9 @@ import numpy as np from . import callback -from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning, _separate_metrics_list, _concat_params_metrics, _concat_metric_feval_callables, _objective_is_callable +from .basic import (Booster, Dataset, LightGBMError, _choose_param_value, _concat_metric_feval_callables, + _concat_params_metrics, _ConfigAliases, _InnerPredictor, _log_warning, _objective_is_callable, + _separate_metrics_list) from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold _LGBM_CustomObjectiveFunction = Callable[ From c858d61094f40be16ddd037035033740463245bd Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Fri, 4 Mar 2022 16:15:47 -0600 Subject: [PATCH 05/37] feat: convert eval metrics str and set to list --- python-package/lightgbm/engine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 829a0002c2b5..cd9c2d0e9622 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -131,8 +131,9 @@ def train( # merge metrics through params and feval if eval_metric is not None: eval_metric_list = copy.deepcopy(eval_metric) - if not isinstance(eval_metric_list, list): - eval_metric_list = [eval_metric_list] + if not isinstance(eval_metric, list): + eval_metric_list = [eval_metric_list] if isinstance(eval_metric_list, str) else list(eval_metric_list) + params['metric'] = eval_metric_list eval_metrics_callable, eval_metrics_builtin = _separate_metrics_list(eval_metric_list) params = _concat_params_metrics(params, eval_metrics_builtin) feval = _concat_metric_feval_callables(eval_metrics_callable, feval) From 704d831b97df54da1feb7c2f5359c2ba220b00db Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Wed, 9 Mar 2022 22:02:52 -0600 Subject: [PATCH 06/37] feat: convert single callable eval_metric to list --- python-package/lightgbm/engine.py | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index cd9c2d0e9622..6aa1dafaddc7 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -132,12 +132,14 @@ def train( if eval_metric is not None: eval_metric_list = copy.deepcopy(eval_metric) if not isinstance(eval_metric, list): - eval_metric_list = [eval_metric_list] if isinstance(eval_metric_list, str) else list(eval_metric_list) + eval_metric_is_str = isinstance(eval_metric_list, str) + eval_metric_is_callable = callable(eval_metric_list) + eval_metric_list = [eval_metric_list] if (eval_metric_is_str or eval_metric_is_callable) else list(eval_metric_list) params['metric'] = eval_metric_list eval_metrics_callable, eval_metrics_builtin = _separate_metrics_list(eval_metric_list) params = _concat_params_metrics(params, eval_metrics_builtin) feval = _concat_metric_feval_callables(eval_metrics_callable, feval) - # objective can be passed either through params or fobj + # customized objective can be passed either through params or fobj fobj = _objective_is_callable(params, fobj) if fobj is not None: for obj_alias in _ConfigAliases.get("objective"): From 86a1861f6f338e72cd370349f8d98bbda82418a0 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 12 Mar 2022 08:25:56 -0600 Subject: [PATCH 07/37] test: single callable objective in params Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index d969073b7153..0cff3c34209c 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2270,7 +2270,8 @@ def test_multiple_feval_train(): def test_params_metric_feval_callable_train(): # Test classification X, y = load_breast_cancer(return_X_y=True) - params = {'verbose': -1, 'objective': 'binary', 'metric': ['binary_logloss', constant_metric]} + # Test single callable + params = {'verbose': -1, 'objective': 'binary', 'metric': constant_metric} X_train, X_validation, y_train, y_validation = train_test_split(X, y, test_size=0.2) train_dataset = lgb.Dataset(data=X_train, label=y_train) validation_dataset = lgb.Dataset(data=X_validation, label=y_validation, reference=train_dataset) @@ -2283,8 +2284,7 @@ def test_params_metric_feval_callable_train(): feval=decreasing_metric, callbacks=[lgb.record_evaluation(evals_result)] ) - assert len(evals_result['valid_0']) == 3 - assert 'binary_logloss' in evals_result['valid_0'] + assert len(evals_result['valid_0']) == 2 assert 'error' in evals_result['valid_0'] assert 'decreasing_metric' in evals_result['valid_0'] From 8d2565bcd9c7e71379766fca692c4aa50565ea6b Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Mon, 14 Mar 2022 21:34:34 -0600 Subject: [PATCH 08/37] feat: callable fobj in basic cv function Signed-off-by: Miguel Trejo --- python-package/lightgbm/basic.py | 9 ++++- python-package/lightgbm/engine.py | 14 +------- tests/python_package_test/test_engine.py | 42 ------------------------ 3 files changed, 9 insertions(+), 56 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 1b57687de1c9..9aca905d4b98 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -324,7 +324,14 @@ def _concat_metric_feval_callables(metrics_callable, feval_callable): def _objective_is_callable(params, fobj_callable): """Check if 
objective function from params or from fobj is callable.""" - params_objective = deepcopy(params.get('objective')) + # objective function has different aliases + params_objective = ( + deepcopy(params.get('objective')) + or deepcopy(params.get('objective_type')) + or deepcopy(params.get('app')) + or deepcopy(params.get('application')) + or deepcopy(params.get('loss')) + ) # if objective in params is callable ignore the callable from fobj if callable(params_objective): return params_objective diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 6aa1dafaddc7..2c859ee7bc43 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -127,19 +127,6 @@ def train( """ # create predictor first params = copy.deepcopy(params) - eval_metric = params.get('metric') - # merge metrics through params and feval - if eval_metric is not None: - eval_metric_list = copy.deepcopy(eval_metric) - if not isinstance(eval_metric, list): - eval_metric_is_str = isinstance(eval_metric_list, str) - eval_metric_is_callable = callable(eval_metric_list) - eval_metric_list = [eval_metric_list] if (eval_metric_is_str or eval_metric_is_callable) else list(eval_metric_list) - params['metric'] = eval_metric_list - eval_metrics_callable, eval_metrics_builtin = _separate_metrics_list(eval_metric_list) - params = _concat_params_metrics(params, eval_metrics_builtin) - feval = _concat_metric_feval_callables(eval_metrics_callable, feval) - # customized objective can be passed either through params or fobj fobj = _objective_is_callable(params, fobj) if fobj is not None: for obj_alias in _ConfigAliases.get("objective"): @@ -504,6 +491,7 @@ def cv(params, train_set, num_boost_round=100, raise TypeError("Training only accepts Dataset object") params = copy.deepcopy(params) + fobj = _objective_is_callable(params, fobj) if fobj is not None: for obj_alias in _ConfigAliases.get("objective"): params.pop(obj_alias, None) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 0cff3c34209c..7ca8ece2c7fd 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2267,48 +2267,6 @@ def test_multiple_feval_train(): assert 'decreasing_metric' in evals_result['valid_0'] -def test_params_metric_feval_callable_train(): - # Test classification - X, y = load_breast_cancer(return_X_y=True) - # Test single callable - params = {'verbose': -1, 'objective': 'binary', 'metric': constant_metric} - X_train, X_validation, y_train, y_validation = train_test_split(X, y, test_size=0.2) - train_dataset = lgb.Dataset(data=X_train, label=y_train) - validation_dataset = lgb.Dataset(data=X_validation, label=y_validation, reference=train_dataset) - evals_result = {} - lgb.train( - params=params, - train_set=train_dataset, - valid_sets=validation_dataset, - num_boost_round=5, - feval=decreasing_metric, - callbacks=[lgb.record_evaluation(evals_result)] - ) - assert len(evals_result['valid_0']) == 2 - assert 'error' in evals_result['valid_0'] - assert 'decreasing_metric' in evals_result['valid_0'] - - # Test regression - X, y = load_boston(return_X_y=True) - params = {'verbose': -1, 'metric': ['l2', constant_metric]} - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) - lgb_train = lgb.Dataset(X_train, y_train) - lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) - evals_result = {} - gbm = lgb.train( - params, - lgb_train, - num_boost_round=50, - valid_sets=lgb_eval, - 
feval=(lambda p, d: ('custom_mae', mean_absolute_error(p, d.get_label()), False)), - callbacks=[lgb.record_evaluation(evals_result)] - ) - assert len(evals_result['valid_0']) == 3 - assert 'l2' in evals_result['valid_0'] - assert 'error' in evals_result['valid_0'] - assert 'custom_mae' in evals_result['valid_0'] - - def test_objective_callable_train(): # Test classification X, y = load_breast_cancer(return_X_y=True) From a0fb372899e74fa5351d60fd6ba49e8d3684ad27 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Mon, 14 Mar 2022 23:17:17 -0600 Subject: [PATCH 09/37] test: cv support objective callable Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 59 +++++++++++++----------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index e78520f1041b..c461f0f066d4 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2317,6 +2317,37 @@ def mse_obj(y_pred, dtrain): assert 'decreasing_metric' in evals_result['valid_0'] +def test_objective_callable_cv(): + # Test classification + X, y = load_breast_cancer(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) + params = {'verbose': -1, 'objective': dummy_obj, 'metric': 'binary_logloss'} + train_dataset = lgb.Dataset(X_train, y_train) + cv_res = lgb.cv( + params, + train_dataset, + num_boost_round=25, + nfold=3 + ) + assert 'valid binary_logloss-mean' in cv_res + assert len(cv_res['valid binary_logloss-mean']) == 25 + + # Test regression + def mse_obj(y_pred, dtrain): + y_true = dtrain.get_label() + grad = (y_pred - y_true) + hess = np.ones(len(grad)) + return grad, hess + X_train, y_train = make_synthetic_regression() + params = {'verbose': -1} + lgb_train = lgb.Dataset(X_train, y_train) + params_with_metric = {'verbose': -1, 'objective': mse_obj, 'metric': 'l2'} + cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, + nfold=3, stratified=False) + assert 'valid l2-mean' not in cv_res + assert len(cv_res['valid l2-mean']) == 10 + + def test_multiple_feval_cv(): X, y = load_breast_cancer(return_X_y=True) @@ -3474,31 +3505,3 @@ def test_pandas_nullable_dtypes(): # test equal predictions np.testing.assert_allclose(preds, preds_nullable_dtypes) - - -def test_boost_from_average_with_single_leaf_trees(): - # test data are taken from bug report - # https://github.com/microsoft/LightGBM/issues/4708 - X = np.array([ - [1021.0589, 1018.9578], - [1023.85754, 1018.7854], - [1024.5468, 1018.88513], - [1019.02954, 1018.88513], - [1016.79926, 1018.88513], - [1007.6, 1018.88513]], dtype=np.float32) - y = np.array([1023.8, 1024.6, 1024.4, 1023.8, 1022.0, 1014.4], dtype=np.float32) - params = { - "extra_trees": True, - "min_data_in_bin": 1, - "extra_seed": 7, - "objective": "regression", - "verbose": -1, - "boost_from_average": True, - "min_data_in_leaf": 1, - } - train_set = lgb.Dataset(X, y) - model = lgb.train(params=params, train_set=train_set, num_boost_round=10) - - preds = model.predict(X) - mean_preds = np.mean(preds) - assert y.min() <= mean_preds <= y.max() From b5d514bc169ca06a88e694822c12aef6eefe64d7 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Tue, 15 Mar 2022 16:29:09 -0600 Subject: [PATCH 10/37] fix: assert in cv_res Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_package_test/test_engine.py 
b/tests/python_package_test/test_engine.py index c461f0f066d4..aedfb02aaab4 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2344,7 +2344,7 @@ def mse_obj(y_pred, dtrain): params_with_metric = {'verbose': -1, 'objective': mse_obj, 'metric': 'l2'} cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, nfold=3, stratified=False) - assert 'valid l2-mean' not in cv_res + assert 'valid l2-mean' in cv_res assert len(cv_res['valid l2-mean']) == 10
From 2e20ff596c739b76cee5bcdd181aeff01f8d0c3d Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Tue, 15 Mar 2022 16:34:11 -0600 Subject: [PATCH 11/37] docs: objective callable in params Signed-off-by: Miguel Trejo --- python-package/lightgbm/engine.py | 6 ++++-- tests/python_package_test/test_engine.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 57f4399d9975..6931c9a68c77 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -43,7 +43,8 @@ def train( Parameters ---------- params : dict - Parameters for training. + Parameters for training. If a callable objective function is passed through both ``params`` and ``fobj``, + the Booster uses the one from ``params``. train_set : Dataset Data to be trained on. num_boost_round : int, optional (default=100) @@ -386,7 +387,8 @@ def cv(params, train_set, num_boost_round=100, Parameters ---------- params : dict - Parameters for Booster. + Parameters for Booster. If a callable objective function is passed through both ``params`` and ``fobj``, + the Booster uses the one from ``params``. train_set : Dataset Data to be trained on. num_boost_round : int, optional (default=100)
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index aedfb02aaab4..b464308d4419 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2317,7 +2317,7 @@ def mse_obj(y_pred, dtrain): assert 'decreasing_metric' in evals_result['valid_0'] -def test_objective_callable_cv(): +def test_objective_callable_cv(): # Test classification X, y = load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) params = {'verbose': -1, 'objective': dummy_obj, 'metric': 'binary_logloss'} train_dataset = lgb.Dataset(X_train, y_train) cv_res = lgb.cv( params, train_dataset, num_boost_round=25, nfold=3 - ) + ) assert 'valid binary_logloss-mean' in cv_res assert len(cv_res['valid binary_logloss-mean']) == 25
From 6411deeb41e44b6b97f28ea47be25d602f8f73fa Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Tue, 15 Mar 2022 16:36:51 -0600 Subject: [PATCH 12/37] recover test_boost_from_average_with_single_leaf_trees Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index b464308d4419..ddc69bfb8b07 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -3505,3 +3505,31 @@ def test_pandas_nullable_dtypes(): # test equal predictions np.testing.assert_allclose(preds, preds_nullable_dtypes) + + +def test_boost_from_average_with_single_leaf_trees(): + # test data are taken from bug report + # https://github.com/microsoft/LightGBM/issues/4708 + X = np.array([ + [1021.0589, 1018.9578], + [1023.85754, 1018.7854], + [1024.5468, 1018.88513], + [1019.02954, 1018.88513], + [1016.79926, 1018.88513], + [1007.6, 
1018.88513]], dtype=np.float32) + y = np.array([1023.8, 1024.6, 1024.4, 1023.8, 1022.0, 1014.4], dtype=np.float32) + params = { + "extra_trees": True, + "min_data_in_bin": 1, + "extra_seed": 7, + "objective": "regression", + "verbose": -1, + "boost_from_average": True, + "min_data_in_leaf": 1, + } + train_set = lgb.Dataset(X, y) + model = lgb.train(params=params, train_set=train_set, num_boost_round=10) + + preds = model.predict(X) + mean_preds = np.mean(preds) + assert y.min() <= mean_preds <= y.max() \ No newline at end of file From 2066513cbf936805e4d3fe0d32af0d48d248e4ca Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Tue, 15 Mar 2022 16:41:13 -0600 Subject: [PATCH 13/37] linters fail Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index ddc69bfb8b07..2f62c29f25c6 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2317,7 +2317,7 @@ def mse_obj(y_pred, dtrain): assert 'decreasing_metric' in evals_result['valid_0'] -def test_objective_callable_cv(): +def test_objective_callable_cv(): # Test classification X, y = load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) @@ -3532,4 +3532,4 @@ def test_boost_from_average_with_single_leaf_trees(): preds = model.predict(X) mean_preds = np.mean(preds) - assert y.min() <= mean_preds <= y.max() \ No newline at end of file + assert y.min() <= mean_preds <= y.max() From 6c0b2d97be3f9421ed7f209179126e2292dd9ae8 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Fri, 18 Mar 2022 18:08:30 -0600 Subject: [PATCH 14/37] remove metrics helper functions Signed-off-by: Miguel Trejo --- python-package/lightgbm/basic.py | 26 -------------------------- python-package/lightgbm/engine.py | 5 ++--- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 7d80aa716b22..37f3d811c51b 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -294,32 +294,6 @@ def to_string(x): return ' '.join(pairs) -def _separate_metrics_list(metrics_list): - """Separate built-in from callable evaluation metrics.""" - metrics_callable = [f for f in metrics_list if callable(f)] - metrics_builtin = [m for m in metrics_list if isinstance(m, str)] - return metrics_callable, metrics_builtin - - -def _concat_params_metrics(params, metrics_builtin): - """Concatenate metric from params (or default if not provided in params) and eval_metric.""" - params = deepcopy(params) - params_metric = deepcopy(params.get('metric')) - params_metric = [params_metric] if isinstance(params_metric, (str, type(None))) else params_metric - params_metric = [e for e in metrics_builtin if e not in params_metric] + params_metric - params_metric = [metric for metric in params_metric if metric is not None] - params['metric'] = params_metric - return params - - -def _concat_metric_feval_callables(metrics_callable, feval_callable): - """Concatenate evaluation metric from params and feval.""" - feval_callable = [feval_callable] if (isinstance(feval_callable, type(None)) or callable(feval_callable)) else feval_callable - feval_callable = [e for e in metrics_callable if e not in feval_callable] + feval_callable - feval_callable = [metric for metric in feval_callable if metric is not None] - return feval_callable - - 
def _objective_is_callable(params, fobj_callable): """Check if objective function from params or from fobj is callable.""" # objective function has different aliases
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 6931c9a68c77..087463586372 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -9,9 +9,8 @@ import numpy as np from . import callback -from .basic import (Booster, Dataset, LightGBMError, _choose_param_value, _concat_metric_feval_callables, - _concat_params_metrics, _ConfigAliases, _InnerPredictor, _log_warning, _objective_is_callable, - _separate_metrics_list) +from .basic import (Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning, + _objective_is_callable) from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold _LGBM_CustomObjectiveFunction = Callable[
From d43c879bbd9cf5b5a4a2f70d509ca7568d7a239c Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Fri, 18 Mar 2022 18:39:26 -0600 Subject: [PATCH 15/37] feat: choose objective through _choose_param_values Signed-off-by: Miguel Trejo --- python-package/lightgbm/basic.py | 19 ---------------- python-package/lightgbm/engine.py | 38 +++++++++++++++++-------------- 2 files changed, 21 insertions(+), 36 deletions(-)
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 37f3d811c51b..27913f4ea04d 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -294,25 +294,6 @@ def to_string(x): return ' '.join(pairs) -def _objective_is_callable(params, fobj_callable): - """Check if objective function from params or from fobj is callable.""" - # objective function has different aliases - params_objective = ( - deepcopy(params.get('objective')) - or deepcopy(params.get('objective_type')) - or deepcopy(params.get('app')) - or deepcopy(params.get('application')) - or deepcopy(params.get('loss')) - ) - # if objective in params is callable ignore the callable from fobj - if callable(params_objective): - return params_objective - elif callable(fobj_callable): - return fobj_callable - else: - return None - - class _TempFile: """Proxy class to workaround errors on Windows."""
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 087463586372..43af564deeba 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -9,8 +9,7 @@ import numpy as np from . import callback -from .basic import (Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning, - _objective_is_callable) +from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold _LGBM_CustomObjectiveFunction = Callable[ @@ -41,7 +41,8 @@ def train( Parameters ---------- params : dict - Parameters for training. If a callable objective function is passed through both ``params`` and ``fobj``, - the Booster uses the one from ``params``. + Parameters for Booster. Values passed through `params` take precedence over those + supplied via keyword arguments. train_set : Dataset Data to be trained on. num_boost_round : int, optional (default=100) @@ -127,11 +126,14 @@ def train( """ # create predictor first params = copy.deepcopy(params) - fobj = _objective_is_callable(params, fobj) - if fobj is not None: - for obj_alias in _ConfigAliases.get("objective"): - params.pop(obj_alias, None) - params['objective'] = 'none' + params = _choose_param_value( + main_param_name='objective', + params=params, + default_value=fobj + ) + if callable(params["objective"]): + fobj = params["objective"] + params["objective"] = 'none' for alias in _ConfigAliases.get("num_iterations"): if alias in params: num_boost_round = params.pop(alias) @@ -386,8 +388,8 @@ def cv(params, train_set, num_boost_round=100, Parameters ---------- params : dict - Parameters for Booster. If a callable objective function is passed through both ``params`` and ``fobj``, - the Booster uses the one from ``params``. + Parameters for Booster. Values passed through `params` take precedence over those + supplied via keyword arguments. train_set : Dataset Data to be trained on. num_boost_round : int, optional (default=100) @@ -490,13 +492,15 @@ def cv(params, train_set, num_boost_round=100, """ if not isinstance(train_set, Dataset): raise TypeError("Training only accepts Dataset object") - params = copy.deepcopy(params) - fobj = _objective_is_callable(params, fobj) - if fobj is not None: - for obj_alias in _ConfigAliases.get("objective"): - params.pop(obj_alias, None) - params['objective'] = 'none' + params = _choose_param_value( + main_param_name='objective', + params=params, + default_value=fobj + ) + if callable(params["objective"]): + fobj = params["objective"] + params["objective"] = 'none' for alias in _ConfigAliases.get("num_iterations"): if alias in params: _log_warning(f"Found '{alias}' in params. Will use it instead of 'num_boost_round' argument")
From d281cd55941b5649227cd71f86f90cdefd9d64c6 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Fri, 18 Mar 2022 20:03:28 -0600 Subject: [PATCH 16/37] test: test objective through _choose_param_values Signed-off-by: Miguel Trejo --- tests/python_package_test/test_basic.py | 29 +++++++++++++++++++++++++ 1 file changed, 29 insertions(+)
diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 5e68c71782e6..147a0aa85559 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -511,6 +511,35 @@ def test_choose_param_value(): assert original_params == expected_params +def test_choose_param_value_objective(): + def dummy_obj(preds, train_data): + return np.ones(preds.shape), np.ones(preds.shape) + + def mse_obj(y_pred, dtrain): + y_true = dtrain.get_label() + grad = (y_pred - y_true) + hess = np.ones(len(grad)) + return grad, hess + + # If callable is found in objective + params = {'objective': dummy_obj} + params = lgb.basic._choose_param_value( + main_param_name="objective", + params=params, + default_value=None + ) + assert params['objective'] == dummy_obj + + # If callable in objective and fobj prefer 'objective' + params = {'objective': dummy_obj} + params = lgb.basic._choose_param_value( + main_param_name="objective", + params=params, + default_value=mse_obj + ) + assert params['objective'] == dummy_obj + @pytest.mark.parametrize('collection', ['1d_np', '2d_np', 'pd_float', 'pd_str', '1d_list', '2d_list']) @pytest.mark.parametrize('dtype', [np.float32, np.float64]) def test_list_to_1d_numpy(collection, dtype):
From 259eecb20ad435abc17c1c27c388d1f355aaff14 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Fri, 18 
Mar 2022 20:04:05 -0600 Subject: [PATCH 17/37] test: test objective is callable in train Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 23 +++++++----------------- 1 file changed, 7 insertions(+), 16 deletions(-)
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 2f62c29f25c6..1327d353ba3b 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2199,7 +2199,7 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid error-mean' in res # multiclass metric alias with custom one for custom objective res = get_cv_result(params_obj_class_3_verbose, fobj=dummy_obj, feval=constant_metric) - assert len(res) == 2 + assert len(res) == 4 assert 'valid error-mean' in res # no metric for invalid class_num res = get_cv_result(params_obj_class_1_verbose, fobj=dummy_obj) @@ -2280,18 +2280,13 @@ def test_objective_callable_train(): X_train, X_validation, y_train, y_validation = train_test_split(X, y, test_size=0.2) train_dataset = lgb.Dataset(data=X_train, label=y_train) validation_dataset = lgb.Dataset(data=X_validation, label=y_validation, reference=train_dataset) - evals_result = {} - lgb.train( + booster = lgb.train( params=params, train_set=train_dataset, valid_sets=validation_dataset, - num_boost_round=5, - feval=decreasing_metric, - callbacks=[lgb.record_evaluation(evals_result)] + num_boost_round=5 ) - assert len(evals_result['valid_0']) == 2 - assert 'binary_logloss' in evals_result['valid_0'] - assert 'decreasing_metric' in evals_result['valid_0'] + assert booster.params['objective'] == 'none' # Test regression def mse_obj(y_pred, dtrain): @@ -2304,17 +2299,13 @@ def mse_obj(y_pred, dtrain): X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) lgb_train = lgb.Dataset(X_train, y_train) lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) - evals_result = {} - lgb.train( + booster = lgb.train( params, lgb_train, - num_boost_round=50, + num_boost_round=5, valid_sets=lgb_eval, - feval=decreasing_metric, - callbacks=[lgb.record_evaluation(evals_result)] ) - assert len(evals_result['valid_0']) == 1 - assert 'decreasing_metric' in evals_result['valid_0'] + assert booster.params['objective'] == 'none' def test_objective_callable_cv():
From 97d8ab70580fe648e3aa90411799a8bacfd390ee Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Fri, 18 Mar 2022 20:09:48 -0600 Subject: [PATCH 18/37] test: parametrize choose_param_value with objective aliases Signed-off-by: Miguel Trejo --- tests/python_package_test/test_basic.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 147a0aa85559..d22f945db896 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -15,6 +15,14 @@ from .utils import load_breast_cancer +objective_alias = [ + "objective", + "objective_type", + "app", + "application", + "loss" +] + def test_basic(tmp_path): X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), @@ -511,7 +519,8 @@ def test_choose_param_value(): assert original_params == expected_params -def test_choose_param_value_objective(): +@pytest.mark.parametrize("objective_alias", objective_alias) +def test_choose_param_value_objective(objective_alias): def dummy_obj(preds, train_data): return np.ones(preds.shape), np.ones(preds.shape) From 
4fddac115fbffbcac3744ce1c6a5226bcb07ee78 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Fri, 18 Mar 2022 21:41:35 -0600 Subject: [PATCH 19/37] test: cv booster metric is none Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 44 +++++++++++++++--------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 1327d353ba3b..fa9e6166c50f 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -28,6 +28,13 @@ def dummy_obj(preds, train_data): return np.ones(preds.shape), np.ones(preds.shape) +def mse_obj(y_pred, dtrain): + y_true = dtrain.get_label() + grad = (y_pred - y_true) + hess = np.ones(len(grad)) + return grad, hess + + def multi_logloss(y_true, y_pred): return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)]) @@ -2289,11 +2296,6 @@ def test_objective_callable_train(): assert booster.params['objective'] == 'none' # Test regression - def mse_obj(y_pred, dtrain): - y_true = dtrain.get_label() - grad = (y_pred - y_true) - hess = np.ones(len(grad)) - return grad, hess X, y = load_boston(return_X_y=True) params = {'verbose': -1, 'objective': mse_obj} X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) @@ -2318,25 +2320,33 @@ def test_objective_callable_cv(): params, train_dataset, num_boost_round=25, - nfold=3 + nfold=3, + return_cvbooster=True ) - assert 'valid binary_logloss-mean' in cv_res - assert len(cv_res['valid binary_logloss-mean']) == 25 + cv_booster = cv_res['cvbooster'].boosters + cv_objs = [ + cb.params['objective'] == 'none' for cb in cv_booster + ] + assert all(cv_objs) # Test regression - def mse_obj(y_pred, dtrain): - y_true = dtrain.get_label() - grad = (y_pred - y_true) - hess = np.ones(len(grad)) - return grad, hess X_train, y_train = make_synthetic_regression() params = {'verbose': -1} lgb_train = lgb.Dataset(X_train, y_train) params_with_metric = {'verbose': -1, 'objective': mse_obj, 'metric': 'l2'} - cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, - nfold=3, stratified=False) - assert 'valid l2-mean' in cv_res - assert len(cv_res['valid l2-mean']) == 10 + cv_res = lgb.cv( + params_with_metric, + lgb_train, + num_boost_round=10, + nfold=3, + stratified=False, + return_cvbooster=True + ) + cv_booster = cv_res['cvbooster'].boosters + cv_objs = [ + cb.params['objective'] == 'none' for cb in cv_booster + ] + assert all(cv_objs) def test_multiple_feval_cv(): From 28743900d37624f8f0c5879fdbcf7733f3155521 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 19 Mar 2022 12:12:14 -0600 Subject: [PATCH 20/37] fix: if string and callable choose callable Signed-off-by: Miguel Trejo --- python-package/lightgbm/engine.py | 2 ++ tests/python_package_test/test_engine.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 43af564deeba..1927853c7e05 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -493,6 +493,8 @@ def cv(params, train_set, num_boost_round=100, if not isinstance(train_set, Dataset): raise TypeError("Training only accepts Dataset object") params = copy.deepcopy(params) + if isinstance(params.get('objective'), str) and callable(fobj): + params['objective'] = fobj params = _choose_param_value( main_param_name='objective', params=params, diff --git a/tests/python_package_test/test_engine.py 
b/tests/python_package_test/test_engine.py index fa9e6166c50f..c890d6d9fa78 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2206,7 +2206,7 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid error-mean' in res # multiclass metric alias with custom one for custom objective res = get_cv_result(params_obj_class_3_verbose, fobj=dummy_obj, feval=constant_metric) - assert len(res) == 4 + assert len(res) == 2 assert 'valid error-mean' in res # no metric for invalid class_num res = get_cv_result(params_obj_class_1_verbose, fobj=dummy_obj) From 6a09e5e07c9e2565cca0164b63d2ac59a1afde3c Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Mon, 21 Mar 2022 09:52:28 -0600 Subject: [PATCH 21/37] test train uses custom objective metrics Signed-off-by: Miguel Trejo --- python-package/lightgbm/engine.py | 4 +- tests/python_package_test/test_basic.py | 25 ++++----- tests/python_package_test/test_engine.py | 68 ++++++++++++++++++------ 3 files changed, 68 insertions(+), 29 deletions(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 1927853c7e05..eebd63728e8b 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -41,7 +41,7 @@ def train( Parameters ---------- params : dict - Parameters for Booster. Values passed through `params` take precedence over those + Parameters for Booster. Values passed through ``params`` take precedence over those supplied via keyword arguments. train_set : Dataset Data to be trained on. @@ -388,7 +388,7 @@ def cv(params, train_set, num_boost_round=100, Parameters ---------- params : dict - Parameters for Booster. Values passed through `params` take precedence over those + Parameters for Booster. Values passed through ``params`` take precedence over those supplied via keyword arguments. train_set : Dataset Data to be trained on. 
diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index d22f945db896..f27d48861d43 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -15,14 +15,6 @@ from .utils import load_breast_cancer -objective_alias = [ - "objective", - "objective_type", - "app", - "application", - "loss" -] - def test_basic(tmp_path): X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), @@ -519,7 +511,7 @@ def test_choose_param_value(): assert original_params == expected_params -@pytest.mark.parametrize("objective_alias", objective_alias) +@pytest.mark.parametrize("objective_alias", lgb.basic._ConfigAliases.get("objective")) def test_choose_param_value_objective(objective_alias): def dummy_obj(preds, train_data): return np.ones(preds.shape), np.ones(preds.shape) @@ -531,7 +523,7 @@ def mse_obj(y_pred, dtrain): return grad, hess # If callable is found in objective - params = {'objective': dummy_obj} + params = {objective_alias: dummy_obj} params = lgb.basic._choose_param_value( main_param_name="objective", params=params, @@ -539,8 +531,8 @@ def mse_obj(y_pred, dtrain): ) assert params['objective'] == dummy_obj - # If callable in objective and fobj prefer 'objective' - params = {'objective': dummy_obj} + # Value in params should be preferred to the default_value passed from keyword arguments + params = {objective_alias: dummy_obj} params = lgb.basic._choose_param_value( main_param_name="objective", params=params, @@ -548,6 +540,15 @@ def mse_obj(y_pred, dtrain): ) assert params['objective'] == dummy_obj + # None of objective or its aliases in params, but default_value is callable. + params = {} + params = lgb.basic._choose_param_value( + main_param_name="objective", + params=params, + default_value=mse_obj + ) + assert params['objective'] == mse_obj + @pytest.mark.parametrize('collection', ['1d_np', '2d_np', 'pd_float', 'pd_str', '1d_list', '2d_list']) @pytest.mark.parametrize('dtype', [np.float32, np.float64]) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index c890d6d9fa78..efb3846a2460 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -11,6 +11,7 @@ import numpy as np import psutil import pytest +from scipy import special from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr from sklearn.datasets import load_svmlight_file, make_blobs, make_multilabel_classification from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score @@ -35,6 +36,27 @@ def mse_obj(y_pred, dtrain): return grad, hess +def logloss_obj(preds, train_data): + """Taken from https://maxhalford.github.io/blog/lightgbm-focal-loss/""" + y = train_data.get_label() + p = special.expit(preds) + grad = p - y + hess = p * (1 - p) + return grad, hess + + +def logloss_metric(preds, train_data): + """Taken from https://maxhalford.github.io/blog/lightgbm-focal-loss/""" + y = train_data.get_label() + p = special.expit(preds) + ll = np.empty_like(p) + pos = y == 1 + ll[pos] = np.log(p[pos]) + ll[~pos] = np.log(1 - p[~pos]) + is_higher_better = False + return 'logloss', -ll.mean(), is_higher_better + + def multi_logloss(y_true, y_pred): return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)]) @@ -2283,31 +2305,44 @@ def test_multiple_feval_train(): def test_objective_callable_train(): # Test classification X, y = 
load_breast_cancer(return_X_y=True) - params = {'verbose': -1, 'objective': dummy_obj, 'metric': 'binary_logloss'} - X_train, X_validation, y_train, y_validation = train_test_split(X, y, test_size=0.2) - train_dataset = lgb.Dataset(data=X_train, label=y_train) - validation_dataset = lgb.Dataset(data=X_validation, label=y_validation, reference=train_dataset) + params = { + 'verbose': -1, + 'objective': logloss_obj, + 'learning_rate': 0.01 + } + train_dataset = lgb.Dataset(X, y) booster = lgb.train( params=params, train_set=train_dataset, - valid_sets=validation_dataset, - num_boost_round=5 + num_boost_round=100, + feval=logloss_metric ) + y_pred = special.expit(booster.predict(X)) + logloss_error = log_loss(y, y_pred) + rocauc_error = roc_auc_score(y, y_pred) assert booster.params['objective'] == 'none' + assert logloss_error == pytest.approx(0.25, 0.1) + assert rocauc_error == pytest.approx(0.99, 0.5) # Test regression - X, y = load_boston(return_X_y=True) - params = {'verbose': -1, 'objective': mse_obj} - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) - lgb_train = lgb.Dataset(X_train, y_train) - lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) + X, y = make_synthetic_regression() + params = { + 'verbose': -1, + 'objective': mse_obj + } + lgb_train = lgb.Dataset(X, y) booster = lgb.train( params, lgb_train, - num_boost_round=5, - valid_sets=lgb_eval, + num_boost_round=100 ) + y_pred = booster.predict(X) + mae_error = mean_absolute_error(y, y_pred) + mse_error = mean_squared_error(y, y_pred) + assert booster.params['objective'] == 'none' + assert mae_error == pytest.approx(8, 0.1) + assert mse_error == pytest.approx(119, 1) def test_objective_callable_cv(): @@ -2331,9 +2366,12 @@ def test_objective_callable_cv(): # Test regression X_train, y_train = make_synthetic_regression() - params = {'verbose': -1} lgb_train = lgb.Dataset(X_train, y_train) - params_with_metric = {'verbose': -1, 'objective': mse_obj, 'metric': 'l2'} + params_with_metric = { + 'verbose': -1, + 'objective': mse_obj, + 'metric': 'l2' + } cv_res = lgb.cv( params_with_metric, lgb_train, From 457b455855b41fc07deea25353374b9e0309eef6 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Mon, 21 Mar 2022 10:17:35 -0600 Subject: [PATCH 22/37] test: cv uses custom objective metrics Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 28 +++++++++++++++--------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index efb3846a2460..c557afa4c852 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2337,36 +2337,40 @@ def test_objective_callable_train(): num_boost_round=100 ) y_pred = booster.predict(X) - mae_error = mean_absolute_error(y, y_pred) mse_error = mean_squared_error(y, y_pred) - assert booster.params['objective'] == 'none' - assert mae_error == pytest.approx(8, 0.1) assert mse_error == pytest.approx(119, 1) def test_objective_callable_cv(): # Test classification X, y = load_breast_cancer(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) - params = {'verbose': -1, 'objective': dummy_obj, 'metric': 'binary_logloss'} - train_dataset = lgb.Dataset(X_train, y_train) + params = { + 'verbose': -1, + 'objective': logloss_obj, + 'learning_rate': 0.01 + } + train_dataset = lgb.Dataset(X, y) cv_res = lgb.cv( params, train_dataset, - num_boost_round=25, + 
num_boost_round=100, nfold=3, return_cvbooster=True ) cv_booster = cv_res['cvbooster'].boosters + cv_logloss_errors = [ + log_loss(y, special.expit(cb.predict(X))) < 0.29 for cb in cv_booster + ] cv_objs = [ cb.params['objective'] == 'none' for cb in cv_booster ] assert all(cv_objs) + assert all(cv_logloss_errors) # Test regression - X_train, y_train = make_synthetic_regression() - lgb_train = lgb.Dataset(X_train, y_train) + X, y = make_synthetic_regression() + lgb_train = lgb.Dataset(X, y) params_with_metric = { 'verbose': -1, 'objective': mse_obj, @@ -2375,16 +2379,20 @@ def test_objective_callable_cv(): cv_res = lgb.cv( params_with_metric, lgb_train, - num_boost_round=10, + num_boost_round=100, nfold=3, stratified=False, return_cvbooster=True ) cv_booster = cv_res['cvbooster'].boosters + cv_mse_errors = [ + mean_squared_error(y, cb.predict(X)) < 295 for cb in cv_booster + ] cv_objs = [ cb.params['objective'] == 'none' for cb in cv_booster ] assert all(cv_objs) + assert all(cv_mse_errors) def test_multiple_feval_cv(): From ccab320e611b9145f8cdca435bc46b9b6ff1e3ff Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Mon, 21 Mar 2022 11:47:38 -0600 Subject: [PATCH 23/37] refactor: remove fobj parameter in train and cv Signed-off-by: Miguel Trejo --- python-package/lightgbm/engine.py | 93 ++++++++++++------------ tests/python_package_test/test_engine.py | 75 ++++++++++--------- 2 files changed, 86 insertions(+), 82 deletions(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index eebd63728e8b..3ad7c24efb4f 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -28,7 +28,6 @@ def train( num_boost_round: int = 100, valid_sets: Optional[List[Dataset]] = None, valid_names: Optional[List[str]] = None, - fobj: Optional[_LGBM_CustomObjectiveFunction] = None, feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None, init_model: Optional[Union[str, Path, Booster]] = None, feature_name: Union[List[str], str] = 'auto', @@ -51,27 +50,6 @@ def train( List of data to be evaluated on during training. valid_names : list of str, or None, optional (default=None) Names of ``valid_sets``. - fobj : callable or None, optional (default=None) - Customized objective function. - Should accept two parameters: preds, train_data, - and return (grad, hess). - - preds : numpy 1-D array or numpy 2-D array (for multi-class task) - The predicted values. - Predicted values are returned before any transformation, - e.g. they are raw margin instead of probability of positive class for binary task. - train_data : Dataset - The training dataset. - grad : numpy 1-D array or numpy 2-D array (for multi-class task) - The value of the first order derivative (gradient) of the loss - with respect to the elements of preds for each sample point. - hess : numpy 1-D array or numpy 2-D array (for multi-class task) - The value of the second order derivative (Hessian) of the loss - with respect to the elements of preds for each sample point. - - For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes], - and grad and hess should be returned in the same format. - feval : callable, list of callable, or None, optional (default=None) Customized evaluation function. Each evaluation function should accept two parameters: preds, eval_data, @@ -119,6 +97,27 @@ def train( List of callback functions that are applied at each iteration. See Callbacks in Python API for more information. 
+ Note + ---- + A custom objective function can be provided for the ``objective`` parameter. + It should accept two parameters: preds, train_data and return (grad, hess). + + preds : numpy 1-D array or numpy 2-D array (for multi-class task) + The predicted values. + Predicted values are returned before any transformation, + e.g. they are raw margin instead of probability of positive class for binary task. + train_data : Dataset + The training dataset. + grad : numpy 1-D array or numpy 2-D array (for multi-class task) + The value of the first order derivative (gradient) of the loss + with respect to the elements of preds for each sample point. + hess : numpy 1-D array or numpy 2-D array (for multi-class task) + The value of the second order derivative (Hessian) of the loss + with respect to the elements of preds for each sample point. + + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes], + and grad and hess should be returned in the same format. + Returns ------- booster : Booster @@ -129,8 +128,9 @@ def train( params = _choose_param_value( main_param_name='objective', params=params, - default_value=fobj + default_value=None ) + fobj = None if callable(params["objective"]): fobj = params["objective"] params["objective"] = 'none' @@ -409,27 +409,6 @@ def cv(params, train_set, num_boost_round=100, metrics : str, list of str, or None, optional (default=None) Evaluation metrics to be monitored while CV. If not None, the metric in ``params`` will be overridden. - fobj : callable or None, optional (default=None) - Customized objective function. - Should accept two parameters: preds, train_data, - and return (grad, hess). - - preds : numpy 1-D array or numpy 2-D array (for multi-class task) - The predicted values. - Predicted values are returned before any transformation, - e.g. they are raw margin instead of probability of positive class for binary task. - train_data : Dataset - The training dataset. - grad : numpy 1-D array or numpy 2-D array (for multi-class task) - The value of the first order derivative (gradient) of the loss - with respect to the elements of preds for each sample point. - hess : numpy 1-D array or numpy 2-D array (for multi-class task) - The value of the second order derivative (Hessian) of the loss - with respect to the elements of preds for each sample point. - - For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes], - and grad and hess should be returned in the same format. - feval : callable, list of callable, or None, optional (default=None) Customized evaluation function. Each evaluation function should accept two parameters: preds, eval_data, @@ -480,6 +459,27 @@ def cv(params, train_set, num_boost_round=100, return_cvbooster : bool, optional (default=False) Whether to return Booster models trained on each fold through ``CVBooster``. + Note + ---- + A custom objective function can be provided for the ``objective`` parameter. + It should accept two parameters: preds, train_data and return (grad, hess). + + preds : numpy 1-D array or numpy 2-D array (for multi-class task) + The predicted values. + Predicted values are returned before any transformation, + e.g. they are raw margin instead of probability of positive class for binary task. + train_data : Dataset + The training dataset. + grad : numpy 1-D array or numpy 2-D array (for multi-class task) + The value of the first order derivative (gradient) of the loss + with respect to the elements of preds for each sample point. 
+ hess : numpy 1-D array or numpy 2-D array (for multi-class task) + The value of the second order derivative (Hessian) of the loss + with respect to the elements of preds for each sample point. + + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes], + and grad and hess should be returned in the same format. + Returns ------- eval_hist : dict @@ -493,13 +493,12 @@ def cv(params, train_set, num_boost_round=100, if not isinstance(train_set, Dataset): raise TypeError("Training only accepts Dataset object") params = copy.deepcopy(params) - if isinstance(params.get('objective'), str) and callable(fobj): - params['objective'] = fobj params = _choose_param_value( main_param_name='objective', params=params, - default_value=fobj + default_value=None ) + fobj = None if callable(params["objective"]): fobj = params["objective"] params["objective"] = 'none' diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index c557afa4c852..7067378ec992 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1906,7 +1906,7 @@ def test_metrics(): lgb_valid = lgb.Dataset(X_test, y_test, reference=lgb_train) evals_result = {} - params_verbose = {'verbose': -1} + params_verbose = {'verbose': -1, 'objective': dummy_obj} params_obj_verbose = {'objective': 'binary', 'verbose': -1} params_obj_metric_log_verbose = {'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1} params_obj_metric_err_verbose = {'objective': 'binary', 'metric': 'binary_error', 'verbose': -1} @@ -1915,11 +1915,11 @@ def test_metrics(): 'metric': ['binary_logloss', 'binary_error'], 'verbose': -1} params_obj_metric_none_verbose = {'objective': 'binary', 'metric': 'None', 'verbose': -1} - params_metric_log_verbose = {'metric': 'binary_logloss', 'verbose': -1} + params_metric_log_verbose = {'objective': dummy_obj, 'metric': 'binary_logloss', 'verbose': -1} params_metric_err_verbose = {'metric': 'binary_error', 'verbose': -1} - params_metric_inv_verbose = {'metric_types': 'invalid_metric', 'verbose': -1} - params_metric_multi_verbose = {'metric': ['binary_logloss', 'binary_error'], 'verbose': -1} - params_metric_none_verbose = {'metric': 'None', 'verbose': -1} + params_metric_inv_verbose = {'objective': dummy_obj, 'metric_types': 'invalid_metric', 'verbose': -1} + params_metric_multi_verbose = {'objective': dummy_obj, 'metric': ['binary_logloss', 'binary_error'], 'verbose': -1} + params_metric_none_verbose = {'objective': dummy_obj, 'metric': 'None', 'verbose': -1} def get_cv_result(params=params_obj_verbose, **kwargs): return lgb.cv(params, lgb_train, num_boost_round=2, **kwargs) @@ -1983,32 +1983,32 @@ def train_booster(params=params_obj_verbose, **kwargs): # fobj, no feval # no default metric - res = get_cv_result(params=params_verbose, fobj=dummy_obj) + res = get_cv_result(params=params_verbose) assert len(res) == 0 # metric in params - res = get_cv_result(params=params_metric_err_verbose, fobj=dummy_obj) + res = get_cv_result(params=params_metric_err_verbose) assert len(res) == 2 assert 'valid binary_error-mean' in res # metric in args - res = get_cv_result(params=params_verbose, fobj=dummy_obj, metrics='binary_error') + res = get_cv_result(params=params_verbose, metrics='binary_error') assert len(res) == 2 assert 'valid binary_error-mean' in res # metric in args overwrites its' alias in params - res = get_cv_result(params=params_metric_inv_verbose, fobj=dummy_obj, metrics='binary_error') + res = 
get_cv_result(params=params_metric_inv_verbose, metrics='binary_error') assert len(res) == 2 assert 'valid binary_error-mean' in res # multiple metrics in params - res = get_cv_result(params=params_metric_multi_verbose, fobj=dummy_obj) + res = get_cv_result(params=params_metric_multi_verbose) assert len(res) == 4 assert 'valid binary_logloss-mean' in res assert 'valid binary_error-mean' in res # multiple metrics in args - res = get_cv_result(params=params_verbose, fobj=dummy_obj, + res = get_cv_result(params=params_verbose, metrics=['binary_logloss', 'binary_error']) assert len(res) == 4 assert 'valid binary_logloss-mean' in res @@ -2066,39 +2066,39 @@ def train_booster(params=params_obj_verbose, **kwargs): # fobj, feval # no default metric, only custom one - res = get_cv_result(params=params_verbose, fobj=dummy_obj, feval=constant_metric) + res = get_cv_result(params=params_verbose, feval=constant_metric) assert len(res) == 2 assert 'valid error-mean' in res # metric in params with custom one - res = get_cv_result(params=params_metric_err_verbose, fobj=dummy_obj, feval=constant_metric) + res = get_cv_result(params=params_metric_err_verbose, feval=constant_metric) assert len(res) == 4 assert 'valid binary_error-mean' in res assert 'valid error-mean' in res # metric in args with custom one - res = get_cv_result(params=params_verbose, fobj=dummy_obj, + res = get_cv_result(params=params_verbose, feval=constant_metric, metrics='binary_error') assert len(res) == 4 assert 'valid binary_error-mean' in res assert 'valid error-mean' in res # metric in args overwrites one in params, custom one is evaluated too - res = get_cv_result(params=params_metric_inv_verbose, fobj=dummy_obj, + res = get_cv_result(params=params_metric_inv_verbose, feval=constant_metric, metrics='binary_error') assert len(res) == 4 assert 'valid binary_error-mean' in res assert 'valid error-mean' in res # multiple metrics in params with custom one - res = get_cv_result(params=params_metric_multi_verbose, fobj=dummy_obj, feval=constant_metric) + res = get_cv_result(params=params_metric_multi_verbose, feval=constant_metric) assert len(res) == 6 assert 'valid binary_logloss-mean' in res assert 'valid binary_error-mean' in res assert 'valid error-mean' in res # multiple metrics in args with custom one - res = get_cv_result(params=params_verbose, fobj=dummy_obj, feval=constant_metric, + res = get_cv_result(params=params_verbose, feval=constant_metric, metrics=['binary_logloss', 'binary_error']) assert len(res) == 6 assert 'valid binary_logloss-mean' in res @@ -2106,7 +2106,7 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid error-mean' in res # custom metric is evaluated despite 'None' is passed - res = get_cv_result(params=params_metric_none_verbose, fobj=dummy_obj, feval=constant_metric) + res = get_cv_result(params=params_metric_none_verbose, feval=constant_metric) assert len(res) == 2 assert 'valid error-mean' in res @@ -2140,16 +2140,16 @@ def train_booster(params=params_obj_verbose, **kwargs): # fobj, no feval # no default metric - train_booster(params=params_verbose, fobj=dummy_obj) + train_booster(params=params_verbose) assert len(evals_result) == 0 # metric in params - train_booster(params=params_metric_log_verbose, fobj=dummy_obj) + train_booster(params=params_metric_log_verbose) assert len(evals_result['valid_0']) == 1 assert 'binary_logloss' in evals_result['valid_0'] # multiple metrics in params - train_booster(params=params_metric_multi_verbose, fobj=dummy_obj) + 
train_booster(params=params_metric_multi_verbose) assert len(evals_result['valid_0']) == 2 assert 'binary_logloss' in evals_result['valid_0'] assert 'binary_error' in evals_result['valid_0'] @@ -2187,25 +2187,25 @@ def train_booster(params=params_obj_verbose, **kwargs): # fobj, feval # no default metric, only custom one - train_booster(params=params_verbose, fobj=dummy_obj, feval=constant_metric) + train_booster(params=params_verbose, feval=constant_metric) assert len(evals_result['valid_0']) == 1 assert 'error' in evals_result['valid_0'] # metric in params with custom one - train_booster(params=params_metric_log_verbose, fobj=dummy_obj, feval=constant_metric) + train_booster(params=params_metric_log_verbose, feval=constant_metric) assert len(evals_result['valid_0']) == 2 assert 'binary_logloss' in evals_result['valid_0'] assert 'error' in evals_result['valid_0'] # multiple metrics in params with custom one - train_booster(params=params_metric_multi_verbose, fobj=dummy_obj, feval=constant_metric) + train_booster(params=params_metric_multi_verbose, feval=constant_metric) assert len(evals_result['valid_0']) == 3 assert 'binary_logloss' in evals_result['valid_0'] assert 'binary_error' in evals_result['valid_0'] assert 'error' in evals_result['valid_0'] # custom metric is evaluated despite 'None' is passed - train_booster(params=params_metric_none_verbose, fobj=dummy_obj, feval=constant_metric) + train_booster(params=params_metric_none_verbose, feval=constant_metric) assert len(evals_result) == 1 assert 'error' in evals_result['valid_0'] @@ -2214,9 +2214,13 @@ def train_booster(params=params_obj_verbose, **kwargs): obj_multi_aliases = ['multiclass', 'softmax', 'multiclassova', 'multiclass_ova', 'ova', 'ovr'] for obj_multi_alias in obj_multi_aliases: + # Custom objective replaces multiclass params_obj_class_3_verbose = {'objective': obj_multi_alias, 'num_class': 3, 'verbose': -1} + params_obj_class_3_custom_obj = {'objective': dummy_obj, 'num_class': 3, 'verbose': -1} params_obj_class_1_verbose = {'objective': obj_multi_alias, 'num_class': 1, 'verbose': -1} + params_obj_class_1_custom_obj = {'objective': dummy_obj, 'num_class': 1, 'verbose': -1} params_obj_verbose = {'objective': obj_multi_alias, 'verbose': -1} + params_obj_custom_obj = {'objective': dummy_obj, 'verbose': -1} # multiclass default metric res = get_cv_result(params_obj_class_3_verbose) assert len(res) == 2 @@ -2227,20 +2231,20 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid multi_logloss-mean' in res assert 'valid error-mean' in res # multiclass metric alias with custom one for custom objective - res = get_cv_result(params_obj_class_3_verbose, fobj=dummy_obj, feval=constant_metric) + res = get_cv_result(params_obj_class_3_custom_obj, feval=constant_metric) assert len(res) == 2 assert 'valid error-mean' in res # no metric for invalid class_num - res = get_cv_result(params_obj_class_1_verbose, fobj=dummy_obj) + res = get_cv_result(params_obj_class_1_custom_obj) assert len(res) == 0 # custom metric for invalid class_num - res = get_cv_result(params_obj_class_1_verbose, fobj=dummy_obj, feval=constant_metric) + res = get_cv_result(params_obj_class_1_custom_obj, feval=constant_metric) assert len(res) == 2 assert 'valid error-mean' in res # multiclass metric alias with custom one with invalid class_num with pytest.raises(lgb.basic.LightGBMError): - get_cv_result(params_obj_class_1_verbose, metrics=obj_multi_alias, - fobj=dummy_obj, feval=constant_metric) + get_cv_result(params_obj_class_1_custom_obj, 
metrics=obj_multi_alias, + feval=constant_metric) # multiclass default metric without num_class with pytest.raises(lgb.basic.LightGBMError): get_cv_result(params_obj_verbose) @@ -2261,20 +2265,20 @@ def train_booster(params=params_obj_verbose, **kwargs): with pytest.raises(lgb.basic.LightGBMError): get_cv_result(params_class_3_verbose) # no metric with non-default num_class for custom objective - res = get_cv_result(params_class_3_verbose, fobj=dummy_obj) + res = get_cv_result(params_obj_class_3_custom_obj) assert len(res) == 0 for metric_multi_alias in obj_multi_aliases + ['multi_logloss']: # multiclass metric alias for custom objective - res = get_cv_result(params_class_3_verbose, metrics=metric_multi_alias, fobj=dummy_obj) + res = get_cv_result(params_obj_class_3_custom_obj, metrics=metric_multi_alias) assert len(res) == 2 assert 'valid multi_logloss-mean' in res # multiclass metric for custom objective - res = get_cv_result(params_class_3_verbose, metrics='multi_error', fobj=dummy_obj) + res = get_cv_result(params_obj_class_3_custom_obj, metrics='multi_error') assert len(res) == 2 assert 'valid multi_error-mean' in res # binary metric with non-default num_class for custom objective with pytest.raises(lgb.basic.LightGBMError): - get_cv_result(params_class_3_verbose, metrics='binary_error', fobj=dummy_obj) + get_cv_result(params_obj_class_3_custom_obj, metrics='binary_error') def test_multiple_feval_train(): @@ -2451,7 +2455,8 @@ def custom_obj(y_pred, ds): builtin_obj_bst = lgb.train(params, ds, num_boost_round=10) builtin_obj_preds = builtin_obj_bst.predict(X) - custom_obj_bst = lgb.train(params, ds, num_boost_round=10, fobj=custom_obj) + params = {'objective': custom_obj, 'num_class': 3, 'num_leaves': 7} + custom_obj_bst = lgb.train(params, ds, num_boost_round=10) custom_obj_preds = softmax(custom_obj_bst.predict(X)) np.testing.assert_allclose(builtin_obj_preds, custom_obj_preds, rtol=0.01) From 0730dfb2f4806c12e0afece00159479396572f94 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 26 Mar 2022 07:40:19 -0600 Subject: [PATCH 24/37] refactor: objective through params in sklearn API Signed-off-by: Miguel Trejo --- python-package/lightgbm/sklearn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 7ebba0bc962c..15280c347794 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -597,7 +597,9 @@ def _process_params(self, stage: str) -> Dict[str, Any]: if callable(self._objective): if stage == "fit": self._fobj = _ObjectiveFunctionWrapper(self._objective) - params['objective'] = 'None' # objective = nullptr for unknown objective + params['objective'] = self._fobj + elif stage == "predict": + params['objective'] = 'None' else: if stage == "fit": self._fobj = None @@ -756,7 +758,6 @@ def _get_meta_data(collection, name, i): num_boost_round=self.n_estimators, valid_sets=valid_sets, valid_names=eval_names, - fobj=self._fobj, feval=eval_metrics_callable, init_model=init_model, feature_name=feature_name, From 9444018b85dab7d08e98b8cf094047eb10ccc094 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 26 Mar 2022 08:09:31 -0600 Subject: [PATCH 25/37] custom objective function in advanced_example Signed-off-by: Miguel Trejo --- examples/python-guide/advanced_example.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/python-guide/advanced_example.py b/examples/python-guide/advanced_example.py index 
54b62cdb1563..eabbf75dd677 100644 --- a/examples/python-guide/advanced_example.py +++ b/examples/python-guide/advanced_example.py @@ -1,6 +1,7 @@ # coding: utf-8 import json import pickle +import copy from pathlib import Path import numpy as np @@ -158,12 +159,14 @@ def binary_error(preds, train_data): preds = 1. / (1. + np.exp(-preds)) return 'error', np.mean(labels != (preds > 0.5)), False +# Pass custom objective function through params +params_custom_obj = copy.deepcopy(params) +params_custom_obj['objective'] = loglikelihood -gbm = lgb.train(params, +gbm = lgb.train(params_custom_obj, lgb_train, num_boost_round=10, init_model=gbm, - fobj=loglikelihood, feval=binary_error, valid_sets=lgb_eval) @@ -182,12 +185,14 @@ def accuracy(preds, train_data): preds = 1. / (1. + np.exp(-preds)) return 'accuracy', np.mean(labels == (preds > 0.5)), True +# Pass custom objective function through params +params_custom_obj = copy.deepcopy(params) +params_custom_obj['objective'] = loglikelihood -gbm = lgb.train(params, +gbm = lgb.train(params_custom_obj, lgb_train, num_boost_round=10, init_model=gbm, - fobj=loglikelihood, feval=[binary_error, accuracy], valid_sets=lgb_eval) From a4830787815251e7bf2ad88108f68e101a0c8a49 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 26 Mar 2022 08:17:28 -0600 Subject: [PATCH 26/37] fix whitespackes lint --- examples/python-guide/advanced_example.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/python-guide/advanced_example.py b/examples/python-guide/advanced_example.py index eabbf75dd677..6c2a42ce2bf6 100644 --- a/examples/python-guide/advanced_example.py +++ b/examples/python-guide/advanced_example.py @@ -1,7 +1,7 @@ # coding: utf-8 +import copy import json import pickle -import copy from pathlib import Path import numpy as np @@ -159,6 +159,7 @@ def binary_error(preds, train_data): preds = 1. / (1. + np.exp(-preds)) return 'error', np.mean(labels != (preds > 0.5)), False + # Pass custom objective function through params params_custom_obj = copy.deepcopy(params) params_custom_obj['objective'] = loglikelihood @@ -185,6 +186,7 @@ def accuracy(preds, train_data): preds = 1. / (1. 
+ np.exp(-preds)) return 'accuracy', np.mean(labels == (preds > 0.5)), True + # Pass custom objective function through params params_custom_obj = copy.deepcopy(params) params_custom_obj['objective'] = loglikelihood From 3a22ab33133d38184fa9dfa878eb1a6865c65bf4 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sun, 27 Mar 2022 13:26:35 -0600 Subject: [PATCH 27/37] objective is none not a particular case for predict method Signed-off-by: Miguel Trejo --- python-package/lightgbm/sklearn.py | 2 +- tests/python_package_test/test_engine.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 15280c347794..8a211c4c5540 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -598,7 +598,7 @@ def _process_params(self, stage: str) -> Dict[str, Any]: if stage == "fit": self._fobj = _ObjectiveFunctionWrapper(self._objective) params['objective'] = self._fobj - elif stage == "predict": + else: params['objective'] = 'None' else: if stage == "fit": diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 243c383fab66..5d868233de90 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2311,8 +2311,7 @@ def test_multiple_feval_train(): assert 'decreasing_metric' in evals_result['valid_0'] -def test_objective_callable_train(): - # Test classification +def test_objective_callable_train_binary_classification(): X, y = load_breast_cancer(return_X_y=True) params = { 'verbose': -1, @@ -2333,7 +2332,8 @@ def test_objective_callable_train(): assert logloss_error == pytest.approx(0.25, 0.1) assert rocauc_error == pytest.approx(0.99, 0.5) - # Test regression + +def test_objective_callable_train_regression(): X, y = make_synthetic_regression() params = { 'verbose': -1, @@ -2351,8 +2351,7 @@ def test_objective_callable_train(): assert mse_error == pytest.approx(119, 1) -def test_objective_callable_cv(): - # Test classification +def test_objective_callable_cv_binary_classification(): X, y = load_breast_cancer(return_X_y=True) params = { 'verbose': -1, @@ -2377,7 +2376,8 @@ def test_objective_callable_cv(): assert all(cv_objs) assert all(cv_logloss_errors) - # Test regression + +def test_objective_callable_cv_regression(): X, y = make_synthetic_regression() lgb_train = lgb.Dataset(X, y) params_with_metric = { From b272740efca247ebeca4494b4e901e9a8bb6b0f4 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sun, 27 Mar 2022 19:46:29 -0600 Subject: [PATCH 28/37] replace scipy.expit with custom implementation Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 33 +++++++----------------- tests/python_package_test/utils.py | 4 +++ 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 5d868233de90..c8c16ce9b8fc 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -12,7 +12,6 @@ import numpy as np import psutil import pytest -from scipy import special from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr from sklearn.datasets import load_svmlight_file, make_blobs, make_multilabel_classification from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score @@ -20,8 +19,8 @@ import lightgbm as lgb -from .utils import (load_boston, load_breast_cancer, 
load_digits, load_iris, make_synthetic_regression, - sklearn_multiclass_custom_objective, softmax) +from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, logistic_sigmoid, + make_synthetic_regression, sklearn_multiclass_custom_objective, softmax) decreasing_generator = itertools.count(0, -1) @@ -38,26 +37,13 @@ def mse_obj(y_pred, dtrain): def logloss_obj(preds, train_data): - """Taken from https://maxhalford.github.io/blog/lightgbm-focal-loss/""" - y = train_data.get_label() - p = special.expit(preds) - grad = p - y - hess = p * (1 - p) + y_true = train_data.get_label() + y_pred = logistic_sigmoid(preds) + grad = y_pred - y_true + hess = y_pred * (1.0 - y_pred) return grad, hess -def logloss_metric(preds, train_data): - """Taken from https://maxhalford.github.io/blog/lightgbm-focal-loss/""" - y = train_data.get_label() - p = special.expit(preds) - ll = np.empty_like(p) - pos = y == 1 - ll[pos] = np.log(p[pos]) - ll[~pos] = np.log(1 - p[~pos]) - is_higher_better = False - return 'logloss', -ll.mean(), is_higher_better - - def multi_logloss(y_true, y_pred): return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)]) @@ -2322,10 +2308,9 @@ def test_objective_callable_train_binary_classification(): booster = lgb.train( params=params, train_set=train_dataset, - num_boost_round=100, - feval=logloss_metric + num_boost_round=100 ) - y_pred = special.expit(booster.predict(X)) + y_pred = logistic_sigmoid(booster.predict(X)) logloss_error = log_loss(y, y_pred) rocauc_error = roc_auc_score(y, y_pred) assert booster.params['objective'] == 'none' @@ -2368,7 +2353,7 @@ def test_objective_callable_cv_binary_classification(): ) cv_booster = cv_res['cvbooster'].boosters cv_logloss_errors = [ - log_loss(y, special.expit(cb.predict(X))) < 0.29 for cb in cv_booster + log_loss(y, logistic_sigmoid(cb.predict(X))) < 0.29 for cb in cv_booster ] cv_objs = [ cb.params['objective'] == 'none' for cb in cv_booster diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index 63950d471608..3793cc354ad8 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -125,6 +125,10 @@ def softmax(x): return exp_x / np.sum(exp_x, axis=1).reshape(-1, 1) +def logistic_sigmoid(x): + return 1.0 / (1.0 + np.exp(-x)) + + def sklearn_multiclass_custom_objective(y_true, y_pred): num_rows, num_class = y_pred.shape prob = softmax(y_pred) From 3a69df94e86ef651f749e0ee5318a57d9b7881be Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 2 Apr 2022 07:56:33 -0600 Subject: [PATCH 29/37] test: set num_boost_round value to 20 Signed-off-by: Miguel Trejo --- tests/python_package_test/test_engine.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index c8c16ce9b8fc..f0c9ffeebfea 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2308,13 +2308,13 @@ def test_objective_callable_train_binary_classification(): booster = lgb.train( params=params, train_set=train_dataset, - num_boost_round=100 + num_boost_round=20 ) y_pred = logistic_sigmoid(booster.predict(X)) logloss_error = log_loss(y, y_pred) rocauc_error = roc_auc_score(y, y_pred) assert booster.params['objective'] == 'none' - assert logloss_error == pytest.approx(0.25, 0.1) + assert logloss_error == pytest.approx(0.55, 0.1) assert rocauc_error == pytest.approx(0.99, 0.5) @@ -2328,12 +2328,12 @@ def 
test_objective_callable_train_regression(): booster = lgb.train( params, lgb_train, - num_boost_round=100 + num_boost_round=20 ) y_pred = booster.predict(X) mse_error = mean_squared_error(y, y_pred) assert booster.params['objective'] == 'none' - assert mse_error == pytest.approx(119, 1) + assert mse_error == pytest.approx(286, 1) def test_objective_callable_cv_binary_classification(): @@ -2347,13 +2347,13 @@ def test_objective_callable_cv_binary_classification(): cv_res = lgb.cv( params, train_dataset, - num_boost_round=100, + num_boost_round=20, nfold=3, return_cvbooster=True ) cv_booster = cv_res['cvbooster'].boosters cv_logloss_errors = [ - log_loss(y, logistic_sigmoid(cb.predict(X))) < 0.29 for cb in cv_booster + log_loss(y, logistic_sigmoid(cb.predict(X))) < 0.56 for cb in cv_booster ] cv_objs = [ cb.params['objective'] == 'none' for cb in cv_booster @@ -2373,14 +2373,14 @@ def test_objective_callable_cv_regression(): cv_res = lgb.cv( params_with_metric, lgb_train, - num_boost_round=100, + num_boost_round=20, nfold=3, stratified=False, return_cvbooster=True ) cv_booster = cv_res['cvbooster'].boosters cv_mse_errors = [ - mean_squared_error(y, cb.predict(X)) < 295 for cb in cv_booster + mean_squared_error(y, cb.predict(X)) < 463 for cb in cv_booster ] cv_objs = [ cb.params['objective'] == 'none' for cb in cv_booster From be96bcc65ad13e1f3d7f4a240ec6e1f61236282b Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 2 Apr 2022 08:31:04 -0600 Subject: [PATCH 30/37] fix: custom objective default_value is none Signed-off-by: Miguel Trejo --- python-package/lightgbm/basic.py | 6 +++--- python-package/lightgbm/engine.py | 22 +++++++++------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 27913f4ea04d..5c88ac7b27b1 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -3172,7 +3172,7 @@ def eval(self, data, name, feval=None): preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. - If ``fobj`` is specified, predicted values are returned before any transformation, + If custom objective function is used, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset A ``Dataset`` to evaluate. @@ -3218,7 +3218,7 @@ def eval_train(self, feval=None): preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. - If ``fobj`` is specified, predicted values are returned before any transformation, + If custom objective function is used, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset The training dataset. @@ -3249,7 +3249,7 @@ def eval_valid(self, feval=None): preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. - If ``fobj`` is specified, predicted values are returned before any transformation, + If custom objective function is used, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. 
eval_data : Dataset The validation dataset. diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 3ad7c24efb4f..4927ebe27636 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -12,10 +12,6 @@ from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold -_LGBM_CustomObjectiveFunction = Callable[ - [np.ndarray, Dataset], - Tuple[np.ndarray, np.ndarray] -] _LGBM_CustomMetricFunction = Callable[ [np.ndarray, Dataset], Tuple[str, float, bool] @@ -40,8 +36,8 @@ def train( Parameters ---------- params : dict - Parameters for Booster. Values passed through ``params`` take precedence over those - supplied via keyword arguments. + Parameters for training. Values passed through ``params`` take precedence over those + supplied via arguments. train_set : Dataset Data to be trained on. num_boost_round : int, optional (default=100) @@ -58,7 +54,7 @@ def train( preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. - If ``fobj`` is specified, predicted values are returned before any transformation, + If custom objective function is used, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset A ``Dataset`` to evaluate. @@ -128,7 +124,7 @@ def train( params = _choose_param_value( main_param_name='objective', params=params, - default_value=None + default_value='none' ) fobj = None if callable(params["objective"]): @@ -379,7 +375,7 @@ def _agg_cv_result(raw_results): def cv(params, train_set, num_boost_round=100, folds=None, nfold=5, stratified=True, shuffle=True, - metrics=None, fobj=None, feval=None, init_model=None, + metrics=None, feval=None, init_model=None, feature_name='auto', categorical_feature='auto', fpreproc=None, seed=0, callbacks=None, eval_train_metric=False, return_cvbooster=False): @@ -388,8 +384,8 @@ def cv(params, train_set, num_boost_round=100, Parameters ---------- params : dict - Parameters for Booster. Values passed through ``params`` take precedence over those - supplied via keyword arguments. + Parameters for training. Values passed through ``params`` take precedence over those + supplied via arguments. train_set : Dataset Data to be trained on. num_boost_round : int, optional (default=100) @@ -417,7 +413,7 @@ def cv(params, train_set, num_boost_round=100, preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. - If ``fobj`` is specified, predicted values are returned before any transformation, + If custom objective function is used, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset A ``Dataset`` to evaluate. 
@@ -496,7 +492,7 @@ def cv(params, train_set, num_boost_round=100, params = _choose_param_value( main_param_name='objective', params=params, - default_value=None + default_value='none' ) fobj = None if callable(params["objective"]): From d10d36ee7da7f2c20a3afd85b2754a739deec9b2 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 2 Apr 2022 08:36:26 -0600 Subject: [PATCH 31/37] refactor: remove self._fobj Signed-off-by: Miguel Trejo --- python-package/lightgbm/sklearn.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 8a211c4c5540..a1301f98323f 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -596,13 +596,10 @@ def _process_params(self, stage: str) -> Dict[str, Any]: raise ValueError("Unknown LGBMModel type.") if callable(self._objective): if stage == "fit": - self._fobj = _ObjectiveFunctionWrapper(self._objective) - params['objective'] = self._fobj + params['objective'] = _ObjectiveFunctionWrapper(self._objective) else: params['objective'] = 'None' else: - if stage == "fit": - self._fobj = None params['objective'] = self._objective params.pop('importance_type', None) From 5722b738a63794fee0c2375ad805244e75cdbbd3 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sat, 2 Apr 2022 09:41:06 -0600 Subject: [PATCH 32/37] custom_objective default value is None Signed-off-by: Miguel Trejo --- python-package/lightgbm/engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 4927ebe27636..71a9a115d342 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -124,7 +124,7 @@ def train( params = _choose_param_value( main_param_name='objective', params=params, - default_value='none' + default_value=None ) fobj = None if callable(params["objective"]): @@ -492,7 +492,7 @@ def cv(params, train_set, num_boost_round=100, params = _choose_param_value( main_param_name='objective', params=params, - default_value='none' + default_value=None ) fobj = None if callable(params["objective"]): From 8dc017a6a7f1a42e2904f13ba80bb9a7df010a2a Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sun, 10 Apr 2022 21:16:06 -0500 Subject: [PATCH 33/37] refactor: variables name reference dummy_obj Signed-off-by: Miguel Trejo --- tests/python_package_test/test_basic.py | 11 +-- tests/python_package_test/test_engine.py | 86 ++++++++++-------------- tests/python_package_test/utils.py | 8 +++ 3 files changed, 46 insertions(+), 59 deletions(-) diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 0f38ad829474..43dcaff6c08b 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -14,7 +14,7 @@ import lightgbm as lgb from lightgbm.compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series -from .utils import load_breast_cancer +from .utils import load_breast_cancer, dummy_obj, mse_obj def test_basic(tmp_path): @@ -515,15 +515,6 @@ def test_choose_param_value(): @pytest.mark.parametrize("objective_alias", lgb.basic._ConfigAliases.get("objective")) def test_choose_param_value_objective(objective_alias): - def dummy_obj(preds, train_data): - return np.ones(preds.shape), np.ones(preds.shape) - - def mse_obj(y_pred, dtrain): - y_true = dtrain.get_label() - grad = (y_pred - y_true) - hess = np.ones(len(grad)) - return grad, hess - # If callable is found in objective params = 
{objective_alias: dummy_obj} params = lgb.basic._choose_param_value( diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index f0c9ffeebfea..1fd889eee5ac 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -20,22 +20,11 @@ import lightgbm as lgb from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, logistic_sigmoid, - make_synthetic_regression, sklearn_multiclass_custom_objective, softmax) + make_synthetic_regression, sklearn_multiclass_custom_objective, softmax, dummy_obj, mse_obj) decreasing_generator = itertools.count(0, -1) -def dummy_obj(preds, train_data): - return np.ones(preds.shape), np.ones(preds.shape) - - -def mse_obj(y_pred, dtrain): - y_true = dtrain.get_label() - grad = (y_pred - y_true) - hess = np.ones(len(grad)) - return grad, hess - - def logloss_obj(preds, train_data): y_true = train_data.get_label() y_pred = logistic_sigmoid(preds) @@ -1897,7 +1886,7 @@ def test_metrics(): lgb_valid = lgb.Dataset(X_test, y_test, reference=lgb_train) evals_result = {} - params_verbose = {'verbose': -1, 'objective': dummy_obj} + params_dummy_obj_verbose = {'verbose': -1, 'objective': dummy_obj} params_obj_verbose = {'objective': 'binary', 'verbose': -1} params_obj_metric_log_verbose = {'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1} params_obj_metric_err_verbose = {'objective': 'binary', 'metric': 'binary_error', 'verbose': -1} @@ -1906,11 +1895,11 @@ def test_metrics(): 'metric': ['binary_logloss', 'binary_error'], 'verbose': -1} params_obj_metric_none_verbose = {'objective': 'binary', 'metric': 'None', 'verbose': -1} - params_metric_log_verbose = {'objective': dummy_obj, 'metric': 'binary_logloss', 'verbose': -1} + params_dummy_obj_metric_log_verbose = {'objective': dummy_obj, 'metric': 'binary_logloss', 'verbose': -1} params_metric_err_verbose = {'metric': 'binary_error', 'verbose': -1} - params_metric_inv_verbose = {'objective': dummy_obj, 'metric_types': 'invalid_metric', 'verbose': -1} - params_metric_multi_verbose = {'objective': dummy_obj, 'metric': ['binary_logloss', 'binary_error'], 'verbose': -1} - params_metric_none_verbose = {'objective': dummy_obj, 'metric': 'None', 'verbose': -1} + params_dummy_obj_metric_inv_verbose = {'objective': dummy_obj, 'metric_types': 'invalid_metric', 'verbose': -1} + params_dummy_obj_metric_multi_verbose = {'objective': dummy_obj, 'metric': ['binary_logloss', 'binary_error'], 'verbose': -1} + params_dummy_obj_metric_none_verbose = {'objective': dummy_obj, 'metric': 'None', 'verbose': -1} def get_cv_result(params=params_obj_verbose, **kwargs): return lgb.cv(params, lgb_train, num_boost_round=2, **kwargs) @@ -1974,7 +1963,7 @@ def train_booster(params=params_obj_verbose, **kwargs): # fobj, no feval # no default metric - res = get_cv_result(params=params_verbose) + res = get_cv_result(params=params_dummy_obj_verbose) assert len(res) == 0 # metric in params @@ -1983,23 +1972,23 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid binary_error-mean' in res # metric in args - res = get_cv_result(params=params_verbose, metrics='binary_error') + res = get_cv_result(params=params_dummy_obj_verbose, metrics='binary_error') assert len(res) == 2 assert 'valid binary_error-mean' in res # metric in args overwrites its' alias in params - res = get_cv_result(params=params_metric_inv_verbose, metrics='binary_error') + res = get_cv_result(params=params_dummy_obj_metric_inv_verbose, metrics='binary_error') 
assert len(res) == 2 assert 'valid binary_error-mean' in res # multiple metrics in params - res = get_cv_result(params=params_metric_multi_verbose) + res = get_cv_result(params=params_dummy_obj_metric_multi_verbose) assert len(res) == 4 assert 'valid binary_logloss-mean' in res assert 'valid binary_error-mean' in res # multiple metrics in args - res = get_cv_result(params=params_verbose, + res = get_cv_result(params=params_dummy_obj_verbose, metrics=['binary_logloss', 'binary_error']) assert len(res) == 4 assert 'valid binary_logloss-mean' in res @@ -2057,7 +2046,7 @@ def train_booster(params=params_obj_verbose, **kwargs): # fobj, feval # no default metric, only custom one - res = get_cv_result(params=params_verbose, feval=constant_metric) + res = get_cv_result(params=params_dummy_obj_verbose, feval=constant_metric) assert len(res) == 2 assert 'valid error-mean' in res @@ -2068,28 +2057,28 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid error-mean' in res # metric in args with custom one - res = get_cv_result(params=params_verbose, + res = get_cv_result(params=params_dummy_obj_verbose, feval=constant_metric, metrics='binary_error') assert len(res) == 4 assert 'valid binary_error-mean' in res assert 'valid error-mean' in res # metric in args overwrites one in params, custom one is evaluated too - res = get_cv_result(params=params_metric_inv_verbose, + res = get_cv_result(params=params_dummy_obj_metric_inv_verbose, feval=constant_metric, metrics='binary_error') assert len(res) == 4 assert 'valid binary_error-mean' in res assert 'valid error-mean' in res # multiple metrics in params with custom one - res = get_cv_result(params=params_metric_multi_verbose, feval=constant_metric) + res = get_cv_result(params=params_dummy_obj_metric_multi_verbose, feval=constant_metric) assert len(res) == 6 assert 'valid binary_logloss-mean' in res assert 'valid binary_error-mean' in res assert 'valid error-mean' in res # multiple metrics in args with custom one - res = get_cv_result(params=params_verbose, feval=constant_metric, + res = get_cv_result(params=params_dummy_obj_verbose, feval=constant_metric, metrics=['binary_logloss', 'binary_error']) assert len(res) == 6 assert 'valid binary_logloss-mean' in res @@ -2097,7 +2086,7 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid error-mean' in res # custom metric is evaluated despite 'None' is passed - res = get_cv_result(params=params_metric_none_verbose, feval=constant_metric) + res = get_cv_result(params=params_dummy_obj_metric_none_verbose, feval=constant_metric) assert len(res) == 2 assert 'valid error-mean' in res @@ -2131,16 +2120,16 @@ def train_booster(params=params_obj_verbose, **kwargs): # fobj, no feval # no default metric - train_booster(params=params_verbose) + train_booster(params=params_dummy_obj_verbose) assert len(evals_result) == 0 # metric in params - train_booster(params=params_metric_log_verbose) + train_booster(params=params_dummy_obj_metric_log_verbose) assert len(evals_result['valid_0']) == 1 assert 'binary_logloss' in evals_result['valid_0'] # multiple metrics in params - train_booster(params=params_metric_multi_verbose) + train_booster(params=params_dummy_obj_metric_multi_verbose) assert len(evals_result['valid_0']) == 2 assert 'binary_logloss' in evals_result['valid_0'] assert 'binary_error' in evals_result['valid_0'] @@ -2178,25 +2167,25 @@ def train_booster(params=params_obj_verbose, **kwargs): # fobj, feval # no default metric, only custom one - train_booster(params=params_verbose, 
feval=constant_metric) + train_booster(params=params_dummy_obj_verbose, feval=constant_metric) assert len(evals_result['valid_0']) == 1 assert 'error' in evals_result['valid_0'] # metric in params with custom one - train_booster(params=params_metric_log_verbose, feval=constant_metric) + train_booster(params=params_dummy_obj_metric_log_verbose, feval=constant_metric) assert len(evals_result['valid_0']) == 2 assert 'binary_logloss' in evals_result['valid_0'] assert 'error' in evals_result['valid_0'] # multiple metrics in params with custom one - train_booster(params=params_metric_multi_verbose, feval=constant_metric) + train_booster(params=params_dummy_obj_metric_multi_verbose, feval=constant_metric) assert len(evals_result['valid_0']) == 3 assert 'binary_logloss' in evals_result['valid_0'] assert 'binary_error' in evals_result['valid_0'] assert 'error' in evals_result['valid_0'] # custom metric is evaluated despite 'None' is passed - train_booster(params=params_metric_none_verbose, feval=constant_metric) + train_booster(params=params_dummy_obj_metric_none_verbose, feval=constant_metric) assert len(evals_result) == 1 assert 'error' in evals_result['valid_0'] @@ -2207,11 +2196,11 @@ def train_booster(params=params_obj_verbose, **kwargs): for obj_multi_alias in obj_multi_aliases: # Custom objective replaces multiclass params_obj_class_3_verbose = {'objective': obj_multi_alias, 'num_class': 3, 'verbose': -1} - params_obj_class_3_custom_obj = {'objective': dummy_obj, 'num_class': 3, 'verbose': -1} + params_dummy_obj_class_3_verbose = {'objective': dummy_obj, 'num_class': 3, 'verbose': -1} params_obj_class_1_verbose = {'objective': obj_multi_alias, 'num_class': 1, 'verbose': -1} - params_obj_class_1_custom_obj = {'objective': dummy_obj, 'num_class': 1, 'verbose': -1} + params_dummy_obj_class_1_verbose = {'objective': dummy_obj, 'num_class': 1, 'verbose': -1} params_obj_verbose = {'objective': obj_multi_alias, 'verbose': -1} - params_obj_custom_obj = {'objective': dummy_obj, 'verbose': -1} + params_dummy_obj_verbose= {'objective': dummy_obj, 'verbose': -1} # multiclass default metric res = get_cv_result(params_obj_class_3_verbose) assert len(res) == 2 @@ -2222,19 +2211,19 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid multi_logloss-mean' in res assert 'valid error-mean' in res # multiclass metric alias with custom one for custom objective - res = get_cv_result(params_obj_class_3_custom_obj, feval=constant_metric) + res = get_cv_result(params_dummy_obj_class_3_verbose, feval=constant_metric) assert len(res) == 2 assert 'valid error-mean' in res # no metric for invalid class_num - res = get_cv_result(params_obj_class_1_custom_obj) + res = get_cv_result(params_dummy_obj_class_1_verbose) assert len(res) == 0 # custom metric for invalid class_num - res = get_cv_result(params_obj_class_1_custom_obj, feval=constant_metric) + res = get_cv_result(params_dummy_obj_class_1_verbose, feval=constant_metric) assert len(res) == 2 assert 'valid error-mean' in res # multiclass metric alias with custom one with invalid class_num with pytest.raises(lgb.basic.LightGBMError): - get_cv_result(params_obj_class_1_custom_obj, metrics=obj_multi_alias, + get_cv_result(params_dummy_obj_class_1_verbose, metrics=obj_multi_alias, feval=constant_metric) # multiclass default metric without num_class with pytest.raises(lgb.basic.LightGBMError): @@ -2256,20 +2245,20 @@ def train_booster(params=params_obj_verbose, **kwargs): with pytest.raises(lgb.basic.LightGBMError): get_cv_result(params_class_3_verbose) # 
no metric with non-default num_class for custom objective - res = get_cv_result(params_obj_class_3_custom_obj) + res = get_cv_result(params_dummy_obj_class_3_verbose) assert len(res) == 0 for metric_multi_alias in obj_multi_aliases + ['multi_logloss']: # multiclass metric alias for custom objective - res = get_cv_result(params_obj_class_3_custom_obj, metrics=metric_multi_alias) + res = get_cv_result(params_dummy_obj_class_3_verbose, metrics=metric_multi_alias) assert len(res) == 2 assert 'valid multi_logloss-mean' in res # multiclass metric for custom objective - res = get_cv_result(params_obj_class_3_custom_obj, metrics='multi_error') + res = get_cv_result(params_dummy_obj_class_3_verbose, metrics='multi_error') assert len(res) == 2 assert 'valid multi_error-mean' in res # binary metric with non-default num_class for custom objective with pytest.raises(lgb.basic.LightGBMError): - get_cv_result(params_obj_class_3_custom_obj, metrics='binary_error') + get_cv_result(params_dummy_obj_class_3_verbose, metrics='binary_error') def test_multiple_feval_train(): @@ -2367,8 +2356,7 @@ def test_objective_callable_cv_regression(): lgb_train = lgb.Dataset(X, y) params_with_metric = { 'verbose': -1, - 'objective': mse_obj, - 'metric': 'l2' + 'objective': mse_obj } cv_res = lgb.cv( params_with_metric, @@ -2445,7 +2433,7 @@ def custom_obj(y_pred, ds): builtin_obj_bst = lgb.train(params, ds, num_boost_round=10) builtin_obj_preds = builtin_obj_bst.predict(X) - params = {'objective': custom_obj, 'num_class': 3, 'num_leaves': 7} + params['objective'] = custom_obj custom_obj_bst = lgb.train(params, ds, num_boost_round=10) custom_obj_preds = softmax(custom_obj_bst.predict(X)) diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index 3793cc354ad8..d0ce2ab7df0a 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -118,6 +118,14 @@ def make_ranking(n_samples=100, n_features=20, n_informative=5, gmax=2, def make_synthetic_regression(n_samples=100): return sklearn.datasets.make_regression(n_samples, n_features=4, n_informative=2, random_state=42) +def dummy_obj(preds, train_data): + return np.ones(preds.shape), np.ones(preds.shape) + +def mse_obj(y_pred, dtrain): + y_true = dtrain.get_label() + grad = (y_pred - y_true) + hess = np.ones(len(grad)) + return grad, hess def softmax(x): row_wise_max = np.max(x, axis=1).reshape(-1, 1) From 8d4740a38d3f4f485a613c994ae388902483b21b Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sun, 10 Apr 2022 21:22:48 -0500 Subject: [PATCH 34/37] linter errors --- tests/python_package_test/test_engine.py | 2 +- tests/python_package_test/utils.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 1fd889eee5ac..099f46cfde52 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2200,7 +2200,7 @@ def train_booster(params=params_obj_verbose, **kwargs): params_obj_class_1_verbose = {'objective': obj_multi_alias, 'num_class': 1, 'verbose': -1} params_dummy_obj_class_1_verbose = {'objective': dummy_obj, 'num_class': 1, 'verbose': -1} params_obj_verbose = {'objective': obj_multi_alias, 'verbose': -1} - params_dummy_obj_verbose= {'objective': dummy_obj, 'verbose': -1} + params_dummy_obj_verbose = {'objective': dummy_obj, 'verbose': -1} # multiclass default metric res = get_cv_result(params_obj_class_3_verbose) assert len(res) == 2 diff --git 
a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index d0ce2ab7df0a..472343091566 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -118,8 +118,10 @@ def make_ranking(n_samples=100, n_features=20, n_informative=5, gmax=2, def make_synthetic_regression(n_samples=100): return sklearn.datasets.make_regression(n_samples, n_features=4, n_informative=2, random_state=42) + def dummy_obj(preds, train_data): - return np.ones(preds.shape), np.ones(preds.shape) + return np.ones(preds.shape), np.ones(preds.shape) + def mse_obj(y_pred, dtrain): y_true = dtrain.get_label() @@ -127,6 +129,7 @@ def mse_obj(y_pred, dtrain): hess = np.ones(len(grad)) return grad, hess + def softmax(x): row_wise_max = np.max(x, axis=1).reshape(-1, 1) exp_x = np.exp(x - row_wise_max) From 29aff3f72aa7ad0960a4c1252ffcb8022872f9fc Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sun, 10 Apr 2022 22:20:16 -0500 Subject: [PATCH 35/37] fix: process objective parameter when calling predict Signed-off-by: Miguel Trejo --- python-package/lightgbm/sklearn.py | 12 +++++++++++- tests/python_package_test/test_basic.py | 2 +- tests/python_package_test/test_engine.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index a1301f98323f..9014460b5b07 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -598,7 +598,17 @@ def _process_params(self, stage: str) -> Dict[str, Any]: if stage == "fit": params['objective'] = _ObjectiveFunctionWrapper(self._objective) else: - params['objective'] = 'None' + if isinstance(self, LGBMRegressor): + params['objective'] = "regression" + elif isinstance(self, LGBMClassifier): + if self._n_classes > 2: + params['objective'] = "multiclass" + else: + params['objective'] = "binary" + elif isinstance(self, LGBMRanker): + params['objective'] = "lambdarank" + else: + raise ValueError("Unknown LGBMModel type.") else: params['objective'] = self._objective diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 43dcaff6c08b..4d6c367d8150 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -14,7 +14,7 @@ import lightgbm as lgb from lightgbm.compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series -from .utils import load_breast_cancer, dummy_obj, mse_obj +from .utils import dummy_obj, load_breast_cancer, mse_obj def test_basic(tmp_path): diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 099f46cfde52..5e4a7140bf29 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -19,8 +19,8 @@ import lightgbm as lgb -from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, logistic_sigmoid, - make_synthetic_regression, sklearn_multiclass_custom_objective, softmax, dummy_obj, mse_obj) +from .utils import (dummy_obj, load_boston, load_breast_cancer, load_digits, load_iris, logistic_sigmoid, + make_synthetic_regression, mse_obj, sklearn_multiclass_custom_objective, softmax) decreasing_generator = itertools.count(0, -1) From c26c2a9234872eeb2a94ae07527d607be3e71c58 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sun, 10 Apr 2022 22:28:22 -0500 Subject: [PATCH 36/37] linter errors --- python-package/lightgbm/sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/sklearn.py 
b/python-package/lightgbm/sklearn.py index 9014460b5b07..56fd23008208 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -599,7 +599,7 @@ def _process_params(self, stage: str) -> Dict[str, Any]: params['objective'] = _ObjectiveFunctionWrapper(self._objective) else: if isinstance(self, LGBMRegressor): - params['objective'] = "regression" + params['objective'] = "regression" elif isinstance(self, LGBMClassifier): if self._n_classes > 2: params['objective'] = "multiclass" From 36ca839f6491f0c120d4fe94dc7bf3c22fdf8971 Mon Sep 17 00:00:00 2001 From: Miguel Trejo Date: Sun, 17 Apr 2022 08:49:31 -0500 Subject: [PATCH 37/37] fix: objective is None during predict call Signed-off-by: Miguel Trejo --- python-package/lightgbm/sklearn.py | 12 +----------- tests/python_package_test/test_engine.py | 11 +++++------ 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 56fd23008208..a1301f98323f 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -598,17 +598,7 @@ def _process_params(self, stage: str) -> Dict[str, Any]: if stage == "fit": params['objective'] = _ObjectiveFunctionWrapper(self._objective) else: - if isinstance(self, LGBMRegressor): - params['objective'] = "regression" - elif isinstance(self, LGBMClassifier): - if self._n_classes > 2: - params['objective'] = "multiclass" - else: - params['objective'] = "binary" - elif isinstance(self, LGBMRanker): - params['objective'] = "lambdarank" - else: - raise ValueError("Unknown LGBMModel type.") + params['objective'] = 'None' else: params['objective'] = self._objective diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 5e4a7140bf29..df840a768539 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1896,7 +1896,7 @@ def test_metrics(): 'verbose': -1} params_obj_metric_none_verbose = {'objective': 'binary', 'metric': 'None', 'verbose': -1} params_dummy_obj_metric_log_verbose = {'objective': dummy_obj, 'metric': 'binary_logloss', 'verbose': -1} - params_metric_err_verbose = {'metric': 'binary_error', 'verbose': -1} + params_dummy_obj_metric_err_verbose = {'metric': 'binary_error', 'objective': dummy_obj, 'verbose': -1} params_dummy_obj_metric_inv_verbose = {'objective': dummy_obj, 'metric_types': 'invalid_metric', 'verbose': -1} params_dummy_obj_metric_multi_verbose = {'objective': dummy_obj, 'metric': ['binary_logloss', 'binary_error'], 'verbose': -1} params_dummy_obj_metric_none_verbose = {'objective': dummy_obj, 'metric': 'None', 'verbose': -1} @@ -1967,7 +1967,7 @@ def train_booster(params=params_obj_verbose, **kwargs): assert len(res) == 0 # metric in params - res = get_cv_result(params=params_metric_err_verbose) + res = get_cv_result(params=params_dummy_obj_metric_err_verbose) assert len(res) == 2 assert 'valid binary_error-mean' in res @@ -2051,7 +2051,7 @@ def train_booster(params=params_obj_verbose, **kwargs): assert 'valid error-mean' in res # metric in params with custom one - res = get_cv_result(params=params_metric_err_verbose, feval=constant_metric) + res = get_cv_result(params=params_dummy_obj_metric_err_verbose, feval=constant_metric) assert len(res) == 4 assert 'valid binary_error-mean' in res assert 'valid error-mean' in res @@ -2197,7 +2197,6 @@ def train_booster(params=params_obj_verbose, **kwargs): # Custom objective replaces multiclass params_obj_class_3_verbose = 
{'objective': obj_multi_alias, 'num_class': 3, 'verbose': -1} params_dummy_obj_class_3_verbose = {'objective': dummy_obj, 'num_class': 3, 'verbose': -1} - params_obj_class_1_verbose = {'objective': obj_multi_alias, 'num_class': 1, 'verbose': -1} params_dummy_obj_class_1_verbose = {'objective': dummy_obj, 'num_class': 1, 'verbose': -1} params_obj_verbose = {'objective': obj_multi_alias, 'verbose': -1} params_dummy_obj_verbose = {'objective': dummy_obj, 'verbose': -1} @@ -2354,12 +2353,12 @@ def test_objective_callable_cv_binary_classification(): def test_objective_callable_cv_regression(): X, y = make_synthetic_regression() lgb_train = lgb.Dataset(X, y) - params_with_metric = { + params = { 'verbose': -1, 'objective': mse_obj } cv_res = lgb.cv( - params_with_metric, + params, lgb_train, num_boost_round=20, nfold=3,