Divide metric into train_metric and valid_metric #1912

Closed
wants to merge 21 commits into from
14 changes: 13 additions & 1 deletion docs/Parameters.rst
@@ -791,11 +791,23 @@ Metric Parameters

- support multiple metrics, separated by ``,``

- ``train_metric`` :raw-html:`<a id="train_metric" title="Permalink to this parameter" href="#train_metric">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = multi-enum

- metric(s) to be evaluated on the train set passed into evaluation set

- if ``metric`` is set, ``train_metric`` will be overwritten

- ``valid_metric`` :raw-html:`<a id="valid_metric" title="Permalink to this parameter" href="#valid_metric">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = multi-enum

- metric(s) to be evaluated on the valid set passed into evaluation set

- if ``metric`` is set, ``valid_metric`` will be overwritten

- ``metric_freq`` :raw-html:`<a id="metric_freq" title="Permalink to this parameter" href="#metric_freq">&#x1F517;&#xFE0E;</a>`, default = ``1``, type = int, aliases: ``output_freq``, constraints: ``metric_freq > 0``

- frequency for metric output

- - ``is_provide_training_metric`` :raw-html:`<a id="is_provide_training_metric" title="Permalink to this parameter" href="#is_provide_training_metric">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``training_metric``, ``is_training_metric``, ``train_metric``
+ - ``is_provide_training_metric`` :raw-html:`<a id="is_provide_training_metric" title="Permalink to this parameter" href="#is_provide_training_metric">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``training_metric``, ``is_training_metric``

- set this to ``true`` to output metric result over training dataset
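To make the proposed parameters concrete, here is a minimal usage sketch through the Python API, assuming the behavior documented above (an explicit ``metric`` takes precedence over both new keys). ``train_metric`` and ``valid_metric`` exist only in this PR, not in any released version, so treat the snippet as illustrative only.

```python
import numpy as np
import lightgbm as lgb

X = np.random.rand(500, 10)
y = np.random.randint(2, size=500)
train_data = lgb.Dataset(X[:400], label=y[:400])
valid_data = lgb.Dataset(X[400:], label=y[400:], reference=train_data)

params = {
    'objective': 'binary',
    'train_metric': 'binary_logloss',  # proposed: evaluated on the training data only
    'valid_metric': 'auc',             # proposed: evaluated on the validation data only
    # 'metric': 'auc',                 # if set, it would take precedence over both keys above
}
bst = lgb.train(params, train_data, num_boost_round=10,
                valid_sets=[train_data, valid_data],
                valid_names=['training', 'valid_0'])
```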

4 changes: 2 additions & 2 deletions include/LightGBM/c_api.h
@@ -484,15 +484,15 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterNumberOfTotalModel(BoosterHandle handle, int*
* \param out_len total number of eval results
* \return 0 when succeed, -1 when failure happens
*/
-LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int* out_len);
+LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int data_idx, int* out_len);

/*!
* \brief Get name of eval
* \param out_len total number of eval results
* \param out_strs names of eval result, need to pre-allocate memory before call this
* \return 0 when succeed, -1 when failure happens
*/
-LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, int* out_len, char** out_strs);
+LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, int data_idx, int* out_len, char** out_strs);

/*!
* \brief Get name of features
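As a rough illustration of the revised C API, the sketch below calls the changed ``LGBM_BoosterGetEvalCounts`` signature through ``ctypes``. The library path and the interpretation of ``data_idx`` as a dataset selector (0 for the training data, 1+ for validation data) are assumptions based on how ``basic.py`` uses it further down.

```python
import ctypes

_LIB = ctypes.CDLL('./lib_lightgbm.so')  # path is illustrative

def get_eval_counts(booster_handle, data_idx):
    """Return the number of eval results for the dataset selected by ``data_idx``."""
    out_len = ctypes.c_int(0)
    ret = _LIB.LGBM_BoosterGetEvalCounts(booster_handle,
                                         ctypes.c_int(data_idx),
                                         ctypes.byref(out_len))
    if ret != 0:
        raise RuntimeError("LGBM_BoosterGetEvalCounts failed")
    return out_len.value
```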
16 changes: 15 additions & 1 deletion include/LightGBM/config.h
@@ -699,12 +699,26 @@ struct Config {
// desc = support multiple metrics, separated by ``,``
std::vector<std::string> metric;

// [doc-only]
// default = ""
// type = multi-enum
// desc = metric(s) to be evaluated on the train set passed into evaluation set
// desc = if ``metric`` is set, ``train_metric`` will be overwritten
std::vector<std::string> train_metric;

// [doc-only]
// default = ""
// type = multi-enum
// desc = metric(s) to be evaluated on the valid set passed into evaluation set
// desc = if ``metric`` is set, ``valid_metric`` will be overwritten
std::vector<std::string> valid_metric;

// check = >0
// alias = output_freq
// desc = frequency for metric output
int metric_freq = 1;

// alias = training_metric, is_training_metric, train_metric
// alias = training_metric, is_training_metric
// desc = set this to ``true`` to output metric result over training dataset
// desc = **Note**: can be used only in CLI version
bool is_provide_training_metric = false;
52 changes: 24 additions & 28 deletions python-package/lightgbm/basic.py
@@ -1497,7 +1497,6 @@ def __init__(self, params=None, train_set=None, model_file=None, silent=False):
"""
self.handle = None
self.network = False
-        self.__need_reload_eval_info = True
self.__train_data_name = "training"
self.__attr = {}
self.__set_objective_to_none = False
@@ -1553,7 +1552,6 @@ def __init__(self, params=None, train_set=None, model_file=None, silent=False):
# buffer for inner predict
self.__inner_predict_buffer = [None]
self.__is_predicted_cur_iter = [False]
-        self.__get_eval_info()
Collaborator:

the reload is deleted?
I think this will somehow affect the performance.

self.pandas_categorical = train_set.pandas_categorical
elif model_file is not None:
# Prediction task
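To illustrate the reviewer's concern above: before this PR the eval names were fetched from the C API once and cached behind ``__need_reload_eval_info``; with this change they are re-fetched on every ``__inner_eval`` call. The sketch below shows one possible way to keep the caching while keying the information by dataset. It is an illustration of the idea, not code from this PR.

```python
class EvalInfoCache:
    """Cache per-dataset eval info so the C API is queried only once per data_idx."""

    def __init__(self):
        self._cache = {}

    def get(self, data_idx, loader):
        # loader() would wrap the LGBM_BoosterGetEvalCounts / LGBM_BoosterGetEvalNames calls
        if data_idx not in self._cache:
            self._cache[data_idx] = loader()
        return self._cache[data_idx]

    def invalidate(self):
        # would be called from reset_parameter() whenever a metric-related key is passed
        self._cache.clear()
```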
@@ -1733,8 +1731,6 @@ def reset_parameter(self, params):
self : Booster
Booster with new parameters.
"""
-        if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')):
-            self.__need_reload_eval_info = True
params_str = param_dict_to_str(params)
if params_str:
_safe_call(_LIB.LGBM_BoosterResetParameter(
@@ -2335,7 +2331,7 @@ def __inner_eval(self, data_name, data_idx, feval=None):
"""Evaluate training or validation data."""
if data_idx >= self.__num_dataset:
raise ValueError("Data_idx should be smaller than number of dataset")
-        self.__get_eval_info()
+        self.__get_eval_info(data_idx)
ret = []
if self.__num_inner_eval > 0:
result = np.zeros(self.__num_inner_eval, dtype=np.float64)
@@ -2388,31 +2384,31 @@ def __inner_predict(self, data_idx):
self.__is_predicted_cur_iter[data_idx] = True
return self.__inner_predict_buffer[data_idx]

-    def __get_eval_info(self):
+    def __get_eval_info(self, data_idx):
         """Get inner evaluation count and names."""
-        if self.__need_reload_eval_info:
-            self.__need_reload_eval_info = False
-            out_num_eval = ctypes.c_int(0)
-            # Get num of inner evals
-            _safe_call(_LIB.LGBM_BoosterGetEvalCounts(
-                self.handle,
-                ctypes.byref(out_num_eval)))
-            self.__num_inner_eval = out_num_eval.value
-            if self.__num_inner_eval > 0:
-                # Get name of evals
-                tmp_out_len = ctypes.c_int(0)
-                string_buffers = [ctypes.create_string_buffer(255) for i in range_(self.__num_inner_eval)]
-                ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
-                _safe_call(_LIB.LGBM_BoosterGetEvalNames(
-                    self.handle,
-                    ctypes.byref(tmp_out_len),
-                    ptr_string_buffers))
-                if self.__num_inner_eval != tmp_out_len.value:
-                    raise ValueError("Length of eval names doesn't equal with num_evals")
-                self.__name_inner_eval = \
-                    [string_buffers[i].value.decode() for i in range_(self.__num_inner_eval)]
-                self.__higher_better_inner_eval = \
-                    [name.startswith(('auc', 'ndcg@', 'map@')) for name in self.__name_inner_eval]
+        out_num_eval = ctypes.c_int(0)
+        # Get num of inner evals
+        _safe_call(_LIB.LGBM_BoosterGetEvalCounts(
+            self.handle,
+            ctypes.c_int(data_idx),
+            ctypes.byref(out_num_eval)))
+        self.__num_inner_eval = out_num_eval.value
+        if self.__num_inner_eval > 0:
+            # Get name of evals
+            tmp_out_len = ctypes.c_int(0)
+            string_buffers = [ctypes.create_string_buffer(255) for i in range_(self.__num_inner_eval)]
+            ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
+            _safe_call(_LIB.LGBM_BoosterGetEvalNames(
+                self.handle,
+                ctypes.c_int(data_idx),
+                ctypes.byref(tmp_out_len),
+                ptr_string_buffers))
+            if self.__num_inner_eval != tmp_out_len.value:
+                raise ValueError("Length of eval names doesn't equal with num_evals")
+            self.__name_inner_eval = \
+                [string_buffers[i].value.decode() for i in range_(self.__num_inner_eval)]
+            self.__higher_better_inner_eval = \
+                [name.startswith(('auc', 'ndcg@', 'map@')) for name in self.__name_inner_eval]

def attr(self, key):
"""Get attribute string from the Booster.
64 changes: 35 additions & 29 deletions python-package/lightgbm/sklearn.py
@@ -330,7 +330,8 @@ def fit(self, X, y,
sample_weight=None, init_score=None, group=None,
eval_set=None, eval_names=None, eval_sample_weight=None,
eval_class_weight=None, eval_init_score=None, eval_group=None,
-            eval_metric=None, early_stopping_rounds=None, verbose=True,
+            eval_metric=None, eval_train_metric=None, eval_valid_metric=None,
Collaborator (@StrikerRUS, Dec 20, 2018):

I don't think that these new args are really needed. One can achieve the needed result in such rare cases with params.

Contributor Author:

> I don't think that these new args are really needed.
> One can achieve the needed result in such rare cases with params.

I don't think these are rare cases, because CatBoost, which is one of the most famous GBDT libraries and was accepted at NIPS 2018, supports setting individual metrics for training and validation.
This PR (change) allows us to set an evaluation metric different from the objective function, which is useful and needed, and that is why CatBoost implemented it.

cf. eval_metric in
https://tech.yandex.com/catboost/doc/dg/concepts/python-reference_parameters-list-docpage/#python-reference_parameters-list.

However, as you pointed out, eval_metric, eval_train_metric and eval_valid_metric are confusing for many users...

Collaborator:

@guolinke What do you think? My position is that corresponding keys in params are enough, because new arguments introduce a huge number of possible combinations which is very hard to support.

I remember we were getting rid of some arguments in favor of their aliases in params some time ago (the max_bin argument is the first thing that comes to my mind). So, new args go against this policy.

Collaborator:

Indeed, I think having 3 similar parameters is confusing. BTW, eval_metric can be a function, while eval_train_metric and eval_valid_metric can only be strings; this inconsistency will cause further confusion...

+            early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None):
"""Build a gradient boosting model from the training set (X, y).

@@ -363,6 +364,12 @@ def fit(self, X, y,
If callable, it should be a custom evaluation metric, see note below for more details.
In either case, the ``metric`` from the model parameters will be evaluated and used as well.
Default: 'l2' for LGBMRegressor, 'logloss' for LGBMClassifier, 'ndcg' for LGBMRanker.
eval_train_metric : string, list of strings, callable or None, optional (default=None)
metric(s) to be evaluated on the train set passed into ``eval_set``
if ``metric`` is set, ``eval_train_metric`` will be overwritten
eval_valid_metric : string, list of strings, callable or None, optional (default=None)
metric(s) to be evaluated on the valid set passed into ``eval_set``
if ``metric`` is set, ``eval_valid_metric`` will be overwritten
early_stopping_rounds : int or None, optional (default=None)
Activates early stopping. The model will train until the validation score stops improving.
Validation score needs to improve at least every ``early_stopping_rounds`` round(s)
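For context, a hypothetical call using the ``eval_train_metric`` / ``eval_valid_metric`` arguments documented above; these arguments exist only in this PR and are the ones the reviewers question below.

```python
import numpy as np
from lightgbm import LGBMClassifier

X = np.random.rand(200, 5)
y = np.random.randint(2, size=200)

clf = LGBMClassifier(n_estimators=10)
clf.fit(
    X[:150], y[:150],
    eval_set=[(X[:150], y[:150]), (X[150:], y[150:])],
    eval_train_metric='binary_logloss',  # proposed: applies to the training entry of eval_set
    eval_valid_metric='auc',             # proposed: applies to the validation entries of eval_set
)
```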
Expand Down Expand Up @@ -460,24 +467,12 @@ def fit(self, X, y,
feval = _eval_function_wrapper(eval_metric)
else:
feval = None
-        # register default metric for consistency with callable eval_metric case
Collaborator:

> I think this change keeps backward compatibility because the default value for metric is set in C++ in the same way as in the current code.

Yeah, but I'm talking about the Python side (we had many issues about inconsistent behavior at the Python side and this code solved them). So, this code should be left as is to prevent future changes in the current behavior. Please see our old conversation (setting the default metric only at the cpp side is not enough): #1589 (comment)

Collaborator (@StrikerRUS, Jan 15, 2019):

@henry0312 Please fix this (leave the old code) to get this PR merged.
I think that, in case we don't introduce new arguments (please see #1912 (review)), it's enough just to add something like this after the old code:

if params['train_metric'] or params['valid_metric']:
    params['metric'] = 'None'

to support this new behavior:

> if metric is set, train_metric/valid_metric will be overwritten

I believe that this is the most painless way, and one that introduces no new odd behavior, to support the new train/valid metric feature 😃

Collaborator:

@henry0312 @guolinke I've decided to dive into the "metrics hell" again and started to write a big test which reflects the current behavior of metrics handling: 8265469 (metrics_test branch). The next step is sklearn - the hardest part. After this, I'll create a PR.

Collaborator:

PR has been created: #1954.

@henry0312 I hope it will help you abandon the idea of creating more arguments for metrics 😄
I like the idea of separate metrics a lot, but seriously, corresponding keys in params are quite enough.

-        original_metric = self._objective if isinstance(self._objective, string_type) else None
-        if original_metric is None:
-            # try to deduce from class instance
-            if isinstance(self, LGBMRegressor):
-                original_metric = "l2"
-            elif isinstance(self, LGBMClassifier):
-                original_metric = "multi_logloss" if self._n_classes > 2 else "binary_logloss"
-            elif isinstance(self, LGBMRanker):
-                original_metric = "ndcg"
-        # overwrite default metric by explicitly set metric
-        for metric_alias in ['metric', 'metrics', 'metric_types']:
-            if metric_alias in params:
-                original_metric = params.pop(metric_alias)
-        # concatenate metric from params (or default if not provided in params) and eval_metric
-        original_metric = [original_metric] if isinstance(original_metric, (string_type, type(None))) else original_metric
-        eval_metric = [eval_metric] if isinstance(eval_metric, (string_type, type(None))) else eval_metric
-        params['metric'] = set(original_metric + eval_metric)
+        if eval_metric:
+            # for backward compatibility, `metric` is prefered to `train_metric` and `valid_metric`
+            params['metric'] = eval_metric
+        else:
+            params['train_metric'] = eval_train_metric
+            params['valid_metric'] = eval_valid_metric

if not isinstance(X, DataFrame):
_X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2)
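The params-only alternative argued for in the review thread above would avoid the new ``fit()`` arguments entirely and route the proposed keys through the estimator parameters instead. A sketch, assuming the PR's ``train_metric`` / ``valid_metric`` parameter names:

```python
import numpy as np
from lightgbm import LGBMClassifier

X = np.random.rand(200, 5)
y = np.random.randint(2, size=200)

# the proposed keys travel through the estimator params; fit() keeps only eval_metric
clf = LGBMClassifier(n_estimators=10, train_metric='binary_logloss', valid_metric='auc')
clf.fit(X[:150], y[:150], eval_set=[(X[150:], y[150:])])
```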
@@ -670,7 +665,8 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase):
def fit(self, X, y,
sample_weight=None, init_score=None,
eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
+            eval_init_score=None, eval_metric=None, eval_train_metric=None,
+            eval_valid_metric=None, early_stopping_rounds=None,
verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight,
@@ -679,6 +675,8 @@ def fit(self, X, y,
eval_sample_weight=eval_sample_weight,
eval_init_score=eval_init_score,
eval_metric=eval_metric,
eval_train_metric=eval_train_metric,
eval_valid_metric=eval_valid_metric,
early_stopping_rounds=early_stopping_rounds,
verbose=verbose, feature_name=feature_name,
categorical_feature=categorical_feature,
@@ -697,6 +695,7 @@ def fit(self, X, y,
sample_weight=None, init_score=None,
eval_set=None, eval_names=None, eval_sample_weight=None,
eval_class_weight=None, eval_init_score=None, eval_metric=None,
eval_train_metric=None, eval_valid_metric=None,
early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
@@ -712,15 +711,17 @@ def fit(self, X, y,
ova_aliases = ("multiclassova", "multiclass_ova", "ova", "ovr")
if self._objective not in ova_aliases and not callable(self._objective):
self._objective = "multiclass"
-            if eval_metric in ('logloss', 'binary_logloss'):
-                eval_metric = "multi_logloss"
-            elif eval_metric in ('error', 'binary_error'):
-                eval_metric = "multi_error"
+            for _metric in (eval_metric, eval_train_metric, eval_valid_metric):
+                if _metric in ('logloss', 'binary_logloss'):
+                    _metric = "multi_logloss"
+                elif _metric in ('error', 'binary_error'):
+                    _metric = "multi_error"
         else:
-            if eval_metric in ('logloss', 'multi_logloss'):
-                eval_metric = 'binary_logloss'
-            elif eval_metric in ('error', 'multi_error'):
-                eval_metric = 'binary_error'
+            for _metric in (eval_metric, eval_train_metric, eval_valid_metric):
+                if _metric in ('logloss', 'multi_logloss'):
+                    _metric = 'binary_logloss'
+                elif _metric in ('error', 'multi_error'):
+                    _metric = 'binary_error'

if eval_set is not None:
if isinstance(eval_set, tuple):
@@ -738,6 +739,8 @@ def fit(self, X, y,
eval_class_weight=eval_class_weight,
eval_init_score=eval_init_score,
eval_metric=eval_metric,
eval_train_metric=eval_train_metric,
eval_valid_metric=eval_valid_metric,
early_stopping_rounds=early_stopping_rounds,
verbose=verbose, feature_name=feature_name,
categorical_feature=categorical_feature,
@@ -825,7 +828,8 @@ def fit(self, X, y,
sample_weight=None, init_score=None, group=None,
eval_set=None, eval_names=None, eval_sample_weight=None,
eval_init_score=None, eval_group=None, eval_metric=None,
-            eval_at=[1], early_stopping_rounds=None, verbose=True,
+            eval_train_metric=None, eval_valid_metric=None, eval_at=[1],
+            early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
# check group data
@@ -851,6 +855,8 @@ def fit(self, X, y,
eval_sample_weight=eval_sample_weight,
eval_init_score=eval_init_score, eval_group=eval_group,
eval_metric=eval_metric,
eval_train_metric=eval_train_metric,
eval_valid_metric=eval_valid_metric,
early_stopping_rounds=early_stopping_rounds,
verbose=verbose, feature_name=feature_name,
categorical_feature=categorical_feature,
6 changes: 3 additions & 3 deletions src/application/application.cpp
@@ -116,7 +116,7 @@ void Application::LoadData() {
}
// create training metric
if (config_.is_provide_training_metric) {
-    for (auto metric_type : config_.metric) {
+    for (auto metric_type : config_.train_metric) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
metric->Init(train_data_->metadata(), train_data_->num_data());
@@ -126,7 +126,7 @@ void Application::LoadData() {
train_metric_.shrink_to_fit();


-  if (!config_.metric.empty()) {
+  if (!config_.train_metric.empty()) {
// only when have metrics then need to construct validation data

// Add validation data, if it exists
@@ -146,7 +146,7 @@ void Application::LoadData() {

// add metric for validation data
valid_metrics_.emplace_back();
-      for (auto metric_type : config_.metric) {
+      for (auto metric_type : config_.valid_metric) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
metric->Init(valid_datas_.back()->metadata(),