From 5098d643a6d635d94c65dcec191dd7477e70e400 Mon Sep 17 00:00:00 2001
From: Paul Koch
Date: Tue, 31 Dec 2024 17:34:56 -0800
Subject: [PATCH] update python default parameters (n_jobs=-1, outer_bags=16,
 cat_smooth=math.inf)

---
 python/interpret-core/interpret/develop.py |  3 ++-
 .../interpret/glassbox/_ebm/_ebm.py        | 24 +++++++++----------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/python/interpret-core/interpret/develop.py b/python/interpret-core/interpret/develop.py
index ab318630b..007590d97 100644
--- a/python/interpret-core/interpret/develop.py
+++ b/python/interpret-core/interpret/develop.py
@@ -2,6 +2,7 @@
 # Distributed under the MIT software license
 
 import sys
+import math
 
 from .utils._native import Native
 
@@ -17,7 +18,7 @@
     "min_samples_leaf_nominal": None,  # TODO: LightGBM uses min_data_per_group = 100
     "min_cat_samples": 12,
     "min_cat_hessian_percent": 0.0,
-    "cat_smooth": 10.0,
+    "cat_smooth": math.inf,  # math.inf means use only the gradient for sorting
     "max_cat_threshold": 32,
     "cat_include": 0.75,
     "purify_boosting": False,
diff --git a/python/interpret-core/interpret/glassbox/_ebm/_ebm.py b/python/interpret-core/interpret/glassbox/_ebm/_ebm.py
index 9d1bca5a1..4202ea17b 100644
--- a/python/interpret-core/interpret/glassbox/_ebm/_ebm.py
+++ b/python/interpret-core/interpret/glassbox/_ebm/_ebm.py
@@ -2718,7 +2718,7 @@ class ExplainableBoostingClassifier(ClassifierMixin, EBMModel):
         - Integer (1 <= validation_size): Count of samples to put in the validation sets
         - Percentage (validation_size < 1.0): Percentage of the data to put in the validation sets
         - 0: Turns off early stopping. Outer bags have no utility. Error bounds will be eliminated
-    outer_bags : int, default=14
+    outer_bags : int, default=16
         Number of outer bags. Outer bags are used to generate error bounds and help with smoothing the graphs.
     inner_bags : int, default=0
         Number of inner bags. 0 turns off inner bagging.
@@ -2808,7 +2808,7 @@ class ExplainableBoostingClassifier(ClassifierMixin, EBMModel):
         - -1: The partial response of the corresponding feature should be monotonically decreasing with respect to the target.
     objective : str, default="log_loss"
         The objective to optimize.
-    n_jobs : int, default=-2
+    n_jobs : int, default=-1
         Number of jobs to run in parallel. Negative integers are interpreted as following joblib's formula
         (n_cpus + 1 + n_jobs), just like scikit-learn. Eg: -2 means using all threads except 1.
     random_state : int or None, default=42
@@ -2927,7 +2927,7 @@ def __init__(
         exclude: Optional[Sequence[Union[int, str, Sequence[Union[int, str]]]]] = None,
         # Ensemble
         validation_size: Optional[Union[int, float]] = 0.15,
-        outer_bags: int = 14,
+        outer_bags: int = 16,
         inner_bags: Optional[int] = 0,
         # Boosting
         learning_rate: float = 0.015,
@@ -2949,7 +2949,7 @@
         monotone_constraints: Optional[Sequence[int]] = None,
         objective: str = "log_loss",
         # Overall
-        n_jobs: Optional[int] = -2,
+        n_jobs: Optional[int] = -1,
         random_state: Optional[int] = 42,
     ):
         super().__init__(
@@ -3090,7 +3090,7 @@ class ExplainableBoostingRegressor(RegressorMixin, EBMModel):
         - Integer (1 <= validation_size): Count of samples to put in the validation sets
         - Percentage (validation_size < 1.0): Percentage of the data to put in the validation sets
         - 0: Turns off early stopping. Outer bags have no utility. Error bounds will be eliminated
-    outer_bags : int, default=14
+    outer_bags : int, default=16
         Number of outer bags. Outer bags are used to generate error bounds and help with smoothing the graphs.
     inner_bags : int, default=0
         Number of inner bags. 0 turns off inner bagging.
@@ -3182,7 +3182,7 @@ class ExplainableBoostingRegressor(RegressorMixin, EBMModel):
         The objective to optimize. Options include: "rmse",
         "poisson_deviance", "tweedie_deviance:variance_power=1.5", "gamma_deviance",
         "pseudo_huber:delta=1.0", "rmse_log" (rmse with a log link function)
-    n_jobs : int, default=-2
+    n_jobs : int, default=-1
         Number of jobs to run in parallel. Negative integers are interpreted as following joblib's formula
         (n_cpus + 1 + n_jobs), just like scikit-learn. Eg: -2 means using all threads except 1.
     random_state : int or None, default=42
@@ -3299,7 +3299,7 @@ def __init__(
         exclude: Optional[Sequence[Union[int, str, Sequence[Union[int, str]]]]] = None,
         # Ensemble
         validation_size: Optional[Union[int, float]] = 0.15,
-        outer_bags: int = 14,
+        outer_bags: int = 16,
         inner_bags: Optional[int] = 0,
         # Boosting
         learning_rate: float = 0.04,
@@ -3321,7 +3321,7 @@
         monotone_constraints: Optional[Sequence[int]] = None,
         objective: str = "rmse",
         # Overall
-        n_jobs: Optional[int] = -2,
+        n_jobs: Optional[int] = -1,
         random_state: Optional[int] = 42,
     ):
         super().__init__(
@@ -3423,7 +3423,7 @@ class DPExplainableBoostingClassifier(ClassifierMixin, EBMModel):
         Total number of boosting rounds with n_terms boosting steps per round.
     max_leaves : int, default=3
         Maximum number of leaves allowed in each tree.
-    n_jobs : int, default=-2
+    n_jobs : int, default=-1
         Number of jobs to run in parallel. Negative integers are interpreted as following joblib's formula
         (n_cpus + 1 + n_jobs), just like scikit-learn. Eg: -2 means using all threads except 1.
     random_state : int or None, default=None
@@ -3547,7 +3547,7 @@ def __init__(
         # Trees
         max_leaves: int = 3,
         # Overall
-        n_jobs: Optional[int] = -2,
+        n_jobs: Optional[int] = -1,
         random_state: Optional[int] = None,
         # Differential Privacy
         epsilon: float = 1.0,
@@ -3691,7 +3691,7 @@ class DPExplainableBoostingRegressor(RegressorMixin, EBMModel):
         Total number of boosting rounds with n_terms boosting steps per round.
     max_leaves : int, default=3
         Maximum number of leaves allowed in each tree.
-    n_jobs : int, default=-2
+    n_jobs : int, default=-1
         Number of jobs to run in parallel. Negative integers are interpreted as following joblib's formula
         (n_cpus + 1 + n_jobs), just like scikit-learn. Eg: -2 means using all threads except 1.
     random_state : int or None, default=None
@@ -3824,7 +3824,7 @@ def __init__(
         # Trees
         max_leaves: int = 3,
         # Overall
-        n_jobs: Optional[int] = -2,
+        n_jobs: Optional[int] = -1,
         random_state: Optional[int] = None,
         # Differential Privacy
         epsilon: float = 1.0,
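
Reviewer note (not part of the patch): a minimal sketch of what the new defaults
resolve to, assuming a build of interpret that includes this commit. The
effective_n_jobs call is standard joblib; the asserted attribute values mirror
the diff above. cat_smooth is an internal option in develop.py rather than an
estimator parameter (per the diff, math.inf now means categories are sorted by
gradient alone), so it does not appear on the estimators.

# Hypothetical usage sketch -- not part of the patch.
from joblib import effective_n_jobs
from interpret.glassbox import ExplainableBoostingClassifier

ebm = ExplainableBoostingClassifier()  # all arguments left at their defaults
assert ebm.outer_bags == 16  # previously 14
assert ebm.n_jobs == -1      # previously -2

# joblib's formula for negative values is n_cpus + 1 + n_jobs:
# -1 resolves to all cores, -2 (the old default) to all cores but one.
print(effective_n_jobs(-1))  # e.g. 8 on an 8-core machine
print(effective_n_jobs(-2))  # e.g. 7 on the same machine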