diff --git a/R/src/interpret_R.cpp b/R/src/interpret_R.cpp index a4bbf9e49..c0fd4c6f0 100644 --- a/R/src/interpret_R.cpp +++ b/R/src/interpret_R.cpp @@ -836,9 +836,8 @@ SEXP GenerateTermUpdate_R( 0, 0, 0, - 0.0, 10.0, - 32, + 9223372036854775807, 1.0, aLeavesMax, nullptr, diff --git a/python/interpret-core/interpret/develop.py b/python/interpret-core/interpret/develop.py index 48244447c..5c8b94803 100644 --- a/python/interpret-core/interpret/develop.py +++ b/python/interpret-core/interpret/develop.py @@ -17,7 +17,6 @@ "cat_l2": 0.0, "min_samples_leaf_nominal": None, "min_cat_samples": 10, - "min_cat_hessian_percent": 0.0, "cat_smooth": math.inf, # math.inf means use only the gradient for sorting "max_cat_threshold": 9223372036854775807, "cat_include": 1.0, diff --git a/python/interpret-core/interpret/glassbox/_ebm/_boost.py b/python/interpret-core/interpret/glassbox/_ebm/_boost.py index 018f493cc..c658d827e 100644 --- a/python/interpret-core/interpret/glassbox/_ebm/_boost.py +++ b/python/interpret-core/interpret/glassbox/_ebm/_boost.py @@ -79,9 +79,6 @@ def boost( reg_lambda=reg_lambda, max_delta_step=0.0, min_cat_samples=develop.get_option("min_cat_samples"), - min_cat_hessian_percent=develop.get_option( - "min_cat_hessian_percent" - ), cat_smooth=develop.get_option("cat_smooth"), max_cat_threshold=develop.get_option("max_cat_threshold"), cat_include=develop.get_option("cat_include"), @@ -190,9 +187,6 @@ def boost( reg_lambda=reg_lambda_local, max_delta_step=max_delta_step, min_cat_samples=develop.get_option("min_cat_samples"), - min_cat_hessian_percent=develop.get_option( - "min_cat_hessian_percent" - ), cat_smooth=develop.get_option("cat_smooth"), max_cat_threshold=develop.get_option("max_cat_threshold"), cat_include=develop.get_option("cat_include"), diff --git a/python/interpret-core/interpret/utils/_native.py b/python/interpret-core/interpret/utils/_native.py index d618b6577..17eb9a693 100644 --- a/python/interpret-core/interpret/utils/_native.py +++ b/python/interpret-core/interpret/utils/_native.py @@ -1511,8 +1511,6 @@ def _initialize(self, is_debug): ct.c_double, # int64_t minCategorySamples ct.c_int64, - # double minCategoryHessianPercent - ct.c_double, # double categoricalSmoothing ct.c_double, # int64_t maxCategoricalThreshold @@ -1835,7 +1833,6 @@ def generate_term_update( reg_lambda, max_delta_step, min_cat_samples, - min_cat_hessian_percent, cat_smooth, max_cat_threshold, cat_include, @@ -1855,7 +1852,6 @@ def generate_term_update( reg_lambda: L2 regularization. max_delta_step: Used to limit the max output of tree leaves. <=0.0 means no constraint. min_cat_samples: Min samples to consider category independently - min_cat_hessian_percent: Min percentage of the hessians to consider category independently cat_smooth: Parameter used to determine which categories are included each boosting round and ordering. max_cat_threshold: max number of categories to include each boosting round cat_include: percentage of categories to include in each boosting round @@ -1904,7 +1900,6 @@ def generate_term_update( reg_lambda, max_delta_step, min_cat_samples, - min_cat_hessian_percent, cat_smooth, max_cat_threshold, cat_include, diff --git a/shared/libebm/GenerateTermUpdate.cpp b/shared/libebm/GenerateTermUpdate.cpp index 60e411fd9..c04bdffad 100644 --- a/shared/libebm/GenerateTermUpdate.cpp +++ b/shared/libebm/GenerateTermUpdate.cpp @@ -89,7 +89,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng, const FloatCalc regLambda, const FloatCalc deltaStepMax, const size_t cCategorySamplesMin, - const FloatCalc categoryHessianPercentMin, const FloatCalc categoricalSmoothing, const size_t categoricalThresholdMax, const FloatCalc categoricalInclusionPercent, @@ -217,7 +216,6 @@ static ErrorEbm BoostSingleDimensional(RandomDeterministic* const pRng, const FloatCalc regLambda, const FloatCalc deltaStepMax, const size_t cCategorySamplesMin, - const FloatCalc categoryHessianPercentMin, const FloatCalc categoricalSmoothing, const size_t categoricalThresholdMax, const FloatCalc categoricalInclusionPercent, @@ -250,7 +248,6 @@ static ErrorEbm BoostSingleDimensional(RandomDeterministic* const pRng, regLambda, deltaStepMax, cCategorySamplesMin, - categoryHessianPercentMin, categoricalSmoothing, categoricalThresholdMax, categoricalInclusionPercent, @@ -662,7 +659,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng, double regLambda, double maxDeltaStep, IntEbm minCategorySamples, - double minCategoryHessianPercent, double categoricalSmoothing, IntEbm maxCategoricalThreshold, double categoricalInclusionPercent, @@ -686,7 +682,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng, "regLambda=%le, " "maxDeltaStep=%le, " "minCategorySamples=%" IntEbmPrintf ", " - "minCategoryHessianPercent=%le, " "categoricalSmoothing=%le, " "maxCategoricalThreshold=%" IntEbmPrintf ", " "categoricalInclusionPercent=%le, " @@ -704,7 +699,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng, regLambda, maxDeltaStep, minCategorySamples, - minCategoryHessianPercent, categoricalSmoothing, maxCategoricalThreshold, categoricalInclusionPercent, @@ -838,14 +832,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng, LOG_0(Trace_Warning, "WARNING GenerateTermUpdate minSamplesLeaf can't be less than 0. Adjusting to 0."); } - FloatCalc categoryHessianPercentMin = static_cast(minCategoryHessianPercent); - if(/* NaN */ !(0.0 <= categoryHessianPercentMin)) { - categoryHessianPercentMin = 0.0; - LOG_0(Trace_Warning, - "WARNING GenerateTermUpdate minCategoryHessianPercent must be a positive number. Adjusting to minimum " - "float"); - } - FloatCalc categoricalSmoothingCalc = static_cast(categoricalSmoothing); if(categoricalSmoothingCalc < std::numeric_limits::min()) { // allow isnan(categoricalSmoothingCalc) through unscathed @@ -1281,7 +1267,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng, regLambdaCalc, deltaStepMax, cCategorySamplesMin, - categoryHessianPercentMin, categoricalSmoothingCalc, categoricalThresholdMax, categoricalInclusionPercentCalc, diff --git a/shared/libebm/PartitionOneDimensionalBoosting.cpp b/shared/libebm/PartitionOneDimensionalBoosting.cpp index 8b10ed9a4..cf27433dd 100644 --- a/shared/libebm/PartitionOneDimensionalBoosting.cpp +++ b/shared/libebm/PartitionOneDimensionalBoosting.cpp @@ -963,7 +963,6 @@ template class PartitionOneDimensionalBoo const FloatCalc regLambda, const FloatCalc deltaStepMax, const size_t cCategorySamplesMin, - const FloatCalc categoryHessianPercentMin, const FloatCalc categoricalSmoothing, const size_t categoricalThresholdMax, const FloatCalc categoricalInclusionPercent, @@ -979,9 +978,8 @@ template class PartitionOneDimensionalBoo ErrorEbm error; - // TODO: use all of these! + // TODO: mirror the bMissing option for bUnseen UNUSED(bUnseen); - UNUSED(categoryHessianPercentMin); BoosterCore* const pBoosterCore = pBoosterShell->GetBoosterCore(); const size_t cScores = GET_COUNT_SCORES(cCompilerScores, pBoosterCore->GetCountScores()); @@ -1420,7 +1418,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng, const FloatCalc regLambda, const FloatCalc deltaStepMax, const size_t cCategorySamplesMin, - const FloatCalc categoryHessianPercentMin, const FloatCalc categoricalSmoothing, const size_t categoricalThresholdMax, const FloatCalc categoricalInclusionPercent, @@ -1453,7 +1450,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng, regLambda, deltaStepMax, cCategorySamplesMin, - categoryHessianPercentMin, categoricalSmoothing, categoricalThresholdMax, categoricalInclusionPercent, @@ -1478,7 +1474,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng, regLambda, deltaStepMax, cCategorySamplesMin, - categoryHessianPercentMin, categoricalSmoothing, categoricalThresholdMax, categoricalInclusionPercent, @@ -1503,7 +1498,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng, regLambda, deltaStepMax, cCategorySamplesMin, - categoryHessianPercentMin, categoricalSmoothing, categoricalThresholdMax, categoricalInclusionPercent, @@ -1529,7 +1523,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng, regLambda, deltaStepMax, cCategorySamplesMin, - categoryHessianPercentMin, categoricalSmoothing, categoricalThresholdMax, categoricalInclusionPercent, @@ -1554,7 +1547,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng, regLambda, deltaStepMax, cCategorySamplesMin, - categoryHessianPercentMin, categoricalSmoothing, categoricalThresholdMax, categoricalInclusionPercent, diff --git a/shared/libebm/inc/libebm.h b/shared/libebm/inc/libebm.h index 35bee9dd2..1e425a2d9 100644 --- a/shared/libebm/inc/libebm.h +++ b/shared/libebm/inc/libebm.h @@ -466,7 +466,6 @@ EBM_API_INCLUDE ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng, double regLambda, double maxDeltaStep, IntEbm minCategorySamples, - double minCategoryHessianPercent, double categoricalSmoothing, IntEbm maxCategoricalThreshold, double categoricalInclusionPercent, diff --git a/shared/libebm/tests/boosting_unusual_inputs.cpp b/shared/libebm/tests/boosting_unusual_inputs.cpp index 0a7b262bc..bf5b461fa 100644 --- a/shared/libebm/tests/boosting_unusual_inputs.cpp +++ b/shared/libebm/tests/boosting_unusual_inputs.cpp @@ -276,7 +276,6 @@ TEST_CASE("leave one potential cut uncut, boosting, regression") { 0, 0, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -556,7 +555,6 @@ TEST_CASE("one leavesMax, boosting, regression") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -596,7 +594,6 @@ TEST_CASE("mono-classification") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -1255,7 +1252,6 @@ TEST_CASE("Random splitting with 3 features, boosting, multiclass") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -1296,7 +1292,6 @@ TEST_CASE("Random splitting with 3 features, boosting, multiclass, sums") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -1357,7 +1352,6 @@ TEST_CASE("Random splitting, tripple with one dimension missing, multiclass") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -1425,7 +1419,6 @@ TEST_CASE("Random splitting, pure tripples, multiclass") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -1494,7 +1487,6 @@ TEST_CASE("Random splitting, pure tripples, regression") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -1560,7 +1552,6 @@ TEST_CASE("Random splitting, pure tripples, only 1 leaf, multiclass") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -1621,7 +1612,6 @@ TEST_CASE("Random splitting, no splits, binary, sums") { k_regLambdaDefault, k_maxDeltaStepDefault, k_minCategorySamplesDefault, - k_minCategoryHessianPercentDefault, k_categoricalSmoothingDefault, k_maxCategoricalThresholdDefault, k_categoricalInclusionPercentDefault, @@ -2336,7 +2326,6 @@ static double RandomizedTesting(const AccelerationFlags acceleration) { const double regLambda = 0 == TestRand(rng, 5) ? 0.015625 : 0.0; const double maxDeltaStep = 0 == TestRand(rng, 5) ? 1.0 : 0.0; const IntEbm minCategorySamples = TestRand(rng, 100); - const double minCategoryHessianPercent = 0.0; // TODO: make random const double categoricalSmoothing = 10.0; const IntEbm maxCategoricalThreshold = 1 + TestRand(rng, cRealBins + 1); const double categoricalInclusionPercent = 0 == TestRand(rng, 2) ? 0.75 : 1.0; @@ -2357,7 +2346,6 @@ static double RandomizedTesting(const AccelerationFlags acceleration) { regLambda, maxDeltaStep, minCategorySamples, - minCategoryHessianPercent, categoricalSmoothing, maxCategoricalThreshold, categoricalInclusionPercent, diff --git a/shared/libebm/tests/libebm_test.cpp b/shared/libebm/tests/libebm_test.cpp index 80cba0bb2..c7d18e495 100644 --- a/shared/libebm/tests/libebm_test.cpp +++ b/shared/libebm/tests/libebm_test.cpp @@ -547,7 +547,6 @@ BoostRet TestBoost::Boost(const IntEbm indexTerm, const double regLambda, const double maxDeltaStep, const IntEbm minCategorySamples, - const double minCategoryHessianPercent, const double categoricalSmoothing, const IntEbm maxCategoricalThreshold, const double categoricalInclusionPercent, @@ -569,7 +568,6 @@ BoostRet TestBoost::Boost(const IntEbm indexTerm, regLambda, maxDeltaStep, minCategorySamples, - minCategoryHessianPercent, categoricalSmoothing, maxCategoricalThreshold, categoricalInclusionPercent, diff --git a/shared/libebm/tests/libebm_test.hpp b/shared/libebm/tests/libebm_test.hpp index 16e01c4a5..8a76b2b67 100644 --- a/shared/libebm/tests/libebm_test.hpp +++ b/shared/libebm/tests/libebm_test.hpp @@ -295,7 +295,6 @@ static constexpr double k_regAlphaDefault = 0.0; static constexpr double k_regLambdaDefault = 0.0; static constexpr double k_maxDeltaStepDefault = 0.0; static constexpr IntEbm k_minCategorySamplesDefault = 0; -static constexpr double k_minCategoryHessianPercentDefault = 0.0; static constexpr double k_categoricalSmoothingDefault = 10.0; static constexpr IntEbm k_maxCategoricalThresholdDefault = IntEbm{32}; static constexpr double k_categoricalInclusionPercentDefault = 0.75; @@ -492,7 +491,6 @@ class TestBoost { const double regLambda = k_regLambdaDefault, const double maxDeltaStep = k_maxDeltaStepDefault, IntEbm minCategorySamplesDefault = k_minCategorySamplesDefault, - double minCategoryHessianPercentDefault = k_minCategoryHessianPercentDefault, const double categoricalSmoothing = k_categoricalSmoothingDefault, const IntEbm maxCategoricalThreshold = k_maxCategoricalThresholdDefault, const double categoricalInclusionPercent = k_categoricalInclusionPercentDefault,