Skip to content

Commit

Permalink
eliminate min_cat_hessian_percent
Browse files Browse the repository at this point in the history
  • Loading branch information
paulbkoch committed Jan 3, 2025
1 parent c1a00d9 commit 39edc08
Show file tree
Hide file tree
Showing 10 changed files with 2 additions and 55 deletions.
3 changes: 1 addition & 2 deletions R/src/interpret_R.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -836,9 +836,8 @@ SEXP GenerateTermUpdate_R(
0,
0,
0,
0.0,
10.0,
32,
9223372036854775807,
1.0,
aLeavesMax,
nullptr,
Expand Down
1 change: 0 additions & 1 deletion python/interpret-core/interpret/develop.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
"cat_l2": 0.0,
"min_samples_leaf_nominal": None,
"min_cat_samples": 10,
"min_cat_hessian_percent": 0.0,
"cat_smooth": math.inf, # math.inf means use only the gradient for sorting
"max_cat_threshold": 9223372036854775807,
"cat_include": 1.0,
Expand Down
6 changes: 0 additions & 6 deletions python/interpret-core/interpret/glassbox/_ebm/_boost.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ def boost(
reg_lambda=reg_lambda,
max_delta_step=0.0,
min_cat_samples=develop.get_option("min_cat_samples"),
min_cat_hessian_percent=develop.get_option(
"min_cat_hessian_percent"
),
cat_smooth=develop.get_option("cat_smooth"),
max_cat_threshold=develop.get_option("max_cat_threshold"),
cat_include=develop.get_option("cat_include"),
Expand Down Expand Up @@ -190,9 +187,6 @@ def boost(
reg_lambda=reg_lambda_local,
max_delta_step=max_delta_step,
min_cat_samples=develop.get_option("min_cat_samples"),
min_cat_hessian_percent=develop.get_option(
"min_cat_hessian_percent"
),
cat_smooth=develop.get_option("cat_smooth"),
max_cat_threshold=develop.get_option("max_cat_threshold"),
cat_include=develop.get_option("cat_include"),
Expand Down
5 changes: 0 additions & 5 deletions python/interpret-core/interpret/utils/_native.py
Original file line number Diff line number Diff line change
Expand Up @@ -1511,8 +1511,6 @@ def _initialize(self, is_debug):
ct.c_double,
# int64_t minCategorySamples
ct.c_int64,
# double minCategoryHessianPercent
ct.c_double,
# double categoricalSmoothing
ct.c_double,
# int64_t maxCategoricalThreshold
Expand Down Expand Up @@ -1835,7 +1833,6 @@ def generate_term_update(
reg_lambda,
max_delta_step,
min_cat_samples,
min_cat_hessian_percent,
cat_smooth,
max_cat_threshold,
cat_include,
Expand All @@ -1855,7 +1852,6 @@ def generate_term_update(
reg_lambda: L2 regularization.
max_delta_step: Used to limit the max output of tree leaves. <=0.0 means no constraint.
min_cat_samples: Min samples to consider category independently
min_cat_hessian_percent: Min percentage of the hessians to consider category independently
cat_smooth: Parameter used to determine which categories are included each boosting round and ordering.
max_cat_threshold: max number of categories to include each boosting round
cat_include: percentage of categories to include in each boosting round
Expand Down Expand Up @@ -1904,7 +1900,6 @@ def generate_term_update(
reg_lambda,
max_delta_step,
min_cat_samples,
min_cat_hessian_percent,
cat_smooth,
max_cat_threshold,
cat_include,
Expand Down
15 changes: 0 additions & 15 deletions shared/libebm/GenerateTermUpdate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng,
const FloatCalc regLambda,
const FloatCalc deltaStepMax,
const size_t cCategorySamplesMin,
const FloatCalc categoryHessianPercentMin,
const FloatCalc categoricalSmoothing,
const size_t categoricalThresholdMax,
const FloatCalc categoricalInclusionPercent,
Expand Down Expand Up @@ -217,7 +216,6 @@ static ErrorEbm BoostSingleDimensional(RandomDeterministic* const pRng,
const FloatCalc regLambda,
const FloatCalc deltaStepMax,
const size_t cCategorySamplesMin,
const FloatCalc categoryHessianPercentMin,
const FloatCalc categoricalSmoothing,
const size_t categoricalThresholdMax,
const FloatCalc categoricalInclusionPercent,
Expand Down Expand Up @@ -250,7 +248,6 @@ static ErrorEbm BoostSingleDimensional(RandomDeterministic* const pRng,
regLambda,
deltaStepMax,
cCategorySamplesMin,
categoryHessianPercentMin,
categoricalSmoothing,
categoricalThresholdMax,
categoricalInclusionPercent,
Expand Down Expand Up @@ -662,7 +659,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng,
double regLambda,
double maxDeltaStep,
IntEbm minCategorySamples,
double minCategoryHessianPercent,
double categoricalSmoothing,
IntEbm maxCategoricalThreshold,
double categoricalInclusionPercent,
Expand All @@ -686,7 +682,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng,
"regLambda=%le, "
"maxDeltaStep=%le, "
"minCategorySamples=%" IntEbmPrintf ", "
"minCategoryHessianPercent=%le, "
"categoricalSmoothing=%le, "
"maxCategoricalThreshold=%" IntEbmPrintf ", "
"categoricalInclusionPercent=%le, "
Expand All @@ -704,7 +699,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng,
regLambda,
maxDeltaStep,
minCategorySamples,
minCategoryHessianPercent,
categoricalSmoothing,
maxCategoricalThreshold,
categoricalInclusionPercent,
Expand Down Expand Up @@ -838,14 +832,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng,
LOG_0(Trace_Warning, "WARNING GenerateTermUpdate minSamplesLeaf can't be less than 0. Adjusting to 0.");
}

FloatCalc categoryHessianPercentMin = static_cast<FloatCalc>(minCategoryHessianPercent);
if(/* NaN */ !(0.0 <= categoryHessianPercentMin)) {
categoryHessianPercentMin = 0.0;
LOG_0(Trace_Warning,
"WARNING GenerateTermUpdate minCategoryHessianPercent must be a positive number. Adjusting to minimum "
"float");
}

FloatCalc categoricalSmoothingCalc = static_cast<FloatCalc>(categoricalSmoothing);
if(categoricalSmoothingCalc < std::numeric_limits<FloatCalc>::min()) {
// allow isnan(categoricalSmoothingCalc) through unscathed
Expand Down Expand Up @@ -1281,7 +1267,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng,
regLambdaCalc,
deltaStepMax,
cCategorySamplesMin,
categoryHessianPercentMin,
categoricalSmoothingCalc,
categoricalThresholdMax,
categoricalInclusionPercentCalc,
Expand Down
10 changes: 1 addition & 9 deletions shared/libebm/PartitionOneDimensionalBoosting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,6 @@ template<bool bHessian, size_t cCompilerScores> class PartitionOneDimensionalBoo
const FloatCalc regLambda,
const FloatCalc deltaStepMax,
const size_t cCategorySamplesMin,
const FloatCalc categoryHessianPercentMin,
const FloatCalc categoricalSmoothing,
const size_t categoricalThresholdMax,
const FloatCalc categoricalInclusionPercent,
Expand All @@ -979,9 +978,8 @@ template<bool bHessian, size_t cCompilerScores> class PartitionOneDimensionalBoo

ErrorEbm error;

// TODO: use all of these!
// TODO: mirror the bMissing option for bUnseen
UNUSED(bUnseen);
UNUSED(categoryHessianPercentMin);

BoosterCore* const pBoosterCore = pBoosterShell->GetBoosterCore();
const size_t cScores = GET_COUNT_SCORES(cCompilerScores, pBoosterCore->GetCountScores());
Expand Down Expand Up @@ -1420,7 +1418,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng,
const FloatCalc regLambda,
const FloatCalc deltaStepMax,
const size_t cCategorySamplesMin,
const FloatCalc categoryHessianPercentMin,
const FloatCalc categoricalSmoothing,
const size_t categoricalThresholdMax,
const FloatCalc categoricalInclusionPercent,
Expand Down Expand Up @@ -1453,7 +1450,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng,
regLambda,
deltaStepMax,
cCategorySamplesMin,
categoryHessianPercentMin,
categoricalSmoothing,
categoricalThresholdMax,
categoricalInclusionPercent,
Expand All @@ -1478,7 +1474,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng,
regLambda,
deltaStepMax,
cCategorySamplesMin,
categoryHessianPercentMin,
categoricalSmoothing,
categoricalThresholdMax,
categoricalInclusionPercent,
Expand All @@ -1503,7 +1498,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng,
regLambda,
deltaStepMax,
cCategorySamplesMin,
categoryHessianPercentMin,
categoricalSmoothing,
categoricalThresholdMax,
categoricalInclusionPercent,
Expand All @@ -1529,7 +1523,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng,
regLambda,
deltaStepMax,
cCategorySamplesMin,
categoryHessianPercentMin,
categoricalSmoothing,
categoricalThresholdMax,
categoricalInclusionPercent,
Expand All @@ -1554,7 +1547,6 @@ extern ErrorEbm PartitionOneDimensionalBoosting(RandomDeterministic* const pRng,
regLambda,
deltaStepMax,
cCategorySamplesMin,
categoryHessianPercentMin,
categoricalSmoothing,
categoricalThresholdMax,
categoricalInclusionPercent,
Expand Down
1 change: 0 additions & 1 deletion shared/libebm/inc/libebm.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,6 @@ EBM_API_INCLUDE ErrorEbm EBM_CALLING_CONVENTION GenerateTermUpdate(void* rng,
double regLambda,
double maxDeltaStep,
IntEbm minCategorySamples,
double minCategoryHessianPercent,
double categoricalSmoothing,
IntEbm maxCategoricalThreshold,
double categoricalInclusionPercent,
Expand Down
12 changes: 0 additions & 12 deletions shared/libebm/tests/boosting_unusual_inputs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,6 @@ TEST_CASE("leave one potential cut uncut, boosting, regression") {
0,
0,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -556,7 +555,6 @@ TEST_CASE("one leavesMax, boosting, regression") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -596,7 +594,6 @@ TEST_CASE("mono-classification") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -1255,7 +1252,6 @@ TEST_CASE("Random splitting with 3 features, boosting, multiclass") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -1296,7 +1292,6 @@ TEST_CASE("Random splitting with 3 features, boosting, multiclass, sums") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -1357,7 +1352,6 @@ TEST_CASE("Random splitting, tripple with one dimension missing, multiclass") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -1425,7 +1419,6 @@ TEST_CASE("Random splitting, pure tripples, multiclass") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -1494,7 +1487,6 @@ TEST_CASE("Random splitting, pure tripples, regression") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -1560,7 +1552,6 @@ TEST_CASE("Random splitting, pure tripples, only 1 leaf, multiclass") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -1621,7 +1612,6 @@ TEST_CASE("Random splitting, no splits, binary, sums") {
k_regLambdaDefault,
k_maxDeltaStepDefault,
k_minCategorySamplesDefault,
k_minCategoryHessianPercentDefault,
k_categoricalSmoothingDefault,
k_maxCategoricalThresholdDefault,
k_categoricalInclusionPercentDefault,
Expand Down Expand Up @@ -2336,7 +2326,6 @@ static double RandomizedTesting(const AccelerationFlags acceleration) {
const double regLambda = 0 == TestRand(rng, 5) ? 0.015625 : 0.0;
const double maxDeltaStep = 0 == TestRand(rng, 5) ? 1.0 : 0.0;
const IntEbm minCategorySamples = TestRand(rng, 100);
const double minCategoryHessianPercent = 0.0; // TODO: make random
const double categoricalSmoothing = 10.0;
const IntEbm maxCategoricalThreshold = 1 + TestRand(rng, cRealBins + 1);
const double categoricalInclusionPercent = 0 == TestRand(rng, 2) ? 0.75 : 1.0;
Expand All @@ -2357,7 +2346,6 @@ static double RandomizedTesting(const AccelerationFlags acceleration) {
regLambda,
maxDeltaStep,
minCategorySamples,
minCategoryHessianPercent,
categoricalSmoothing,
maxCategoricalThreshold,
categoricalInclusionPercent,
Expand Down
2 changes: 0 additions & 2 deletions shared/libebm/tests/libebm_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,6 @@ BoostRet TestBoost::Boost(const IntEbm indexTerm,
const double regLambda,
const double maxDeltaStep,
const IntEbm minCategorySamples,
const double minCategoryHessianPercent,
const double categoricalSmoothing,
const IntEbm maxCategoricalThreshold,
const double categoricalInclusionPercent,
Expand All @@ -569,7 +568,6 @@ BoostRet TestBoost::Boost(const IntEbm indexTerm,
regLambda,
maxDeltaStep,
minCategorySamples,
minCategoryHessianPercent,
categoricalSmoothing,
maxCategoricalThreshold,
categoricalInclusionPercent,
Expand Down
2 changes: 0 additions & 2 deletions shared/libebm/tests/libebm_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,6 @@ static constexpr double k_regAlphaDefault = 0.0;
static constexpr double k_regLambdaDefault = 0.0;
static constexpr double k_maxDeltaStepDefault = 0.0;
static constexpr IntEbm k_minCategorySamplesDefault = 0;
static constexpr double k_minCategoryHessianPercentDefault = 0.0;
static constexpr double k_categoricalSmoothingDefault = 10.0;
static constexpr IntEbm k_maxCategoricalThresholdDefault = IntEbm{32};
static constexpr double k_categoricalInclusionPercentDefault = 0.75;
Expand Down Expand Up @@ -492,7 +491,6 @@ class TestBoost {
const double regLambda = k_regLambdaDefault,
const double maxDeltaStep = k_maxDeltaStepDefault,
IntEbm minCategorySamplesDefault = k_minCategorySamplesDefault,
double minCategoryHessianPercentDefault = k_minCategoryHessianPercentDefault,
const double categoricalSmoothing = k_categoricalSmoothingDefault,
const IntEbm maxCategoricalThreshold = k_maxCategoricalThresholdDefault,
const double categoricalInclusionPercent = k_categoricalInclusionPercentDefault,
Expand Down

0 comments on commit 39edc08

Please sign in to comment.