add public gain_scale parameter

paulbkoch · paulbkoch · commit 64158beae427 · 2025-01-04T01:40:12.000-08:00
diff --git a/python/interpret-core/interpret/develop.py b/python/interpret-core/interpret/develop.py
@@ -18,7 +18,6 @@
     "min_samples_leaf_nominal": None,
     "max_cat_threshold": 9223372036854775807,
     "cat_include": 1.0,
-    "cat_scale": 1.0,
     "purify_boosting": False,
     "purify_result": False,
     "randomize_initial_feature_order": True,
diff --git a/python/interpret-core/interpret/glassbox/_ebm/_boost.py b/python/interpret-core/interpret/glassbox/_ebm/_boost.py
@@ -28,6 +28,7 @@ def boost(
     reg_alpha,
     reg_lambda,
     max_delta_step,
+    gain_scale,
     min_cat_samples,
     cat_smooth,
     missing,
@@ -198,7 +199,7 @@ def boost(
 
                     if contains_nominals and len(term_features[term_idx]) == 1:
                         # penalize nominals a bit because they benefit from sorting categories
-                        avg_gain *= develop.get_option("cat_scale")
+                        avg_gain *= gain_scale
 
                     gainkey = (-avg_gain, native.generate_seed(rng), term_idx)
                     if not make_progress:
diff --git a/python/interpret-core/interpret/glassbox/_ebm/_ebm.py b/python/interpret-core/interpret/glassbox/_ebm/_ebm.py
@@ -361,6 +361,7 @@ def __init__(
         reg_alpha,
         reg_lambda,
         max_delta_step,
+        gain_scale,
         min_cat_samples,
         cat_smooth,
         missing,
@@ -411,6 +412,7 @@ def __init__(
             self.reg_alpha = reg_alpha
             self.reg_lambda = reg_lambda
             self.max_delta_step = max_delta_step
+            self.gain_scale = gain_scale
             self.min_cat_samples = min_cat_samples
             self.cat_smooth = cat_smooth
             self.missing = missing
@@ -942,6 +944,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
             reg_alpha = 0.0
             reg_lambda = 0.0
             max_delta_step = 0.0
+            gain_scale = 1.0
             min_cat_samples = 0
             cat_smooth = 0.0
             missing = "low"
@@ -965,6 +968,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
             reg_alpha = self.reg_alpha
             reg_lambda = self.reg_lambda
             max_delta_step = self.max_delta_step
+            gain_scale = self.gain_scale
             min_cat_samples = self.min_cat_samples
             cat_smooth = self.cat_smooth
             missing = self.missing
@@ -1084,6 +1088,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
                     reg_alpha,
                     reg_lambda,
                     max_delta_step,
+                    gain_scale,
                     min_cat_samples,
                     cat_smooth,
                     missing,
@@ -1359,6 +1364,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
                         reg_alpha,
                         reg_lambda,
                         max_delta_step,
+                        gain_scale,
                         min_cat_samples,
                         cat_smooth,
                         missing,
@@ -1486,6 +1492,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
                     0.0,
                     0.0,
                     0.0,
+                    1.0,
                     min_cat_samples,
                     cat_smooth,
                     missing,
@@ -2785,6 +2792,9 @@ class ExplainableBoostingClassifier(ClassifierMixin, EBMModel):
         L2 regularization.
     max_delta_step : float, default=0.0
         Used to limit the max output of tree leaves. <=0.0 means no constraint.
+    gain_scale : float, default=1.0
+        Scale factor to apply to nominal categoricals. A scale factor above 1.0 will cause the
+        algorithm to focus more on the nominal categoricals.
     min_cat_samples : int, default=10
         Minimum number of samples in order to treat a category separately. If lower than this threshold
         the category is combined with other categories that have low numbers of samples.
@@ -2964,6 +2974,7 @@ def __init__(
         reg_alpha: Optional[float] = 0.0,
         reg_lambda: Optional[float] = 0.0,
         max_delta_step: Optional[float] = 0.0,
+        gain_scale: Optional[float] = 1.0,
         min_cat_samples: Optional[int] = 10,
         cat_smooth: Optional[float] = 10.0,
         missing: str = "separate",
@@ -2997,6 +3008,7 @@ def __init__(
             reg_alpha=reg_alpha,
             reg_lambda=reg_lambda,
             max_delta_step=max_delta_step,
+            gain_scale=gain_scale,
             min_cat_samples=min_cat_samples,
             cat_smooth=cat_smooth,
             missing=missing,
@@ -3167,6 +3179,9 @@ class ExplainableBoostingRegressor(RegressorMixin, EBMModel):
         L2 regularization.
     max_delta_step : float, default=0.0
         Used to limit the max output of tree leaves. <=0.0 means no constraint.
+    gain_scale : float, default=1.0
+        Scale factor to apply to nominal categoricals. A scale factor above 1.0 will cause the
+        algorithm to focus more on the nominal categoricals.
     min_cat_samples : int, default=10
         Minimum number of samples in order to treat a category separately. If lower than this threshold
         the category is combined with other categories that have low numbers of samples.
@@ -3346,6 +3361,7 @@ def __init__(
         reg_alpha: Optional[float] = 0.0,
         reg_lambda: Optional[float] = 0.0,
         max_delta_step: Optional[float] = 0.0,
+        gain_scale: Optional[float] = 1.0,
         min_cat_samples: Optional[int] = 10,
         cat_smooth: Optional[float] = 10.0,
         missing: str = "separate",
@@ -3379,6 +3395,7 @@ def __init__(
             reg_alpha=reg_alpha,
             reg_lambda=reg_lambda,
             max_delta_step=max_delta_step,
+            gain_scale=gain_scale,
             min_cat_samples=min_cat_samples,
             cat_smooth=cat_smooth,
             missing=missing,
@@ -3615,6 +3632,7 @@ def __init__(
             reg_alpha=0.0,
             reg_lambda=0.0,
             max_delta_step=0.0,
+            gain_scale=1.0,
             min_cat_samples=0,
             cat_smooth=0.0,
             missing=None,
@@ -3896,6 +3914,7 @@ def __init__(
             reg_alpha=0.0,
             reg_lambda=0.0,
             max_delta_step=0.0,
+            gain_scale=1.0,
             min_cat_samples=0,
             cat_smooth=0.0,
             missing=None,