Skip to content

Commit

Permalink
[Feature] Add the smooth approximated check loss (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
bbeomjin authored Aug 19, 2024
1 parent 6b29e70 commit 748378c
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 35 deletions.
2 changes: 1 addition & 1 deletion mqboost/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class ModelName(BaseEnum):
class ObjectiveName(BaseEnum):
    """Identifiers for the supported objective (loss) functions.

    Members:
        check: The check (quantile) loss.
        huber: The Huber-smoothed check loss.
        approx: The smooth approximated check loss (majorizer-based).
    """

    check: str = "check"
    huber: str = "huber"
    approx: str = "approx"


class TypeName(BaseEnum):
Expand Down
60 changes: 29 additions & 31 deletions mqboost/objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,63 +46,61 @@ def _rho(u: np.ndarray, alpha: float) -> np.ndarray:
return -u * _grad_rho(u=u, alpha=alpha)


def _error_delta_compare(u: np.ndarray, delta: float) -> Tuple[np.ndarray, np.ndarray]:
def _grad_majorizer(u: np.ndarray, alpha: float, epsilon: float = 1e-5):
"""
Compare absolute errors with delta.
Compute the gradient of the majorizer of the smooth approximated check loss function.
Args:
u (np.ndarray): The error term.
delta (float): The delta parameter.
alpha (float): The quantile level.
epsilon (float, optional): The perturbation imposing smoothness. Defaults to 1e-5.
Returns:
tuple: Two boolean arrays indicating where the errors are smaller or larger than delta.
np.ndarray: The gradient of the majorizer of the smooth approximated check loss function.

Check failure on line 57 in mqboost/objective.py

View workflow job for this annotation

GitHub Actions / Flake8

mqboost/objective.py#L57

Trailing whitespace (W291)
"""
_abs_error = np.abs(u)
return (_abs_error <= delta).astype(int), (_abs_error > delta).astype(int)
_grad = (1 - 2 * alpha - u / (epsilon + np.abs(u))) / 2
return _grad


def _grad_huber(u: np.ndarray, alpha: float, delta: float) -> np.ndarray:
def _hess_majorizer(u: np.ndarray, alpha: float, epsilon: float = 1e-5):
"""
Compute the gradient of the huber loss function.
Compute the Hessian of the majorizer of the smooth approximated check loss function.
Args:
u (np.ndarray): The error term.
alpha (float): The quantile level.
delta (float): The delta parameter.
epsilon (float, optional): The perturbation imposing smoothness. Defaults to 1e-5.
Returns:
np.ndarray: The gradient of the huber loss function.
np.ndarray: The Hessian of the majorizer of the smooth approximated check loss function.
"""
_smaller_delta, _bigger_delta = _error_delta_compare(u=u, delta=delta)
_grad = _grad_rho(u=u, alpha=alpha)
_r = _rho(u=u, alpha=alpha)
return _r * _smaller_delta + _grad * _bigger_delta
_hess = 1 / (2 * (epsilon + np.abs(u)))
return _hess


def _grad_phuber(u: np.ndarray, alpha: float, delta: float) -> np.ndarray:
def _error_delta_compare(u: np.ndarray, delta: float) -> Tuple[np.ndarray, np.ndarray]:
"""
Compute the gradient of the pseudo-Huber loss function.
Compare absolute errors with delta.
Args:
u (np.ndarray): The error term.
alpha (float): The quantile level.
delta (float): The delta parameter.
Returns:
np.ndarray: The gradient of the pseudo-Huber loss function.
tuple: Two boolean arrays indicating where the errors are smaller or larger than delta.
"""
scale = delta**2 + u**2
_grad = -abs(_grad_rho(u, alpha)) * u / scale ** (1 / 2)
return _grad
_abs_error = np.abs(u)
return (_abs_error <= delta).astype(int), (_abs_error > delta).astype(int)


def _grad_huber(u: np.ndarray, alpha: float, delta: float) -> np.ndarray:
    """
    Compute the gradient of the huber loss function.

    Args:
        u (np.ndarray): The error term.
        alpha (float): The quantile level.
        delta (float): The delta parameter.

    Returns:
        np.ndarray: The gradient of the huber loss function.
    """
    _smaller_delta, _bigger_delta = _error_delta_compare(u=u, delta=delta)
    _grad = _grad_rho(u=u, alpha=alpha)
    _r = _rho(u=u, alpha=alpha)
    # Inside the delta band (|u| <= delta) use the check-loss value; outside
    # it use the check-loss gradient. The indicator arrays are complementary.
    return _r * _smaller_delta + _grad * _bigger_delta


def _train_pred_reshape(
Expand Down Expand Up @@ -165,8 +163,8 @@ def _compute_grads_hess(
# Pre-bound gradient/Hessian callables, one per objective, consumed by the
# objective dispatcher. Each fixes the grad_fn/hess_fn pair of
# _compute_grads_hess; remaining arguments (alphas, delta/epsilon) are bound
# by the caller.
huber_loss_grad_hess: Callable = partial(
    _compute_grads_hess, grad_fn=_grad_huber, hess_fn=_hess_rho
)
majorizer_loss_grad_hess: Callable = partial(
    _compute_grads_hess, grad_fn=_grad_majorizer, hess_fn=_hess_majorizer
)


Expand Down Expand Up @@ -264,10 +262,10 @@ def __init__(
self._fobj = partial(huber_loss_grad_hess, alphas=alphas, delta=self._delta)
elif objective == ObjectiveName.check:
self._fobj = partial(check_loss_grad_hess, alphas=alphas)
elif objective == ObjectiveName.phuber:
elif objective == ObjectiveName.approx:
self._delta = delta_validate(delta=delta)
self._fobj = partial(
phuber_loss_grad_hess, alphas=alphas, delta=self._delta
majorizer_loss_grad_hess, alphas=alphas, epsilon=self._epsilon
)

self._eval_name = CHECK_LOSS
Expand Down
10 changes: 7 additions & 3 deletions mqboost/regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ class MQRegressor:
Parameters for the model.
Any params related to model can be used except "objective".
model (str): The model type (either 'lightgbm' or 'xgboost'). Default is 'lightgbm'.
objective (str): The objective function (either 'check', 'huber', or 'phuber'). Default is 'check'.
objective (str): The objective function (either 'check', 'huber', or 'approx'). Default is 'check'.
delta (float):
Parameter for the 'huber' or 'phuber' objective function.
Parameter for the 'huber' objective function.
Default is 0.01 and must be smaller than 0.05.
epsilon (float):
Parameter for the 'smooth approximated check' objective function.
Default is 1e-5.
Methods:
fit(dataset, eval_set):
Fits the regressor to the provided dataset, optionally evaluating on a separate validation set.
Expand All @@ -43,12 +45,14 @@ def __init__(
model: str = ModelName.lightgbm.value,
objective: str = ObjectiveName.check.value,
delta: float = 0.01,
epsilon: float = 1e-5
) -> None:
"""Initialize the MQRegressor."""
self._params = params
self._model = ModelName.get(model)
self._objective = ObjectiveName.get(objective)
self._delta = delta
self._epsilon = epsilon

def fit(
self,
Expand Down

0 comments on commit 748378c

Please sign in to comment.