Skip to content

Commit

Permalink
[Feature] apply weight in objective function (#49)
Browse files (browse the repository at this point in the history)
  • Loading branch information
RektPunk authored Nov 12, 2024
1 parent d2c6d1f commit cd2b556
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 22 deletions.
2 changes: 1 addition & 1 deletion mqboost/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
from mqboost.optimize import MQOptimizer
from mqboost.regressor import MQRegressor

__version__ = "0.2.9"
__version__ = "0.2.10"
3 changes: 2 additions & 1 deletion mqboost/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class MQDataset:
data: Returns the input features.
label: Returns the target labels.
alphas: Returns the list of quantile levels.
weight: Returns the weight vector for each instance.
dtrain: Returns the training data in the required format for the model.
dpredict: Returns the prediction data in the required format for the model.
"""
Expand Down Expand Up @@ -136,7 +137,7 @@ def label_mean(self) -> float:
return self._label_mean

@property
def weight(self) -> WeightLike | None:
def weight(self) -> np.ndarray | None:
"""Get the weights."""
return getattr(self, "_weight", None)

Expand Down
31 changes: 24 additions & 7 deletions mqboost/objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def _compute_grads_hess(
y_pred: np.ndarray,
dtrain: DtrainLike,
alphas: list[float],
weight: np.ndarray | None,
**kwargs: Any,
) -> tuple[np.ndarray, np.ndarray]:
_len_alpha = len(alphas)
Expand All @@ -89,7 +90,10 @@ def _compute_grads_hess(
grads.append(_grad / _len_y)
hess.append(_hess / _len_y)

return np.concatenate(grads), np.concatenate(hess)
if isinstance(weight, np.ndarray):
return np.concatenate(grads) * weight, np.concatenate(hess) * weight
else:
return np.concatenate(grads), np.concatenate(hess)

return _compute_grads_hess

Expand Down Expand Up @@ -144,13 +148,21 @@ def validate_parameters(objective: ObjectiveName, delta: float, epsilon: float)


def get_fobj_function(
objective: ObjectiveName, alphas: list[float], delta: float, epsilon: float
objective: ObjectiveName,
weight: np.ndarray | None,
alphas: list[float],
delta: float,
epsilon: float,
) -> ObjLike:
objective_mapping: dict[ObjectiveName, ObjLike] = {
ObjectiveName.check: partial(check_loss_grad_hess, alphas=alphas),
ObjectiveName.huber: partial(huber_loss_grad_hess, alphas=alphas, delta=delta),
ObjectiveName.check: partial(
check_loss_grad_hess, weight=weight, alphas=alphas
),
ObjectiveName.huber: partial(
huber_loss_grad_hess, weight=weight, alphas=alphas, delta=delta
),
ObjectiveName.approx: partial(
approx_loss_grad_hess, alphas=alphas, epsilon=epsilon
approx_loss_grad_hess, weight=weight, alphas=alphas, epsilon=epsilon
),
}
return objective_mapping[objective]
Expand All @@ -174,7 +186,7 @@ class MQObjective:
model (ModelName): The model type (either 'lightgbm' or 'xgboost').
delta (float): The delta parameter used for the 'huber' loss.
epsilon (float): The epsilon parameter used for the 'approx' loss.
weight (np.ndarray | None): The weight for each instance, or None if no weights are provided.
Properties:
fobj (Callable): The objective function to be minimized.
feval (Callable): The evaluation function used during training.
Expand All @@ -187,11 +199,16 @@ def __init__(
model: ModelName,
delta: float,
epsilon: float,
weight: np.ndarray | None,
) -> None:
"""Initialize the MQObjective."""
validate_parameters(objective=objective, delta=delta, epsilon=epsilon)
self._fobj = get_fobj_function(
objective=objective, alphas=alphas, delta=delta, epsilon=epsilon
objective=objective,
weight=weight,
alphas=alphas,
delta=delta,
epsilon=epsilon,
)
self._feval = get_feval_function(model=model, alphas=alphas)

Expand Down
55 changes: 46 additions & 9 deletions mqboost/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,38 @@ def _xgb_get_params(trial: Trial):


def _train_valid_split(
x_train: pd.DataFrame, y_train: np.ndarray
) -> tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray]:
return train_test_split(
x_train, y_train, test_size=0.2, random_state=42, stratify=x_train["_tau"]
)
x_train: pd.DataFrame,
y_train: np.ndarray,
weight: np.ndarray | None,
) -> tuple[
pd.DataFrame,
pd.DataFrame,
np.ndarray,
np.ndarray,
np.ndarray | None,
np.ndarray | None,
]:
if weight is not None:
_x_train, _x_valid, _y_train, _y_valid, _w_train, _w_valid = train_test_split(
x_train,
y_train,
weight,
test_size=0.2,
random_state=42,
stratify=x_train["_tau"],
)
else:
_x_train, _x_valid, _y_train, _y_valid = train_test_split(
x_train,
y_train,
test_size=0.2,
random_state=42,
stratify=x_train["_tau"],
)
_w_train = None
_w_valid = None

return _x_train, _x_valid, _y_train, _y_valid, _w_train, _w_valid


class MQOptimizer:
Expand Down Expand Up @@ -130,17 +157,27 @@ def get_params(trial: Trial) -> dict[str, Any]:
self._MQObj = MQObjective(
alphas=dataset.alphas,
objective=self._objective,
weight=dataset.weight,
model=self._model,
delta=self._delta,
epsilon=self._epsilon,
)
if valid_set is None:
x_train, x_valid, y_train, y_valid = _train_valid_split(
x_train=self._dataset.data, y_train=self._dataset.label
x_train, x_valid, y_train, y_valid, weight_train, weight_valid = (
_train_valid_split(
x_train=self._dataset.data,
y_train=self._dataset.label,
weight=dataset.weight,
)
)
dtrain = self._dataset.train_dtype(
data=x_train, label=y_train, weight=weight_train
)
dvalid = self._dataset.train_dtype(
data=x_valid, label=y_valid, weight=weight_valid
)
dtrain = self._dataset.train_dtype(data=x_train, label=y_train)
dvalid = self._dataset.train_dtype(data=x_valid, label=y_valid)
deval = self._dataset.predict_dtype(data=x_valid)

else:
dtrain = self._dataset.dtrain
dvalid = valid_set.dtrain
Expand Down
1 change: 1 addition & 0 deletions mqboost/regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def fit(
self._MQObj = MQObjective(
alphas=dataset.alphas,
objective=self._objective,
weight=dataset.weight,
model=self._model,
delta=self._delta,
epsilon=self._epsilon,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "mqboost"
version = "0.2.9"
version = "0.2.10"
description = "Monotonic composite quantile gradient boost regressor"
authors = ["RektPunk <rektpunk@gmail.com>"]
readme = "README.md"
Expand Down
13 changes: 10 additions & 3 deletions tests/test_objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def test_mqobjective_check_loss_initialization():
mq_objective = MQObjective(
alphas=alphas,
objective=ObjectiveName.check,
weight=None,
model=ModelName.xgboost,
delta=0.0,
epsilon=0.0,
Expand All @@ -53,6 +54,7 @@ def test_mqobjective_huber_loss_initialization():
mq_objective = MQObjective(
alphas=alphas,
objective=ObjectiveName.huber,
weight=None,
model=ModelName.lightgbm,
delta=delta,
epsilon=0.0,
Expand All @@ -67,6 +69,7 @@ def test_mqobjective_approx_loss_initialization():
mq_objective = MQObjective(
alphas=alphas,
objective=ObjectiveName.approx,
weight=None,
model=ModelName.xgboost,
delta=0.0,
epsilon=epsilon,
Expand All @@ -79,7 +82,9 @@ def test_mqobjective_approx_loss_initialization():
def test_check_loss_grad_hess(dummy_data):
"""Test check loss gradient and Hessian calculation."""
dtrain = dummy_data(y_true)
grads, hess = check_loss_grad_hess(y_pred=y_pred, dtrain=dtrain, alphas=alphas)
grads, hess = check_loss_grad_hess(
y_pred=y_pred, dtrain=dtrain, weight=None, alphas=alphas
)
# fmt: off
expected_grads = [-0.02, -0.02, 0.18, -0.02, -0.02, -0.1, -0.1, 0.1, -0.1, -0.1, -0.18, -0.18, 0.02, -0.18, -0.18]
# fmt: on
Expand All @@ -102,7 +107,7 @@ def test_huber_loss_grad_hess(dummy_data, delta, expected_grads):
"""Test huber loss gradient and Hessian calculation with multiple datasets and deltas."""
dtrain = dummy_data(y_true)
grads, hess = huber_loss_grad_hess(
y_pred=y_pred, dtrain=dtrain, alphas=alphas, delta=delta
y_pred=y_pred, dtrain=dtrain, weight = None,alphas=alphas, delta=delta
)

np.testing.assert_almost_equal(grads, np.array(expected_grads))
Expand Down Expand Up @@ -136,7 +141,7 @@ def test_approx_loss_grad_hess(dummy_data, epsilon, expected_grads, expected_hes
"""Test approx loss gradient and Hessian calculation."""
dtrain = dummy_data(y_true)
grads, hess = approx_loss_grad_hess(
y_pred=y_pred, dtrain=dtrain, alphas=alphas, epsilon=epsilon
y_pred=y_pred, dtrain=dtrain, weight = None, alphas=alphas, epsilon=epsilon
)
np.testing.assert_almost_equal(grads, np.array(expected_grads), decimal=4)
np.testing.assert_almost_equal(hess, np.array(expected_hess), decimal=4)
Expand Down Expand Up @@ -181,6 +186,7 @@ def test_invalid_delta_for_huber():
MQObjective(
alphas=alphas,
objective=ObjectiveName.huber,
weight=None,
model=ModelName.xgboost,
delta=-0.1, # Invalid delta (negative)
epsilon=0.0,
Expand All @@ -194,6 +200,7 @@ def test_invalid_epsilon_for_approx():
MQObjective(
alphas=alphas,
objective=ObjectiveName.approx,
weight=None,
model=ModelName.xgboost,
delta=0.0,
epsilon=-0.01, # Invalid epsilon (negative)
Expand Down
1 change: 1 addition & 0 deletions tests/test_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def test_mqobjective_property(sample_data):
optimizer._MQObj = MQObjective(
alphas=sample_data.alphas,
objective=optimizer._objective,
weight=None,
model=optimizer._model,
delta=optimizer._delta,
epsilon=optimizer._epsilon,
Expand Down

1 comment on commit cd2b556

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

| Tests | Skipped | Failures | Errors | Time |
| --- | --- | --- | --- | --- |
| 91 | 0 💤 | 0 ❌ | 0 🔥 | 6.846s ⏱️ |

Please sign in to comment.