From 64e2681de7660e0a6bb174f84d95e6bd0aadbe75 Mon Sep 17 00:00:00 2001 From: RektPunk <110188257+RektPunk@users.noreply.github.com> Date: Thu, 24 Oct 2024 20:50:14 +0900 Subject: [PATCH] [Feature] introduce weight in MQDataset (#46) --- mqboost/__init__.py | 2 +- mqboost/base.py | 1 + mqboost/dataset.py | 16 +++++++++++++++- mqboost/optimize.py | 1 + pyproject.toml | 2 +- 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/mqboost/__init__.py b/mqboost/__init__.py index 2d6f82d..ee48a94 100644 --- a/mqboost/__init__.py +++ b/mqboost/__init__.py @@ -3,4 +3,4 @@ from mqboost.optimize import MQOptimizer from mqboost.regressor import MQRegressor -__version__ = "0.2.8" +__version__ = "0.2.9" diff --git a/mqboost/base.py b/mqboost/base.py index 59585dc..827da11 100644 --- a/mqboost/base.py +++ b/mqboost/base.py @@ -29,6 +29,7 @@ def _isin(cls, text: str) -> None: ModelLike = lgb.basic.Booster | xgb.Booster DtrainLike = lgb.basic.Dataset | xgb.DMatrix ParamsLike = dict[str, float | int | str | bool] +WeightLike = list[float] | list[int] | np.ndarray | pd.Series # Name diff --git a/mqboost/dataset.py b/mqboost/dataset.py index e43a5e9..3883dc5 100644 --- a/mqboost/dataset.py +++ b/mqboost/dataset.py @@ -1,5 +1,6 @@ from typing import Callable, Optional +import numpy as np import pandas as pd from mqboost.base import ( @@ -9,6 +10,7 @@ FittingException, ModelName, TypeName, + WeightLike, XdataLike, YdataLike, ) @@ -32,6 +34,7 @@ class MQDataset: Must be in ascending order and contain no duplicates. data (pd.DataFrame | pd.Series | np.ndarray): The input features. label (pd.Series | np.ndarray): The target labels (if provided). + weight (list[float] | list[int] | np.ndarray | pd.Series): Weight for each instance (if provided). model (str): The model type (LightGBM or XGBoost). reference (MQBoost | None): Reference dataset for label encoding and label mean. @@ -52,6 +55,7 @@ def __init__( alphas: AlphaLike, data: XdataLike, label: YdataLike | None = None, + weight: WeightLike | None = None, model: str = ModelName.lightgbm.value, reference: Optional["MQDataset"] = None, ) -> None: @@ -85,6 +89,10 @@ def __init__( self._label = prepare_y(y=label - self._label_mean, alphas=self._alphas) self._is_none_label = False + if weight is not None: + _weight = np.array(weight) if not isinstance(weight, np.ndarray) else weight + self._weight = prepare_y(y=_weight, alphas=self._alphas) + @property def train_dtype(self) -> Callable: """Get the data type function for training data.""" @@ -123,14 +131,20 @@ def label(self) -> pd.DataFrame: @property def label_mean(self) -> float: + """Get the label mean.""" self.__label_available() return self._label_mean + @property + def weight(self) -> WeightLike | None: + """Get the weights.""" + return getattr(self, "_weight", None) + @property def dtrain(self) -> DtrainLike: """Get the training data in the required format for the model.""" self.__label_available() - return self._train_dtype(data=self._data, label=self._label) + return self._train_dtype(data=self._data, label=self._label, weight=self.weight) @property def dpredict(self) -> DtrainLike | Callable: diff --git a/mqboost/optimize.py b/mqboost/optimize.py index ef800dc..de98dbe 100644 --- a/mqboost/optimize.py +++ b/mqboost/optimize.py @@ -200,6 +200,7 @@ def __optuna_objective( params=params, dtrain=dtrain, evals=[(dvalid, "valid")], + num_boost_round=100, ) _gbm = xgb.train(**model_params) _preds = _gbm.predict(data=deval) + self._label_mean diff --git a/pyproject.toml b/pyproject.toml index 0ad6d87..3aca39d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mqboost" -version = "0.2.8" +version = "0.2.9" description = "Monotonic composite quantile gradient boost regressor" authors = ["RektPunk "] readme = "README.md"