[Feature] add feature importance #38

Merged 1 commit on Sep 15, 2024
15 changes: 14 additions & 1 deletion mqboost/regressor.py
@@ -100,6 +100,7 @@ def fit(
custom_metric=self._MQObj.feval,
evals=[(_eval_set, "eval")],
)
self._colnames = dataset.columns.to_list()
self._fitted = True

def predict(
@@ -121,13 +122,25 @@ def predict(
def __predict_available(self) -> None:
"""Check if the model has been fitted before making predictions."""
if not getattr(self, "_fitted", False):
raise FittingException("Fit must be executed before predict")
raise FittingException("Fit must be executed first.")

@property
def MQObj(self) -> MQObjective:
"""Get the MQObjective instance."""
return self._MQObj

    @property
    def feature_importance(self) -> dict[str, float]:
        """Return per-feature gain importance, keyed by column name."""
        self.__predict_available()
        # Pre-fill zeros: XGBoost's get_score omits features never used in a split.
        importances = {str(k): 0.0 for k in self._colnames}
        if self.__is_lgb:
            _importance = self.model.feature_importance(importance_type="gain").tolist()
            importances.update({str(k): v for k, v in zip(self._colnames, _importance)})
        else:
            importances.update(self.model.get_score(importance_type="gain"))
        return importances

@property
def __is_lgb(self) -> bool:
"""Check if the model is LightGBM."""
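For reference, a minimal usage sketch of the new property. The dataset setup is an assumption (it mirrors the dummy_dataset_lgb fixture used in the tests below, whose definition is not part of this diff); only fit(dataset=...) and feature_importance come from this PR:

params = {"learning_rate": 0.1, "max_depth": 6}
model = MQRegressor(params=params)
model.fit(dataset=dataset)  # `dataset` stands in for a fixture like dummy_dataset_lgb
gains = model.feature_importance  # e.g. {"x1": 12.3, "x2": 0.0} (values illustrative)
top = sorted(gains.items(), key=lambda kv: kv[1], reverse=True)  # rank features by gain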
36 changes: 36 additions & 0 deletions tests/test_regressor.py
@@ -177,3 +177,39 @@ def test_monotone_constraints_called_xgb(dummy_dataset_xgb):
for k in range(len(predictions) - 1)
]
)


def test_feature_importance_before_fit_raises():
params = {"learning_rate": 0.1, "max_depth": 6}
with pytest.raises(FittingException, match="Fit must be executed first."):
_ = MQRegressor(params=params).feature_importance


def test_feature_importance_after_fit(dummy_dataset_lgb):
params = {"learning_rate": 0.1, "max_depth": 6}
gbm_model = MQRegressor(params=params)
gbm_model.fit(dataset=dummy_dataset_lgb)
feature_importances = gbm_model.feature_importance

assert isinstance(
feature_importances, dict
), "Feature importances should be a dictionary"
assert len(feature_importances) == len(
dummy_dataset_lgb.columns
), "Feature importance length mismatch"
for feature in dummy_dataset_lgb.columns:
assert (
str(feature) in feature_importances
), f"Feature {feature} not found in importance"


def test_feature_importance_positive(dummy_dataset_lgb):
"""Test that at least some feature importances are non-zero after training"""
params = {"learning_rate": 0.1, "max_depth": 6}
gbm_model = MQRegressor(params=params)
gbm_model.fit(dataset=dummy_dataset_lgb)
feature_importances = gbm_model.feature_importance

    assert all(
        importance >= 0 for importance in feature_importances.values()
    ), "All importances should be non-negative."
    assert any(
        importance > 0 for importance in feature_importances.values()
    ), "At least one importance should be positive after training."