82 spe #83

Merged (4 commits, Jul 9, 2021)
@@ -35,7 +35,7 @@ def RMSPEMetric(XGBoost=False):
     Calculates the Root Mean Squared Percentage Error:
     https://www.kaggle.com/c/optiver-realized-volatility-prediction/overview/evaluation

-    There is no loss function for this as the gradient is constant, meaning the Hessian is equal to 0.
+    The corresponding loss function is Squared Percentage Error.
     Args:
         XGBoost (Bool): Set to True if using XGBoost. We assume LightGBM as default use.
             Note that you should also set `maximize=False` in the XGBoost train function.
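For reference, the metric documented here is RMSPE = sqrt(mean(((y - yhat) / y)^2)), per the linked Kaggle page. Below is a minimal sketch of an eval-metric factory with the same contract. It is hypothetical code, not the library's actual implementation; it only assumes LightGBM's `(name, value, is_higher_better)` and XGBoost's `(name, value)` return conventions for custom metrics:

```python
import numpy as np

def rmspe_metric_sketch(XGBoost=False):
    """Hypothetical RMSPE eval-metric factory mirroring the documented interface."""

    def rmspe(yhat, dtrain):
        y = dtrain.get_label()
        value = np.sqrt(np.mean(((y - yhat) / y) ** 2))
        if XGBoost:
            # XGBoost custom metrics return (name, value);
            # pair with maximize=False in xgb.train, as the docstring notes.
            return "RMSPE", value
        # LightGBM custom metrics return (name, value, is_higher_better).
        return "RMSPE", value, False

    return rmspe
```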
4 changes: 3 additions & 1 deletion bokbokbok/loss_functions/regression/__init__.py
@@ -3,8 +3,10 @@

 from .regression_loss_functions import (
     LogCoshLoss,
+    SPELoss,
 )

 __all__ = [
-    "LogCoshLoss"
+    "LogCoshLoss",
+    "SPELoss",
 ]
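After this change the new loss is importable from the package namespace, matching the tutorial further down:

```python
from bokbokbok.loss_functions.regression import LogCoshLoss, SPELoss
```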
53 changes: 53 additions & 0 deletions bokbokbok/loss_functions/regression/regression_loss_functions.py
@@ -56,3 +56,56 @@ def log_cosh_loss(
         return grad, hess

     return log_cosh_loss
+
+
+def SPELoss():
+    """
+    Squared Percentage Error loss.
+    """
+
+    def _gradient(yhat, dtrain):
+        """
+        Compute the gradient of squared percentage error.
+        Args:
+            yhat (np.array): Predictions
+            dtrain: The XGBoost / LightGBM dataset
+
+        Returns:
+            SPE Gradient
+        """
+        y = dtrain.get_label()
+        return -2 * (y - yhat) / (y ** 2)
+
+    def _hessian(yhat, dtrain):
+        """
+        Compute the Hessian for squared percentage error.
+        Args:
+            yhat (np.array): Predictions
+            dtrain: The XGBoost / LightGBM dataset
+
+        Returns:
+            SPE Hessian
+        """
+        y = dtrain.get_label()
+        return 2 / (y ** 2)
+
+    def squared_percentage(yhat, dtrain):
+        """
+        Calculate gradient and Hessian for squared percentage error.
+
+        Args:
+            yhat (np.array): Predictions
+            dtrain: The XGBoost / LightGBM dataset
+
+        Returns:
+            grad: SPE loss gradient
+            hess: SPE loss Hessian
+        """
+        yhat[yhat < -1] = -1 + 1e-6  # floor predictions below -1 at -1 + 1e-6
+        grad = _gradient(yhat, dtrain)
+
+        hess = _hessian(yhat, dtrain)
+
+        return grad, hess
+
+    return squared_percentage
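The gradient and Hessian implemented above follow from differentiating the per-sample squared percentage error with respect to the prediction:

```latex
L(y, \hat{y}) = \left(\frac{y - \hat{y}}{y}\right)^{2}, \qquad
\frac{\partial L}{\partial \hat{y}} = \frac{-2\,(y - \hat{y})}{y^{2}}, \qquad
\frac{\partial^{2} L}{\partial \hat{y}^{2}} = \frac{2}{y^{2}}.
```

Note that the Hessian depends only on the label, not on the prediction, so each sample contributes a constant second-order weight 2/y^2 to the boosting step.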
9 changes: 5 additions & 4 deletions docs/tutorials/RMSPE.ipynb
@@ -21,6 +21,7 @@
 "from sklearn.model_selection import train_test_split\n",
 "from sklearn.metrics import mean_absolute_error\n",
 "from bokbokbok.eval_metrics.regression import RMSPEMetric\n",
+"from bokbokbok.loss_functions.regression import SPELoss\n",
 "\n",
 "X, y = make_regression(n_samples=1000, \n",
 "                       n_features=10, \n",
@@ -56,16 +57,15 @@
 "          'num_leaves': 10,\n",
 "          'learning_rate': 0.1,\n",
 "          'verbose': 10,\n",
-"          #'objective': 'RMSE',\n",
 "         }\n",
 "\n",
 "clf = lgb.train(params=params,\n",
 "                train_set=train,\n",
 "                valid_sets=[train, valid],\n",
 "                valid_names=['train','valid'],\n",
+"                fobj=SPELoss(),\n",
 "                feval=RMSPEMetric(),\n",
-"                early_stopping_rounds=3000,\n",
-"                verbose_eval=1)\n",
+"                early_stopping_rounds=3000)\n",
 "\n",
 "mean_absolute_error(y_valid, clf.predict(X_valid))"
@@ -99,6 +99,7 @@
 "                num_boost_round=3000,\n",
 "                early_stopping_rounds=100,\n",
 "                verbose_eval=100,\n",
+"                obj=LogCoshLoss(),\n",
 "                maximize=False,\n",
 "                feval=RMSPEMetric(XGBoost=True),\n",
 "                evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
@@ -128,4 +129,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
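As a quick sanity check on the analytic gradient in SPELoss, one can compare it against central finite differences. This is a standalone sketch; the small `_Data` wrapper standing in for `dtrain.get_label()` is hypothetical, and `_gradient` replicates the inner function from the diff above:

```python
import numpy as np

class _Data:
    """Hypothetical stand-in for an XGBoost DMatrix / LightGBM Dataset."""
    def __init__(self, y):
        self._y = y
    def get_label(self):
        return self._y

def _gradient(yhat, dtrain):
    # Same formula as the _gradient inner function in SPELoss.
    y = dtrain.get_label()
    return -2 * (y - yhat) / (y ** 2)

def spe(yhat, y):
    # Per-sample squared percentage error.
    return ((y - yhat) / y) ** 2

rng = np.random.default_rng(0)
y = rng.uniform(1.0, 5.0, size=100)       # keep labels away from zero
yhat = y + rng.normal(scale=0.1, size=100)

eps = 1e-6
numeric = (spe(yhat + eps, y) - spe(yhat - eps, y)) / (2 * eps)
analytic = _gradient(yhat, _Data(y))
assert np.allclose(numeric, analytic, atol=1e-5)
```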