From 97b45b099a418e8e43a62bfcdbaa08472bf4f487 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 8 Jul 2021 20:50:43 +0200 Subject: [PATCH 1/4] add SPE loss --- .../loss_functions/regression/__init__.py | 4 +- .../regression/regression_loss_functions.py | 53 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/bokbokbok/loss_functions/regression/__init__.py b/bokbokbok/loss_functions/regression/__init__.py index b20f468..b0b2641 100644 --- a/bokbokbok/loss_functions/regression/__init__.py +++ b/bokbokbok/loss_functions/regression/__init__.py @@ -3,8 +3,10 @@ from .regression_loss_functions import ( LogCoshLoss, + SPELoss. ) __all__ = [ - "LogCoshLoss" + "LogCoshLoss", + "SPELoss" ] \ No newline at end of file diff --git a/bokbokbok/loss_functions/regression/regression_loss_functions.py b/bokbokbok/loss_functions/regression/regression_loss_functions.py index 28d3e36..ceeea55 100644 --- a/bokbokbok/loss_functions/regression/regression_loss_functions.py +++ b/bokbokbok/loss_functions/regression/regression_loss_functions.py @@ -56,3 +56,56 @@ def log_cosh_loss( return grad, hess return log_cosh_loss + + +def SPELoss(): + """ + Squared Percentage Error loss + """ + + def _gradient(yhat, dtrain): + """ + Compute the gradient squared percentage error. + Args: + yhat (np.array): Predictions + dtrain: The XGBoost / LightGBM dataset + + Returns: + SPE Gradient + """ + y = dtrain.get_label() + return -2*(y-yhat)/(y**2) + + def _hessian(yhat, dtrain): + """ + Compute the hessian for squared percentage error. + Args: + yhat (np.array): Predictions + dtrain: The XGBoost / LightGBM dataset + + Returns: + SPE Hessian + """ + y = dtrain.get_label() + return 2/(y**2) + + def squared_percentage(yhat, dtrain): + """ + Calculate gradient and hessian for squared percentage error. 
+ + Args: + yhat (np.array): Predictions + dtrain: The XGBoost / LightGBM dataset + + Returns: + grad: SPE loss gradient + hess: SPE loss Hessian + """ + yhat[yhat < -1] = -1 + 1e-6 + grad = _gradient(yhat, dtrain) + + hess = _hessian(yhat, dtrain) + + return grad, hess + + return squared_percentage \ No newline at end of file From 937f992c8599c02456c368f4b6fb7d3dc0975d39 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 8 Jul 2021 20:51:18 +0200 Subject: [PATCH 2/4] add SPELoss --- bokbokbok/loss_functions/regression/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bokbokbok/loss_functions/regression/__init__.py b/bokbokbok/loss_functions/regression/__init__.py index b0b2641..803d87c 100644 --- a/bokbokbok/loss_functions/regression/__init__.py +++ b/bokbokbok/loss_functions/regression/__init__.py @@ -3,10 +3,10 @@ from .regression_loss_functions import ( LogCoshLoss, - SPELoss. + SPELoss, ) __all__ = [ "LogCoshLoss", - "SPELoss" + "SPELoss", ] \ No newline at end of file From e184c37a6d18886bcbcc4a0547175585be1cd130 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 8 Jul 2021 20:52:36 +0200 Subject: [PATCH 3/4] add SPE comment --- bokbokbok/eval_metrics/regression/regression_eval_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bokbokbok/eval_metrics/regression/regression_eval_metrics.py b/bokbokbok/eval_metrics/regression/regression_eval_metrics.py index 98d4e1e..bbd549d 100644 --- a/bokbokbok/eval_metrics/regression/regression_eval_metrics.py +++ b/bokbokbok/eval_metrics/regression/regression_eval_metrics.py @@ -35,7 +35,7 @@ def RMSPEMetric(XGBoost=False): Calculates the Root Mean Squared Percentage Error: https://www.kaggle.com/c/optiver-realized-volatility-prediction/overview/evaluation - There is no loss function for this as the gradient is constant, meaning the Hessian is equal to 0. + The corresponding Loss function is Squared Percentage Error. Args: XGBoost (Bool): Set to True if using XGBoost. 
We assume LightGBM as default use. Note that you should also set `maximize=False` in the XGBoost train function From 25575888e1c6d044cc6dae3e8483bdda4b50ea08 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 8 Jul 2021 20:53:14 +0200 Subject: [PATCH 4/4] add SPE import --- docs/tutorials/RMSPE.ipynb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/tutorials/RMSPE.ipynb b/docs/tutorials/RMSPE.ipynb index 402ade5..c7bd9ee 100644 --- a/docs/tutorials/RMSPE.ipynb +++ b/docs/tutorials/RMSPE.ipynb @@ -21,6 +21,7 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_absolute_error\n", "from bokbokbok.eval_metrics.regression import RMSPEMetric\n", + "from bokbokbok.loss_functions.regression import SPELoss\n", "\n", "X, y = make_regression(n_samples=1000, \n", " n_features=10, \n", @@ -56,16 +57,15 @@ " 'num_leaves': 10,\n", " 'learning_rate': 0.1,\n", " 'verbose': 10,\n", - " #'objective': 'RMSE',\n", " }\n", "\n", "clf = lgb.train(params=params,\n", " train_set=train,\n", " valid_sets=[train, valid],\n", " valid_names=['train','valid'],\n", + " fobj=SPELoss(),\n", " feval=RMSPEMetric(),\n", - " early_stopping_rounds=3000,\n", - " verbose_eval=1)\n", + " early_stopping_rounds=3000)\n", "\n", "mean_absolute_error(y_valid, clf.predict(X_valid))" ] @@ -99,6 +99,7 @@ " num_boost_round=3000,\n", " early_stopping_rounds=100,\n", " verbose_eval=100,\n", + " obj=LogCoshLoss(),\n", " maximize=False,\n", " feval=RMSPEMetric(XGBoost=True),\n", " evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n", @@ -128,4 +129,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file