From 97b45b099a418e8e43a62bfcdbaa08472bf4f487 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 8 Jul 2021 20:50:43 +0200 Subject: [PATCH 1/4] add SPE loss --- .../loss_functions/regression/__init__.py | 4 +- .../regression/regression_loss_functions.py | 53 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/bokbokbok/loss_functions/regression/__init__.py b/bokbokbok/loss_functions/regression/__init__.py index b20f468..b0b2641 100644 --- a/bokbokbok/loss_functions/regression/__init__.py +++ b/bokbokbok/loss_functions/regression/__init__.py @@ -3,8 +3,10 @@ from .regression_loss_functions import ( LogCoshLoss, + SPELoss. ) __all__ = [ - "LogCoshLoss" + "LogCoshLoss", + "SPELoss" ] \ No newline at end of file diff --git a/bokbokbok/loss_functions/regression/regression_loss_functions.py b/bokbokbok/loss_functions/regression/regression_loss_functions.py index 28d3e36..ceeea55 100644 --- a/bokbokbok/loss_functions/regression/regression_loss_functions.py +++ b/bokbokbok/loss_functions/regression/regression_loss_functions.py @@ -56,3 +56,56 @@ def log_cosh_loss( return grad, hess return log_cosh_loss + + +def SPELoss(): + """ + Squared Percentage Error loss + """ + + def _gradient(yhat, dtrain): + """ + Compute the gradient squared percentage error. + Args: + yhat (np.array): Predictions + dtrain: The XGBoost / LightGBM dataset + + Returns: + SPE Gradient + """ + y = dtrain.get_label() + return -2*(y-yhat)/(y**2) + + def _hessian(yhat, dtrain): + """ + Compute the hessian for squared percentage error. + Args: + yhat (np.array): Predictions + dtrain: The XGBoost / LightGBM dataset + + Returns: + SPE Hessian + """ + y = dtrain.get_label() + return 2/(y**2) + + def squared_percentage(yhat, dtrain): + """ + Calculate gradient and hessian for squared percentage error. 
+ + Args: + yhat (np.array): Predictions + dtrain: The XGBoost / LightGBM dataset + + Returns: + grad: SPE loss gradient + hess: SPE loss Hessian + """ + yhat[yhat < -1] = -1 + 1e-6 + grad = _gradient(yhat, dtrain) + + hess = _hessian(yhat, dtrain) + + return grad, hess + + return squared_percentage \ No newline at end of file From 937f992c8599c02456c368f4b6fb7d3dc0975d39 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 8 Jul 2021 20:51:18 +0200 Subject: [PATCH 2/4] add SPELoss --- bokbokbok/loss_functions/regression/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bokbokbok/loss_functions/regression/__init__.py b/bokbokbok/loss_functions/regression/__init__.py index b0b2641..803d87c 100644 --- a/bokbokbok/loss_functions/regression/__init__.py +++ b/bokbokbok/loss_functions/regression/__init__.py @@ -3,10 +3,10 @@ from .regression_loss_functions import ( LogCoshLoss, - SPELoss. + SPELoss, ) __all__ = [ "LogCoshLoss", - "SPELoss" + "SPELoss", ] \ No newline at end of file From e184c37a6d18886bcbcc4a0547175585be1cd130 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 8 Jul 2021 20:52:36 +0200 Subject: [PATCH 3/4] add SPE comment --- bokbokbok/eval_metrics/regression/regression_eval_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bokbokbok/eval_metrics/regression/regression_eval_metrics.py b/bokbokbok/eval_metrics/regression/regression_eval_metrics.py index 98d4e1e..bbd549d 100644 --- a/bokbokbok/eval_metrics/regression/regression_eval_metrics.py +++ b/bokbokbok/eval_metrics/regression/regression_eval_metrics.py @@ -35,7 +35,7 @@ def RMSPEMetric(XGBoost=False): Calculates the Root Mean Squared Percentage Error: https://www.kaggle.com/c/optiver-realized-volatility-prediction/overview/evaluation - There is no loss function for this as the gradient is constant, meaning the Hessian is equal to 0. + The corresponding Loss function is Squared Percentage Error. Args: XGBoost (Bool): Set to True if using XGBoost. 
We assume LightGBM as default use. Note that you should also set `maximize=False` in the XGBoost train function From 25575888e1c6d044cc6dae3e8483bdda4b50ea08 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 8 Jul 2021 20:53:14 +0200 Subject: [PATCH 4/4] add SPE import --- docs/tutorials/RMSPE.ipynb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/tutorials/RMSPE.ipynb b/docs/tutorials/RMSPE.ipynb index 402ade5..c7bd9ee 100644 --- a/docs/tutorials/RMSPE.ipynb +++ b/docs/tutorials/RMSPE.ipynb @@ -21,6 +21,7 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_absolute_error\n", "from bokbokbok.eval_metrics.regression import RMSPEMetric\n", + "from bokbokbok.loss_functions.regression import SPELoss\n", "\n", "X, y = make_regression(n_samples=1000, \n", " n_features=10, \n", @@ -56,16 +57,15 @@ " 'num_leaves': 10,\n", " 'learning_rate': 0.1,\n", " 'verbose': 10,\n", - " #'objective': 'RMSE',\n", " }\n", "\n", "clf = lgb.train(params=params,\n", " train_set=train,\n", " valid_sets=[train, valid],\n", " valid_names=['train','valid'],\n", + " fobj=SPELoss(),\n", " feval=RMSPEMetric(),\n", - " early_stopping_rounds=3000,\n", - " verbose_eval=1)\n", + " early_stopping_rounds=3000)\n", "\n", "mean_absolute_error(y_valid, clf.predict(X_valid))" ] @@ -99,6 +99,7 @@ " num_boost_round=3000,\n", " early_stopping_rounds=100,\n", " verbose_eval=100,\n", + " obj=LogCoshLoss(),\n", " maximize=False,\n", " feval=RMSPEMetric(XGBoost=True),\n", " evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n", @@ -128,4 +129,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file