From 18cae4cc062f918bbf53feff7d90420a70c535d9 Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Thu, 18 Apr 2024 15:00:51 +0200 Subject: [PATCH 01/11] deepnpts_firststab --- nbs/common.scalers.ipynb | 4 +- nbs/core.ipynb | 3 +- nbs/losses.pytorch.ipynb | 1712 +++++++++++++++++++++++++++-- nbs/models.deepnpts.ipynb | 1137 +++++++++++++++++++ neuralforecast/_modidx.py | 18 + neuralforecast/common/_scalers.py | 4 +- neuralforecast/core.py | 3 + neuralforecast/losses/pytorch.py | 110 +- neuralforecast/models/__init__.py | 3 +- neuralforecast/models/deepnpts.py | 557 ++++++++++ 10 files changed, 3371 insertions(+), 180 deletions(-) create mode 100644 nbs/models.deepnpts.ipynb create mode 100644 neuralforecast/models/deepnpts.py diff --git a/nbs/common.scalers.ipynb b/nbs/common.scalers.ipynb index c06fa0da2..921d5adaf 100644 --- a/nbs/common.scalers.ipynb +++ b/nbs/common.scalers.ipynb @@ -567,8 +567,8 @@ " shape = list(x.shape)\n", " shape[dim] = 1\n", "\n", - " x_shift = torch.zeros(shape)\n", - " x_scale = torch.ones(shape)\n", + " x_shift = torch.zeros(shape, device=x.device)\n", + " x_scale = torch.ones(shape, device=x.device)\n", "\n", " return x_shift, x_scale" ] diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 2c2b15c50..710fcd0b4 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -90,7 +90,7 @@ " Informer, Autoformer, FEDformer,\n", " StemGNN, PatchTST, TimesNet, TimeLLM, TSMixer, TSMixerx,\n", " MLPMultivariate, iTransformer,\n", - " BiTCN,\n", + " BiTCN, DeepNPTS\n", ")" ] }, @@ -233,6 +233,7 @@ " 'mlpmultivariate': MLPMultivariate, 'automlpmultivariate': MLPMultivariate,\n", " 'itransformer': iTransformer, 'autoitransformer': iTransformer,\n", " 'bitcn': BiTCN, 'autobitcn': BiTCN,\n", + " 'deepnpts': DeepNPTS, 'autodeepnpts': DeepNPTS,\n", "}" ] }, diff --git a/nbs/losses.pytorch.ipynb b/nbs/losses.pytorch.ipynb index 55cd837b3..36adfaabd 100644 --- a/nbs/losses.pytorch.ipynb +++ b/nbs/losses.pytorch.ipynb @@ -244,7 +244,61 @@ "execution_count": 
null, "id": "1d004cd0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAE.__init__\n", + "\n", + "> MAE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Absolute Error\n", + "\n", + "Calculates Mean Absolute Error between\n", + "`y` and `y_hat`. MAE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the\n", + "deviation of the prediction and the true\n", + "value at a given time and averages these devations\n", + "over the length of the series.\n", + "\n", + "$$ \\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} |y_{\\tau} - \\hat{y}_{\\tau}| $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAE.__init__\n", + "\n", + "> MAE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Absolute Error\n", + "\n", + "Calculates Mean Absolute Error between\n", + "`y` and `y_hat`. MAE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the\n", + "deviation of the prediction and the true\n", + "value at a given time and averages these devations\n", + "over the length of the series.\n", + "\n", + "$$ \\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} |y_{\\tau} - \\hat{y}_{\\tau}| $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MAE, name='MAE.__init__', title_level=3)" ] @@ -254,7 +308,51 @@ "execution_count": null, "id": "0a20a273", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L106){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAE.__call__\n", + "\n", + "> MAE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mae`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L106){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAE.__call__\n", + "\n", + "> MAE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mae`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MAE.__call__, name='MAE.__call__', title_level=3)" ] @@ -328,7 +426,61 @@ "execution_count": null, "id": "e8c65b82", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L126){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MSE.__init__\n", + "\n", + "> MSE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Squared Error\n", + "\n", + "Calculates Mean Squared Error between\n", + "`y` and `y_hat`. MSE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the \n", + "squared deviation of the prediction and the true\n", + "value at a given time, and averages these devations\n", + "over the length of the series.\n", + "\n", + "$$ \\mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L126){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MSE.__init__\n", + "\n", + "> MSE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Squared Error\n", + "\n", + "Calculates Mean Squared Error between\n", + "`y` and `y_hat`. MSE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the \n", + "squared deviation of the prediction and the true\n", + "value at a given time, and averages these devations\n", + "over the length of the series.\n", + "\n", + "$$ \\mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MSE, name='MSE.__init__', title_level=3)" ] @@ -338,7 +490,51 @@ "execution_count": null, "id": "b0126a7f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L147){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MSE.__call__\n", + "\n", + "> MSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mse`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L147){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MSE.__call__\n", + "\n", + "> MSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mse`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MSE.__call__, name='MSE.__call__', title_level=3)" ] @@ -416,7 +612,67 @@ "execution_count": null, "id": "d961d383", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L167){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### RMSE.__init__\n", + "\n", + "> RMSE.__init__ (horizon_weight=None)\n", + "\n", + "Root Mean Squared Error\n", + "\n", + "Calculates Root Mean Squared Error between\n", + "`y` and `y_hat`. RMSE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the squared deviation\n", + "of the prediction and the observed value at a given time and\n", + "averages these devations over the length of the series.\n", + "Finally the RMSE will be in the same scale\n", + "as the original time series so its comparison with other\n", + "series is possible only if they share a common scale. \n", + "RMSE has a direct connection to the L2 norm.\n", + "\n", + "$$ \\mathrm{RMSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\sqrt{\\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2}} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L167){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### RMSE.__init__\n", + "\n", + "> RMSE.__init__ (horizon_weight=None)\n", + "\n", + "Root Mean Squared Error\n", + "\n", + "Calculates Root Mean Squared Error between\n", + "`y` and `y_hat`. RMSE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the squared deviation\n", + "of the prediction and the observed value at a given time and\n", + "averages these devations over the length of the series.\n", + "Finally the RMSE will be in the same scale\n", + "as the original time series so its comparison with other\n", + "series is possible only if they share a common scale. \n", + "RMSE has a direct connection to the L2 norm.\n", + "\n", + "$$ \\mathrm{RMSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\sqrt{\\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2}} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(RMSE, name='RMSE.__init__', title_level=3)" ] @@ -426,7 +682,51 @@ "execution_count": null, "id": "d398d3e3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L191){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### RMSE.__call__\n", + "\n", + "> RMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`rmse`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L191){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### RMSE.__call__\n", + "\n", + "> RMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`rmse`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(RMSE.__call__, name='RMSE.__call__', title_level=3)" ] @@ -517,7 +817,69 @@ "execution_count": null, "id": "174e8042", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L212){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAPE.__init__\n", + "\n", + "> MAPE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Absolute Percentage Error\n", + "\n", + "Calculates Mean Absolute Percentage Error between\n", + "`y` and `y_hat`. MAPE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the percentual deviation\n", + "of the prediction and the observed value at a given time and\n", + "averages these devations over the length of the series.\n", + "The closer to zero an observed value is, the higher penalty MAPE loss\n", + "assigns to the corresponding error.\n", + "\n", + "$$ \\mathrm{MAPE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L212){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAPE.__init__\n", + "\n", + "> MAPE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Absolute Percentage Error\n", + "\n", + "Calculates Mean Absolute Percentage Error between\n", + "`y` and `y_hat`. MAPE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the percentual deviation\n", + "of the prediction and the observed value at a given time and\n", + "averages these devations over the length of the series.\n", + "The closer to zero an observed value is, the higher penalty MAPE loss\n", + "assigns to the corresponding error.\n", + "\n", + "$$ \\mathrm{MAPE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MAPE, name='MAPE.__init__', title_level=3)" ] @@ -527,7 +889,51 @@ "execution_count": null, "id": "da63f136", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L237){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAPE.__call__\n", + "\n", + "> MAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mape`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L237){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAPE.__call__\n", + "\n", + "> MAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mape`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MAPE.__call__, name='MAPE.__call__', title_level=3)" ] @@ -609,7 +1015,73 @@ "execution_count": null, "id": "dee99fb8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L259){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### SMAPE.__init__\n", + "\n", + "> SMAPE.__init__ (horizon_weight=None)\n", + "\n", + "Symmetric Mean Absolute Percentage Error\n", + "\n", + "Calculates Symmetric Mean Absolute Percentage Error between\n", + "`y` and `y_hat`. SMAPE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the relative deviation\n", + "of the prediction and the observed value scaled by the sum of the\n", + "absolute values for the prediction and observed value at a\n", + "given time, then averages these devations over the length\n", + "of the series. This allows the SMAPE to have bounds between\n", + "0% and 200% which is desireble compared to normal MAPE that\n", + "may be undetermined when the target is zero.\n", + "\n", + "$$ \\mathrm{sMAPE}_{2}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|+|\\hat{y}_{\\tau}|} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L259){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### SMAPE.__init__\n", + "\n", + "> SMAPE.__init__ (horizon_weight=None)\n", + "\n", + "Symmetric Mean Absolute Percentage Error\n", + "\n", + "Calculates Symmetric Mean Absolute Percentage Error between\n", + "`y` and `y_hat`. SMAPE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the relative deviation\n", + "of the prediction and the observed value scaled by the sum of the\n", + "absolute values for the prediction and observed value at a\n", + "given time, then averages these devations over the length\n", + "of the series. This allows the SMAPE to have bounds between\n", + "0% and 200% which is desireble compared to normal MAPE that\n", + "may be undetermined when the target is zero.\n", + "\n", + "$$ \\mathrm{sMAPE}_{2}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|+|\\hat{y}_{\\tau}|} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(SMAPE, name='SMAPE.__init__', title_level=3)" ] @@ -619,7 +1091,51 @@ "execution_count": null, "id": "db62a845", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L286){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### SMAPE.__call__\n", + "\n", + "> SMAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`smape`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L286){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### SMAPE.__call__\n", + "\n", + "> SMAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`smape`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(SMAPE.__call__, name='SMAPE.__call__', title_level=3)" ] @@ -706,7 +1222,71 @@ "execution_count": null, "id": "b6a4cf21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L308){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MASE.__init__\n", + "\n", + "> MASE.__init__ (seasonality:int, horizon_weight=None)\n", + "\n", + "Mean Absolute Scaled Error \n", + "Calculates the Mean Absolute Scaled Error between\n", + "`y` and `y_hat`. MASE measures the relative prediction\n", + "accuracy of a forecasting method by comparinng the mean absolute errors\n", + "of the prediction and the observed value against the mean\n", + "absolute errors of the seasonal naive model.\n", + "The MASE partially composed the Overall Weighted Average (OWA), \n", + "used in the M4 Competition.\n", + "\n", + "$$ \\mathrm{MASE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{\\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau})} $$\n", + "\n", + "**Parameters:**
\n", + "`seasonality`: int. Main frequency of the time series; Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Rob J. Hyndman, & Koehler, A. B. \"Another look at measures of forecast accuracy\".](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", + "[Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, \"The M4 Competition: 100,000 time series and 61 forecasting methods\".](https://www.sciencedirect.com/science/article/pii/S0169207019301128)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L308){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MASE.__init__\n", + "\n", + "> MASE.__init__ (seasonality:int, horizon_weight=None)\n", + "\n", + "Mean Absolute Scaled Error \n", + "Calculates the Mean Absolute Scaled Error between\n", + "`y` and `y_hat`. MASE measures the relative prediction\n", + "accuracy of a forecasting method by comparinng the mean absolute errors\n", + "of the prediction and the observed value against the mean\n", + "absolute errors of the seasonal naive model.\n", + "The MASE partially composed the Overall Weighted Average (OWA), \n", + "used in the M4 Competition.\n", + "\n", + "$$ \\mathrm{MASE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{\\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau})} $$\n", + "\n", + "**Parameters:**
\n", + "`seasonality`: int. Main frequency of the time series; Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Rob J. Hyndman, & Koehler, A. B. \"Another look at measures of forecast accuracy\".](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", + "[Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, \"The M4 Competition: 100,000 time series and 61 forecasting methods\".](https://www.sciencedirect.com/science/article/pii/S0169207019301128)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MASE, name='MASE.__init__', title_level=3)" ] @@ -716,7 +1296,53 @@ "execution_count": null, "id": "32a2c11b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L335){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MASE.__call__\n", + "\n", + "> MASE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> y_insample:torch.Tensor, mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor (batch_size, output_size), Actual values.
\n", + "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", + "`y_hat`: tensor (batch_size, output_size), Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "`mase`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L335){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MASE.__call__\n", + "\n", + "> MASE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> y_insample:torch.Tensor, mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor (batch_size, output_size), Actual values.
\n", + "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", + "`y_hat`: tensor (batch_size, output_size), Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "`mase`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MASE.__call__, name='MASE.__call__', title_level=3)" ] @@ -803,7 +1429,69 @@ "execution_count": null, "id": "edeb6f9a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L364){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### relMSE.__init__\n", + "\n", + "> relMSE.__init__ (y_train, horizon_weight=None)\n", + "\n", + "Relative Mean Squared Error\n", + "Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n", + "as an alternative to percentage errors, to avoid measure unstability.\n", + "$$ \\mathrm{relMSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{\\hat{y}}^{naive1}) =\n", + "\\frac{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}}^{naive1})} $$\n", + "\n", + "**Parameters:**
\n", + "`y_train`: numpy array, Training values.
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "- [Hyndman, R. J and Koehler, A. B. (2006).\n", + " \"Another look at measures of forecast accuracy\",\n", + " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", + "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", + " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L364){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### relMSE.__init__\n", + "\n", + "> relMSE.__init__ (y_train, horizon_weight=None)\n", + "\n", + "Relative Mean Squared Error\n", + "Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n", + "as an alternative to percentage errors, to avoid measure unstability.\n", + "$$ \\mathrm{relMSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{\\hat{y}}^{naive1}) =\n", + "\\frac{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}}^{naive1})} $$\n", + "\n", + "**Parameters:**
\n", + "`y_train`: numpy array, Training values.
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "- [Hyndman, R. J and Koehler, A. B. (2006).\n", + " \"Another look at measures of forecast accuracy\",\n", + " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", + "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", + " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(relMSE, name='relMSE.__init__', title_level=3)" ] @@ -813,7 +1501,53 @@ "execution_count": null, "id": "a317b5c5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L391){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### relMSE.__call__\n", + "\n", + "> relMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor (batch_size, output_size), Actual values.
\n", + "`y_hat`: tensor (batch_size, output_size), Predicted values.
\n", + "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", + "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`relMSE`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L391){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### relMSE.__call__\n", + "\n", + "> relMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor (batch_size, output_size), Actual values.
\n", + "`y_hat`: tensor (batch_size, output_size), Predicted values.
\n", + "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`relMSE`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(relMSE.__call__, name='relMSE.__call__', title_level=3)" ] @@ -898,7 +1632,67 @@ "execution_count": null, "id": "70bd46d9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L418){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### QuantileLoss.__init__\n", + "\n", + "> QuantileLoss.__init__ (q, horizon_weight=None)\n", + "\n", + "Quantile Loss\n", + "\n", + "Computes the quantile loss between `y` and `y_hat`.\n", + "QL measures the deviation of a quantile forecast.\n", + "By weighting the absolute deviation in a non symmetric way, the\n", + "loss pays more attention to under or over estimation.\n", + "A common value for q is 0.5 for the deviation from the median (Pinball loss).\n", + "\n", + "$$ \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q)}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\Big( (1-q)\\,( \\hat{y}^{(q)}_{\\tau} - y_{\\tau} )_{+} + q\\,( y_{\\tau} - \\hat{y}^{(q)}_{\\tau} )_{+} \\Big) $$\n", + "\n", + "**Parameters:**
\n", + "`q`: float, between 0 and 1. The slope of the quantile loss, in the context of quantile regression, the q determines the conditional quantile level.
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L418){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### QuantileLoss.__init__\n", + "\n", + "> QuantileLoss.__init__ (q, horizon_weight=None)\n", + "\n", + "Quantile Loss\n", + "\n", + "Computes the quantile loss between `y` and `y_hat`.\n", + "QL measures the deviation of a quantile forecast.\n", + "By weighting the absolute deviation in a non symmetric way, the\n", + "loss pays more attention to under or over estimation.\n", + "A common value for q is 0.5 for the deviation from the median (Pinball loss).\n", + "\n", + "$$ \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q)}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\Big( (1-q)\\,( \\hat{y}^{(q)}_{\\tau} - y_{\\tau} )_{+} + q\\,( y_{\\tau} - \\hat{y}^{(q)}_{\\tau} )_{+} \\Big) $$\n", + "\n", + "**Parameters:**
\n", + "`q`: float, between 0 and 1. The slope of the quantile loss, in the context of quantile regression, the q determines the conditional quantile level.
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(QuantileLoss, name='QuantileLoss.__init__', title_level=3)" ] @@ -908,7 +1702,51 @@ "execution_count": null, "id": "0b1588e9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L445){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### QuantileLoss.__call__\n", + "\n", + "> QuantileLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`quantile_loss`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L445){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### QuantileLoss.__call__\n", + "\n", + "> QuantileLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`quantile_loss`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(QuantileLoss.__call__, name='QuantileLoss.__call__', title_level=3)" ] @@ -1080,7 +1918,87 @@ "execution_count": null, "id": "8f42ec82", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L494){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MQLoss.__init__\n", + "\n", + "> MQLoss.__init__ (level=[80, 90], quantiles=None, horizon_weight=None)\n", + "\n", + "Multi-Quantile loss\n", + "\n", + "Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", + "MQL calculates the average multi-quantile Loss for\n", + "a given set of quantiles, based on the absolute \n", + "difference between predicted quantiles and observed values.\n", + "\n", + "$$ \\mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\\hat{y}}^{(q_{1})}_{\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q_{i})}_{\\tau}) $$\n", + "\n", + "The limit behavior of MQL allows to measure the accuracy \n", + "of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with \n", + "the continuous ranked probability score (CRPS). This can be achieved \n", + "through a numerical integration technique, that discretizes the quantiles \n", + "and treats the CRPS integral with a left Riemann approximation, averaging over \n", + "uniformly distanced quantiles. \n", + "\n", + "$$ \\mathrm{CRPS}(y_{\\tau}, \\mathbf{\\hat{F}}_{\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\tau}, \\hat{y}^{(q)}_{\\tau}) dq $$\n", + "\n", + "**Parameters:**
\n", + "`level`: int list [0,100]. Probability levels for prediction intervals (Defaults median).\n", + "`quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", + "[James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L494){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MQLoss.__init__\n", + "\n", + "> MQLoss.__init__ (level=[80, 90], quantiles=None, horizon_weight=None)\n", + "\n", + "Multi-Quantile loss\n", + "\n", + "Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", + "MQL calculates the average multi-quantile Loss for\n", + "a given set of quantiles, based on the absolute \n", + "difference between predicted quantiles and observed values.\n", + "\n", + "$$ \\mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\\hat{y}}^{(q_{1})}_{\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q_{i})}_{\\tau}) $$\n", + "\n", + "The limit behavior of MQL allows to measure the accuracy \n", + "of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with \n", + "the continuous ranked probability score (CRPS). This can be achieved \n", + "through a numerical integration technique, that discretizes the quantiles \n", + "and treats the CRPS integral with a left Riemann approximation, averaging over \n", + "uniformly distanced quantiles. \n", + "\n", + "$$ \\mathrm{CRPS}(y_{\\tau}, \\mathbf{\\hat{F}}_{\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\tau}, \\hat{y}^{(q)}_{\\tau}) dq $$\n", + "\n", + "**Parameters:**
\n", + "`level`: int list [0,100]. Probability levels for prediction intervals (Defaults median).\n", + "`quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", + "[James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MQLoss, name='MQLoss.__init__', title_level=3)" ] @@ -1090,7 +2008,51 @@ "execution_count": null, "id": "bac2237a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L568){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MQLoss.__call__\n", + "\n", + "> MQLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mqloss`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L568){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MQLoss.__call__\n", + "\n", + "> MQLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "`mqloss`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MQLoss.__call__, name='MQLoss.__call__', title_level=3)" ] @@ -1109,7 +2071,17 @@ "execution_count": null, "id": "da37f2ef", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", + "Parameter containing:\n", + "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" + ] + } + ], "source": [ "# | hide\n", "# Unit tests to check MQLoss' stored quantiles\n", @@ -1654,7 +2626,99 @@ "execution_count": null, "id": "a462101b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L913){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.__init__\n", + "\n", + "> DistributionLoss.__init__ (distribution, level=[80, 90], quantiles=None,\n", + "> num_samples=1000, return_params=False,\n", + "> **distribution_kwargs)\n", + "\n", + "DistributionLoss\n", + "\n", + "This PyTorch module wraps the `torch.distribution` classes allowing it to \n", + "interact with NeuralForecast models modularly. It shares the negative \n", + "log-likelihood as the optimization objective and a sample method to \n", + "generate empirically the quantiles defined by the `level` list.\n", + "\n", + "Additionally, it implements a distribution transformation that factorizes the\n", + "scale-dependent likelihood parameters into a base scale and a multiplier \n", + "efficiently learnable within the network's non-linearities operating ranges.\n", + "\n", + "Available distributions:
\n", + "- Poisson
\n", + "- Normal
\n", + "- StudentT
\n", + "- NegativeBinomial
\n", + "- Tweedie
\n", + "- Bernoulli (Temporal Classifiers)\n", + "\n", + "**Parameters:**
\n", + "`distribution`: str, identifier of a torch.distributions.Distribution class.
\n", + "`level`: float list [0,100], confidence levels for prediction intervals.
\n", + "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", + "`num_samples`: int=1000, number of samples for the empirical quantiles.
\n", + "`return_params`: bool=False, whether or not to return the Distribution parameters.

\n", + "\n", + "**References:**
\n", + "- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
\n", + "- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020).\n", + " \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L913){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.__init__\n", + "\n", + "> DistributionLoss.__init__ (distribution, level=[80, 90], quantiles=None,\n", + "> num_samples=1000, return_params=False,\n", + "> **distribution_kwargs)\n", + "\n", + "DistributionLoss\n", + "\n", + "This PyTorch module wraps the `torch.distribution` classes allowing it to \n", + "interact with NeuralForecast models modularly. It shares the negative \n", + "log-likelihood as the optimization objective and a sample method to \n", + "generate empirically the quantiles defined by the `level` list.\n", + "\n", + "Additionally, it implements a distribution transformation that factorizes the\n", + "scale-dependent likelihood parameters into a base scale and a multiplier \n", + "efficiently learnable within the network's non-linearities operating ranges.\n", + "\n", + "Available distributions:
\n", + "- Poisson
\n", + "- Normal
\n", + "- StudentT
\n", + "- NegativeBinomial
\n", + "- Tweedie
\n", + "- Bernoulli (Temporal Classifiers)\n", + "\n", + "**Parameters:**
\n", + "`distribution`: str, identifier of a torch.distributions.Distribution class.
\n", + "`level`: float list [0,100], confidence levels for prediction intervals.
\n", + "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", + "`num_samples`: int=1000, number of samples for the empirical quantiles.
\n", + "`return_params`: bool=False, whether or not to return the Distribution parameters.

\n", + "\n", + "**References:**
\n", + "- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
\n", + "- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020).\n", + " \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DistributionLoss, name='DistributionLoss.__init__', title_level=3)" ] @@ -1664,7 +2728,65 @@ "execution_count": null, "id": "d8c367f8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1040){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.sample\n", + "\n", + "> DistributionLoss.sample (distr_args:torch.Tensor,\n", + "> num_samples:Optional[int]=None)\n", + "\n", + "Construct the empirical quantiles from the estimated Distribution,\n", + "sampling from it `num_samples` independently.\n", + "\n", + "**Parameters**
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`num_samples`: int=500, overwrite number of samples for the empirical quantiles.
\n", + "\n", + "**Returns**
\n", + "`samples`: tensor, shape [B,H,`num_samples`].
\n", + "`quantiles`: tensor, empirical quantiles defined by `levels`.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1040){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.sample\n", + "\n", + "> DistributionLoss.sample (distr_args:torch.Tensor,\n", + "> num_samples:Optional[int]=None)\n", + "\n", + "Construct the empirical quantiles from the estimated Distribution,\n", + "sampling from it `num_samples` independently.\n", + "\n", + "**Parameters**
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`num_samples`: int=500, overwrite number of samples for the empirical quantiles.
\n", + "\n", + "**Returns**
\n", + "`samples`: tensor, shape [B,H,`num_samples`].
\n", + "`quantiles`: tensor, empirical quantiles defined by `levels`.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DistributionLoss.sample, name='DistributionLoss.sample', title_level=3)" ] @@ -1674,7 +2796,75 @@ "execution_count": null, "id": "04e32679", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1083){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.__call__\n", + "\n", + "> DistributionLoss.__call__ (y:torch.Tensor, distr_args:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "Computes the negative log-likelihood objective function. \n", + "To estimate the following predictive distribution:\n", + "\n", + "$$\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta) \\quad \\mathrm{and} \\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta))$$\n", + "\n", + "where $\\theta$ represents the distributions parameters. It aditionally \n", + "summarizes the objective signal using a weighted average using the `mask` tensor. \n", + "\n", + "**Parameters**
\n", + "`y`: tensor, Actual values.
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns**
\n", + "`loss`: scalar, weighted loss function against which backpropagation will be performed.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1083){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.__call__\n", + "\n", + "> DistributionLoss.__call__ (y:torch.Tensor, distr_args:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "Computes the negative log-likelihood objective function. \n", + "To estimate the following predictive distribution:\n", + "\n", + "$$\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta) \\quad \\mathrm{and} \\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta))$$\n", + "\n", + "where $\\theta$ represents the distributions parameters. It aditionally \n", + "summarizes the objective signal using a weighted average using the `mask` tensor. \n", + "\n", + "**Parameters**
\n", + "`y`: tensor, Actual values.
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns**
\n", + "`loss`: scalar, weighted loss function against which backpropagation will be performed.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DistributionLoss.__call__, name='DistributionLoss.__call__', title_level=3)" ] @@ -1684,7 +2874,17 @@ "execution_count": null, "id": "14a7e381", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", + "Parameter containing:\n", + "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" + ] + } + ], "source": [ "# | hide\n", "# Unit tests to check DistributionLoss' stored quantiles\n", @@ -1764,35 +2964,42 @@ " # If True, predict_step will return Distribution's parameters\n", " self.return_params = return_params\n", " if self.return_params:\n", - " self.param_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n", + " lambda_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n", + " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", + " self.param_names = [i for j in zip(lambda_names, weight_names) for i in j]\n", " self.output_names = self.output_names + self.param_names\n", "\n", " # Add first output entry for the sample_mean\n", " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = n_components\n", + " self.outputsize_multiplier = 2 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " return (output,)#, weights\n", - " \n", - " def scale_decouple(self, \n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None):\n", - " \"\"\" Scale Decouple\n", + " lambdas, weights = output.chunk(2, dim=-1)\n", + " return (lambdas, weights)\n", + "\n", + " def scale_decouple(\n", + " self,\n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " ):\n", + " \"\"\"Scale Decouple\n", "\n", " Stabilizes 
model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", - " lambdas = output[0]\n", + " lambdas, weights = output\n", + " weights = F.softmax(weights, dim=-1)\n", + "\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(lambdas.size(dim=0), 1, -1)\n", " scale = scale.view(lambdas.size(dim=0), 1, -1)\n", " lambdas = (lambdas * scale) + loc\n", " lambdas = F.softplus(lambdas)\n", - " return (lambdas,)\n", + " return (lambdas, weights)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -1814,15 +3021,10 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", "\n", - " lambdas = distr_args[0]\n", + " lambdas, weights = distr_args\n", " B, H, K = lambdas.size()\n", " Q = len(self.quantiles)\n", "\n", - " # Sample K ~ Mult(weights)\n", - " # shared across B, H\n", - " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", - " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n", - "\n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", " lambdas = lambdas.flatten() \n", @@ -1860,7 +3062,7 @@ " \n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", " if mask is None: \n", " mask = (y > 0) * 1\n", @@ -1868,11 +3070,9 @@ " mask = mask * ((y > 0) * 1)\n", "\n", " eps = 1e-10\n", - " lambdas = distr_args[0]\n", + " lambdas, weights = distr_args\n", " B, H, K = lambdas.size()\n", "\n", - " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n", - "\n", " y = y[:,:,None]\n", " mask = mask[:,:,None]\n", "\n", @@ -1897,7 +3097,7 @@ " return loss\n", "\n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor],\n", + " distr_args: 
Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)\n" @@ -1908,7 +3108,83 @@ "execution_count": null, "id": "62d7daba", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.__init__\n", + "\n", + "> PMM.__init__ (n_components=10, level=[80, 90], quantiles=None,\n", + "> num_samples=1000, return_params=False,\n", + "> batch_correlation=False, horizon_correlation=False)\n", + "\n", + "Poisson Mixture Mesh\n", + "\n", + "This Poisson Mixture statistical model assumes independence across groups of \n", + "data $\\mathcal{G}=\\{[g_{i}]\\}$, and estimates relationships within the group.\n", + "\n", + "$$ \\mathrm{P}\\left(\\mathbf{y}_{[b][t+1:t+H]}\\right) = \n", + "\\prod_{ [g_{i}] \\in \\mathcal{G}} \\mathrm{P} \\left(\\mathbf{y}_{[g_{i}][\\tau]} \\right) =\n", + "\\prod_{\\beta\\in[g_{i}]} \n", + "\\left(\\sum_{k=1}^{K} w_k \\prod_{(\\beta,\\tau) \\in [g_i][t+1:t+H]} \\mathrm{Poisson}(y_{\\beta,\\tau}, \\hat{\\lambda}_{\\beta,\\tau,k}) \\right)$$\n", + "\n", + "**Parameters:**
\n", + "`n_components`: int=10, the number of mixture components.
\n", + "`level`: float list [0,100], confidence levels for prediction intervals.
\n", + "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", + "`return_params`: bool=False, whether or not to return the Distribution parameters.
\n", + "`batch_correlation`: bool=False, whether or not to model batch correlations.
\n", + "`horizon_correlation`: bool=False, whether or not to model horizon correlations.
\n", + "\n", + "**References:**
\n", + "[Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. Submitted to the International \n", + "Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.__init__\n", + "\n", + "> PMM.__init__ (n_components=10, level=[80, 90], quantiles=None,\n", + "> num_samples=1000, return_params=False,\n", + "> batch_correlation=False, horizon_correlation=False)\n", + "\n", + "Poisson Mixture Mesh\n", + "\n", + "This Poisson Mixture statistical model assumes independence across groups of \n", + "data $\\mathcal{G}=\\{[g_{i}]\\}$, and estimates relationships within the group.\n", + "\n", + "$$ \\mathrm{P}\\left(\\mathbf{y}_{[b][t+1:t+H]}\\right) = \n", + "\\prod_{ [g_{i}] \\in \\mathcal{G}} \\mathrm{P} \\left(\\mathbf{y}_{[g_{i}][\\tau]} \\right) =\n", + "\\prod_{\\beta\\in[g_{i}]} \n", + "\\left(\\sum_{k=1}^{K} w_k \\prod_{(\\beta,\\tau) \\in [g_i][t+1:t+H]} \\mathrm{Poisson}(y_{\\beta,\\tau}, \\hat{\\lambda}_{\\beta,\\tau,k}) \\right)$$\n", + "\n", + "**Parameters:**
\n", + "`n_components`: int=10, the number of mixture components.
\n", + "`level`: float list [0,100], confidence levels for prediction intervals.
\n", + "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", + "`return_params`: bool=False, whether or not to return the Distribution parameters.
\n", + "`batch_correlation`: bool=False, whether or not to model batch correlations.
\n", + "`horizon_correlation`: bool=False, whether or not to model horizon correlations.
\n", + "\n", + "**References:**
\n", + "[Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. Submitted to the International \n", + "Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(PMM, name='PMM.__init__', title_level=3)" ] @@ -1918,7 +3194,63 @@ "execution_count": null, "id": "fa8da65c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1206){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.sample\n", + "\n", + "> PMM.sample (distr_args, num_samples=None)\n", + "\n", + "Construct the empirical quantiles from the estimated Distribution,\n", + "sampling from it `num_samples` independently.\n", + "\n", + "**Parameters**
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`num_samples`: int=500, overwrites number of samples for the empirical quantiles.
\n", + "\n", + "**Returns**
\n", + "`samples`: tensor, shape [B,H,`num_samples`].
\n", + "`quantiles`: tensor, empirical quantiles defined by `levels`.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1206){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.sample\n", + "\n", + "> PMM.sample (distr_args, num_samples=None)\n", + "\n", + "Construct the empirical quantiles from the estimated Distribution,\n", + "sampling from it `num_samples` independently.\n", + "\n", + "**Parameters**
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`num_samples`: int=500, overwrites number of samples for the empirical quantiles.
\n", + "\n", + "**Returns**
\n", + "`samples`: tensor, shape [B,H,`num_samples`].
\n", + "`quantiles`: tensor, empirical quantiles defined by `levels`.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(PMM.sample, name='PMM.sample', title_level=3)" ] @@ -1928,7 +3260,39 @@ "execution_count": null, "id": "ba75717c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1305){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.__call__\n", + "\n", + "> PMM.__call__ (y:torch.Tensor, distr_args:Tuple[torch.Tensor],\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "Call self as a function." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1305){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.__call__\n", + "\n", + "> PMM.__call__ (y:torch.Tensor, distr_args:Tuple[torch.Tensor],\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "Call self as a function." 
+ ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(PMM.__call__, name='PMM.__call__', title_level=3)" ] @@ -1947,7 +3311,17 @@ "execution_count": null, "id": "e4a20e21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", + "Parameter containing:\n", + "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" + ] + } + ], "source": [ "# | hide\n", "# Unit tests to check PMM's stored quantiles\n", @@ -1971,11 +3345,43 @@ "execution_count": null, "id": "a56a2fbe", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "lambdas.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "samples.shape (N,H,num_samples) torch.Size([2, 2, 1000])\n", + "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", + "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| hide\n", - "# Create single mixture and broadcast to N,H,K\n", - "weights = torch.ones((1,3))[None, :, :]\n", + "# Create single mixture and broadcast to N, H, K\n", + "weights = torch.ones((2,3))[None, :, :]\n", "lambdas = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :]\n", "\n", "# Create repetitions for the batch dimension N.\n", @@ -1987,7 +3393,7 @@ "print('lambdas.shape (N,H,K) \\t', lambdas.shape)\n", "\n", "distr = PMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (lambdas,)\n", + "distr_args = (lambdas, weights)\n", "samples, sample_mean, quants = distr.sample(distr_args)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", @@ -2092,38 +3498,44 @@ " if self.return_params:\n", " mu_names = [f\"-mu-{i}\" for i in range(1, n_components + 1)]\n", " std_names = [f\"-std-{i}\" for i in range(1, n_components + 1)]\n", - " mu_std_names = [i for j in zip(mu_names, std_names) for i in j]\n", - " self.output_names = self.output_names + mu_std_names\n", + " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", + " self.param_names = [i for j in zip(mu_names, std_names, weight_names) for i in j]\n", + " self.output_names = self.output_names + self.param_names\n", "\n", " # Add first output entry for the sample_mean\n", " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = 2 * n_components\n", + " self.outputsize_multiplier = 3 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " means, stds = torch.tensor_split(output, 2, dim=-1)\n", - " return (means, stds)\n", + " means, stds, weights = output.chunk(3, dim=-1)\n", + "\n", + " return (means, stds, weights)\n", "\n", - " def scale_decouple(self, \n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None,\n", - " eps: float=0.2):\n", - " \"\"\" 
Scale Decouple\n", + " def scale_decouple(\n", + " self,\n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " eps: float = 0.2,\n", + " ):\n", + " \"\"\"Scale Decouple\n", "\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", - " means, stds = output\n", + " means, stds, weights = output\n", " stds = F.softplus(stds)\n", + " weights = F.softmax(weights, dim=-1)\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(means.size(dim=0), 1, -1)\n", - " scale = scale.view(means.size(dim=0), 1, -1) \n", + " scale = scale.view(means.size(dim=0), 1, -1)\n", " means = (means * scale) + loc\n", " stds = (stds + eps) * scale\n", - " return (means, stds)\n", + "\n", + " return (means, stds, weights)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -2145,17 +3557,11 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", " \n", - " means, stds = distr_args\n", + " means, stds, weights = distr_args\n", " B, H, K = means.size()\n", " Q = len(self.quantiles)\n", " assert means.shape == stds.shape\n", "\n", - " # Sample K ~ Mult(weights)\n", - " # shared across B, H\n", - " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", - " \n", - " weights = (1/K) * torch.ones_like(means, device=means.device)\n", - " \n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", " means = means.flatten()\n", @@ -2195,17 +3601,15 @@ "\n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " if mask is None: \n", " mask = torch.ones_like(y)\n", " \n", - " means, stds = distr_args\n", + " means, 
stds, weights = distr_args\n", " B, H, K = means.size()\n", - " \n", - " weights = (1/K) * torch.ones_like(means, device=means.device)\n", - " \n", + " \n", " y = y[:,:, None]\n", " mask = mask[:,:,None]\n", " \n", @@ -2228,7 +3632,7 @@ " return loss\n", " \n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)" @@ -2278,7 +3682,17 @@ "execution_count": null, "id": "8ebe4250", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", + "Parameter containing:\n", + "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" + ] + } + ], "source": [ "# | hide\n", "# Unit tests to check PMM's stored quantiles\n", @@ -2302,7 +3716,40 @@ "execution_count": null, "id": "684d2382", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "means.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "stds.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "samples.shape (N,H,num_samples) torch.Size([2, 2, 1000])\n", + "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", + "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| hide\n", "# Create single mixture and broadcast to N,H,K\n", @@ -2319,7 +3766,7 @@ "print('stds.shape (N,H,K) \\t', stds.shape)\n", "\n", "distr = GMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (means, stds)\n", + "distr_args = (means, stds, weights)\n", "samples, sample_mean, quants = distr.sample(distr_args)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", @@ -2419,47 +3866,55 @@ " # If True, predict_step will return Distribution's parameters\n", " self.return_params = return_params\n", " if self.return_params:\n", - " total_count_names = [f\"-total_count-{i}\" for i in range(1, n_components + 1)]\n", + " total_count_names = [\n", + " f\"-total_count-{i}\" for i in range(1, n_components + 1)\n", + " ]\n", " probs_names = [f\"-probs-{i}\" for i in range(1, n_components + 1)]\n", - " param_names = [i for j in zip(total_count_names, probs_names) for i in j]\n", - " self.output_names = self.output_names + param_names\n", + " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", + " self.param_names = [i for j in zip(total_count_names, probs_names, weight_names) for i in j]\n", + " self.output_names = self.output_names + self.param_names\n", "\n", " # Add first output entry for the sample_mean\n", - " self.output_names.insert(0, \"\") \n", + " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = 2 * n_components\n", + " self.outputsize_multiplier = 3 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " mu, alpha = torch.tensor_split(output, 2, dim=-1)\n", - " return (mu, alpha)\n", + " mu, alpha, weights = output.chunk(3, dim=-1)\n", "\n", - " def scale_decouple(self, \n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None,\n", - " eps: float=0.2):\n", - " \"\"\" Scale Decouple\n", + " 
return mu, alpha, weights\n", + "\n", + " def scale_decouple(\n", + " self,\n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " eps: float = 1e-6,\n", + " ):\n", + " \"\"\"Scale Decouple\n", "\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", " # Efficient NBinomial parametrization\n", - " mu, alpha = output\n", - " mu = F.softplus(mu) + 1e-8\n", - " alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts\n", + " mu, alpha, weights = output\n", + " mu = F.softplus(mu) + eps\n", + " alpha = F.softplus(alpha) + eps # alpha = 1/total_counts\n", + " weights = F.softmax(weights, dim=-1)\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(mu.size(dim=0), 1, -1)\n", " mu *= loc\n", - " alpha /= (loc + 1.)\n", + " alpha /= loc + 1.0\n", "\n", " # mu = total_count * (probs/(1-probs))\n", " # => probs = mu / (total_count + mu)\n", " # => probs = mu / [total_count * (1 + mu * (1/total_count))]\n", " total_count = 1.0 / alpha\n", - " probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 \n", - " return (total_count, probs)\n", + " probs = (mu * alpha / (1.0 + mu * alpha))\n", + " probs = torch.clamp(probs, eps, 1 - eps)\n", + " return (total_count, probs, weights)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -2481,16 +3936,10 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", " \n", - " total_count, probs = distr_args\n", + " total_count, probs, weights = distr_args\n", " B, H, K = total_count.size()\n", " Q = len(self.quantiles)\n", " assert total_count.shape == probs.shape\n", - "\n", - " # Sample K ~ Mult(weights)\n", - " # shared across B, H\n", - " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", - " \n", - " weights = (1/K) * 
torch.ones_like(probs, device=probs.device)\n", " \n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", @@ -2533,17 +3982,15 @@ "\n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " if mask is None: \n", " mask = torch.ones_like(y)\n", " \n", - " total_count, probs = distr_args\n", + " total_count, probs, weights = distr_args\n", " B, H, K = total_count.size()\n", " \n", - " weights = (1/K) * torch.ones_like(probs, device=probs.device)\n", - " \n", " y = y[:,:, None]\n", " mask = mask[:,:,None]\n", "\n", @@ -2567,7 +4014,7 @@ " return loss\n", " \n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)" @@ -2608,7 +4055,40 @@ "execution_count": null, "id": "b67e2931", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "counts.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "probs.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "samples.shape (N,H,num_samples) torch.Size([2, 2, 2000])\n", + "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", + "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| hide\n", "# Create single mixture and broadcast to N,H,K\n", @@ -2625,7 +4105,7 @@ "print('probs.shape (N,H,K) \\t', probs.shape)\n", "\n", "model = NBMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (counts, probs)\n", + "distr_args = (counts, probs, weights)\n", "samples, sample_mean, quants = model.sample(distr_args, num_samples=2000)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb new file mode 100644 index 000000000..6bafac332 --- /dev/null +++ b/nbs/models.deepnpts.ipynb @@ -0,0 +1,1137 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp models.deepnpts" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DeepNPTS" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a non-parametric baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a tunable strategy. This strategy is learned by exploiting the information across multiple related time series. This model provides a strong, simple baseline for time series forecasting.\n", + "\n", + "\n", + "**References**
\n", + "[Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
\n", + "\n", + "\n", + ":::{.callout-warning collapse=\"false\"}\n", + "#### Exogenous Variables, Losses, and Parameters Availability\n", + "\n", + "Given the sampling procedure during inference, DeepNPTS only supports `DistributionLoss` as training loss.\n", + "\n", + "Note that DeepNPTS generates a non-parametric forecast distribution using Monte Carlo. We use this sampling procedure also during validation to make it closer to the inference procedure. Therefore, only the `MQLoss` is available for validation.\n", + "\n", + "Aditionally, Monte Carlo implies that historic exogenous variables are not available for the model.\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numpy as np\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import neuralforecast.losses.pytorch as losses\n", + "from typing import Optional\n", + "from functools import partial\n", + "\n", + "\n", + "from neuralforecast.common._base_windows import BaseWindows\n", + "from neuralforecast.losses.pytorch import MQLoss, GMM, PMM, NBMM\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "import logging\n", + "import warnings\n", + "\n", + "from fastcore.test import test_eq\n", + "from nbdev.showdoc import show_doc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
DeepNPTS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class DeepNPTS(BaseWindows):\n", + " \"\"\" DeepNPTS\n", + "\n", + " Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", + "\n", + " **Parameters:**
\n", + " `h`: int, Forecast horizon.
\n", + " `input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + " `hidden_size`: int=32, hidden size of dense layers.
\n", + " `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + " `dropout`: float=0.1, dropout.
\n", + " `n_layers`: int=2, number of dense layers.
\n", + " `trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", + " `stat_exog_list`: str list, static exogenous columns.
\n", + " `hist_exog_list`: str list, historic exogenous columns; not supported by DeepNPTS, must be None.
\n", + " `futr_exog_list`: str list, future exogenous columns.
\n", + " `exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True; not supported by DeepNPTS, must be False.
\n", + " `loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + " `valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + " `max_steps`: int=1000, maximum number of training steps.
\n", + " `learning_rate`: float=1e-5, Learning rate between (0, 1).
\n", + " `num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + " `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + " `val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + " `batch_size`: int=32, number of different series in each batch.
\n", + " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", + " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `step_size`: int=1, step size between each window of temporal data.
\n", + " `scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + " `num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + " `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + " `alias`: str, optional, Custom name of the model.
\n", + " `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + " `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + " `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + " **References**
\n", + " - [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
\n", + "\n", + " \"\"\"\n", + " # Class attributes\n", + " SAMPLING_TYPE = 'windows'\n", + " \n", + " def __init__(self,\n", + " h,\n", + " input_size: int = -1,\n", + " hidden_size: int = 32,\n", + " batch_norm: bool = True,\n", + " dropout: float = 0.1,\n", + " n_layers: int = 2,\n", + " trajectory_samples: int = 100,\n", + " futr_exog_list = None,\n", + " hist_exog_list = None,\n", + " stat_exog_list = None,\n", + " exclude_insample_y = False,\n", + " loss = GMM(),\n", + " valid_loss = MQLoss(level=[80, 90]),\n", + " max_steps: int = 1000,\n", + " learning_rate: float = 1e-5,\n", + " num_lr_decays: int = 3,\n", + " early_stop_patience_steps: int =-1,\n", + " val_check_steps: int = 100,\n", + " batch_size: int = 32,\n", + " valid_batch_size: Optional[int] = None,\n", + " windows_batch_size: int = 1024,\n", + " inference_windows_batch_size: int = -1,\n", + " start_padding_enabled = False,\n", + " step_size: int = 1,\n", + " scaler_type: str = 'standard',\n", + " random_seed: int = 1,\n", + " num_workers_loader = 0,\n", + " drop_last_loader = False,\n", + " optimizer = None,\n", + " optimizer_kwargs = None,\n", + " **trainer_kwargs):\n", + "\n", + " if hist_exog_list is not None:\n", + " raise Exception('DeepNPTS does not support historical exogenous variables.')\n", + "\n", + " if exclude_insample_y:\n", + " raise Exception('DeepNPTS has no possibility for excluding y.')\n", + " \n", + " supported_losses = (losses.GMM,\n", + " losses.PMM,\n", + " losses.NBMM)\n", + "\n", + " if not isinstance(loss, supported_losses):\n", + " raise Exception('DeepNPTS only supports GMM, PMM or NBMM as loss function.') \n", + " \n", + " if not isinstance(valid_loss, losses.MQLoss):\n", + " raise Exception('DeepNPTS only supports MQLoss as validation loss.')\n", + " \n", + " # Overwrite n_components, it has to be the input_size in DeepNPTS\n", + " loss.n_components = input_size\n", + " \n", + " # Inherit BaseWindows class\n", + " super(DeepNPTS, self).__init__(h=h,\n", + " 
input_size=input_size,\n", + " futr_exog_list=futr_exog_list,\n", + " hist_exog_list=hist_exog_list,\n", + " stat_exog_list=stat_exog_list,\n", + " exclude_insample_y = exclude_insample_y,\n", + " loss=loss,\n", + " valid_loss=valid_loss,\n", + " max_steps=max_steps,\n", + " learning_rate=learning_rate,\n", + " num_lr_decays=num_lr_decays,\n", + " early_stop_patience_steps=early_stop_patience_steps,\n", + " val_check_steps=val_check_steps,\n", + " batch_size=batch_size,\n", + " windows_batch_size=windows_batch_size,\n", + " valid_batch_size=valid_batch_size,\n", + " inference_windows_batch_size=inference_windows_batch_size,\n", + " start_padding_enabled=start_padding_enabled,\n", + " step_size=step_size,\n", + " scaler_type=scaler_type,\n", + " num_workers_loader=num_workers_loader,\n", + " drop_last_loader=drop_last_loader,\n", + " random_seed=random_seed,\n", + " optimizer=optimizer,\n", + " optimizer_kwargs=optimizer_kwargs,\n", + " **trainer_kwargs)\n", + "\n", + " self.h = h\n", + " self.h_backup = self.h # Used because h=1 during training\n", + " self.use_softmax = True\n", + " self.hidden_size = hidden_size\n", + " self.dropout = dropout\n", + " self.trajectory_samples = trajectory_samples\n", + "\n", + " self.futr_exog_size = len(self.futr_exog_list)\n", + " self.stat_exog_size = len(self.stat_exog_list)\n", + "\n", + " input_dim = input_size * (1 + self.futr_exog_size) + self.stat_exog_size\n", + " # Create DeepNPTSNetwork\n", + " modules = [] \n", + " for i in range(n_layers):\n", + " modules.append(nn.Linear(input_dim if i == 0 else hidden_size, hidden_size))\n", + " modules.append(nn.ReLU())\n", + " if batch_norm:\n", + " modules.append(nn.BatchNorm1d(hidden_size))\n", + " if dropout > 0.0:\n", + " modules.append(nn.Dropout(dropout))\n", + "\n", + " self.deepnptsnetwork = nn.Sequential(*modules)\n", + " self.deepnptsnetwork.apply(partial(self._init_weights, scale=0.07))\n", + "\n", + " # Add output layers for Mixture distribution \n", + " output_modules 
= []\n", + " if dropout > 0.0:\n", + " output_modules.append(nn.Dropout(self.dropout))\n", + " \n", + " if isinstance(loss, GMM):\n", + " output_modules.append(nn.Linear(hidden_size, input_size + 1))\n", + " elif isinstance(loss, PMM):\n", + " output_modules.append(nn.Linear(hidden_size, input_size))\n", + " elif isinstance(loss, NBMM):\n", + " output_modules.append(nn.Linear(hidden_size, input_size))\n", + "\n", + " self.output_layer = nn.Sequential(*output_modules)\n", + " self.output_layer.apply(self._init_weights)\n", + "\n", + "\n", + " @staticmethod\n", + " def _init_weights(module, scale=1.0):\n", + " if type(module) == nn.Linear:\n", + " nn.init.uniform_(module.weight, -scale, scale)\n", + " nn.init.zeros_(module.bias)\n", + "\n", + " def _domain_map(self, o_t, insample_y):\n", + " if isinstance(self.loss, GMM):\n", + " weights = o_t[:, :-1] # [B, L + 1] -> [B, L]\n", + " kernel_width = o_t[:, -1:] # [B, L + 1] -> [B, 1]\n", + " kernel_width = torch.repeat_interleave(input=kernel_width,\n", + " repeats=weights.shape[1],\n", + " dim=-1) # [B, 1] -> [B, L]\n", + " output = torch.cat([insample_y, kernel_width, weights], dim=-1) # [B, L] + [B, L] + [B, L] = [B, 3 * L]\n", + " output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * L]\n", + " elif isinstance(self.loss, PMM):\n", + " weights = o_t # [B, L] -> [B, L]\n", + " output = torch.cat([insample_y, weights], dim=-1) # [B, L] + [B, L] = [B, 2 * L]\n", + " output = output.unsqueeze(1) # [B, 2 * L] = [B, 1, 2 * L] \n", + " elif isinstance(self.loss, NBMM):\n", + " weights = torch.ones_like(o_t) # [B, L] -> [B, L]\n", + " output = torch.cat([insample_y, o_t, weights], dim=-1) # [B, L] + [B, L] + [B, L] = [B, 3 * L]\n", + " output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * \n", + "\n", + " else:\n", + " raise NotImplementedError\n", + " \n", + " return output\n", + "\n", + " # Override BaseWindows method\n", + " def training_step(self, batch, batch_idx):\n", + " \n", + " # Only train one-step ahead\n", + " 
self.h = 1\n", + " self.quantiles = self.loss.quantiles\n", + "\n", + " # Create and normalize windows [Ws, L+H, C]\n", + " y_idx = batch[\"y_idx\"]\n", + " windows = self._create_windows(batch, step=\"train\")\n", + " original_outsample_y = torch.clone(windows[\"temporal\"][:, -self.h :, y_idx])\n", + " windows = self._normalization(windows=windows, y_idx=y_idx)\n", + "\n", + " # Parse windows\n", + " (\n", + " insample_y,\n", + " insample_mask,\n", + " outsample_y,\n", + " outsample_mask,\n", + " _,\n", + " futr_exog,\n", + " stat_exog,\n", + " ) = self._parse_windows(batch, windows)\n", + "\n", + " windows_batch = dict(\n", + " insample_y=insample_y, # [Ws, L]\n", + " insample_mask=insample_mask, # [Ws, L]\n", + " futr_exog=futr_exog, # [Ws, L+H]\n", + " hist_exog=None, \n", + " stat_exog=stat_exog, # [Ws, 1]\n", + " y_idx=y_idx # [Ws, 1]\n", + " ) \n", + "\n", + " # Model Predictions\n", + " output = self.train_forward(windows_batch)\n", + "\n", + " _, y_loc, y_scale = self._inv_normalization(\n", + " y_hat=outsample_y, \n", + " temporal_cols=batch[\"temporal_cols\"], \n", + " y_idx=y_idx\n", + " )\n", + " # outsample_y = original_insample_y\n", + " outsample_y = original_outsample_y\n", + " distr_args = self.loss.scale_decouple(\n", + " output=output, loc=y_loc, scale=y_scale\n", + " )\n", + " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n", + "\n", + " if torch.isnan(loss):\n", + " print(\"Model Parameters\", self.hparams)\n", + " print(\"insample_y\", torch.isnan(insample_y).sum())\n", + " print(\"outsample_y\", torch.isnan(outsample_y).sum())\n", + " print(\"output\", torch.isnan(output).sum())\n", + " raise Exception(\"Loss is NaN, training stopped.\")\n", + "\n", + " self.log(\"train_loss\", loss, prog_bar=True, on_epoch=True)\n", + " self.train_trajectories.append((self.global_step, float(loss)))\n", + "\n", + " self.h = self.h_backup \n", + " \n", + " return loss\n", + "\n", + " # Override BaseWindows method\n", + " def 
validation_step(self, batch, batch_idx):\n", + "\n", + " self.h = self.h_backup\n", + " self.quantiles = self.valid_loss.quantiles\n", + "\n", + " if self.val_size == 0:\n", + " return np.nan\n", + "\n", + " # TODO: Hack to compute number of windows\n", + " windows = self._create_windows(batch, step=\"val\")\n", + " n_windows = len(windows[\"temporal\"])\n", + " y_idx = batch[\"y_idx\"]\n", + "\n", + " # Number of windows in batch\n", + " windows_batch_size = self.inference_windows_batch_size\n", + " if windows_batch_size < 0:\n", + " windows_batch_size = n_windows\n", + " n_batches = int(np.ceil(n_windows / windows_batch_size))\n", + "\n", + " valid_losses = []\n", + " batch_sizes = []\n", + " for i in range(n_batches):\n", + " # Create and normalize windows [Ws, L+H, C]\n", + " w_idxs = np.arange(\n", + " i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)\n", + " )\n", + " windows = self._create_windows(batch, step=\"val\", w_idxs=w_idxs)\n", + " original_outsample_y = torch.clone(windows[\"temporal\"][:, -self.h:, 0])\n", + " windows = self._normalization(windows=windows, y_idx=y_idx)\n", + "\n", + " # Parse windows\n", + " (\n", + " insample_y,\n", + " insample_mask,\n", + " _,\n", + " outsample_mask,\n", + " _,\n", + " futr_exog,\n", + " stat_exog,\n", + " ) = self._parse_windows(batch, windows)\n", + " \n", + " windows_batch = dict(\n", + " insample_y=insample_y, # [Ws, L]\n", + " insample_mask=insample_mask, # [Ws, L]\n", + " futr_exog=futr_exog, # [Ws, L+H]\n", + " hist_exog=None, # [Ws, L]\n", + " stat_exog=stat_exog,\n", + " y_idx=y_idx,\n", + " ) # [Ws, 1]\n", + "\n", + " # Model Predictions\n", + " output_batch = self(windows_batch)\n", + " # Monte Carlo already returns y_hat with mean and quantiles\n", + " output_batch = output_batch[:,:, 1:] # Remove mean\n", + " valid_loss_batch = self.valid_loss(y=original_outsample_y, y_hat=output_batch, mask=outsample_mask)\n", + " valid_losses.append(valid_loss_batch)\n", + " 
# Override BaseWindows method
def predict_step(self, batch, batch_idx):
    """Forecast every window of ``batch`` and return the stacked forecasts.

    The windows are processed in chunks of ``inference_windows_batch_size``
    (all windows at once when that value is negative). Each chunk is
    normalized, parsed and passed through ``self(...)``; the per-chunk
    outputs are concatenated along the first dimension.

    Parameters
    ----------
    batch : dict
        Batch as consumed by ``self._create_windows``; must contain ``'y_idx'``.
    batch_idx : int
        Index of the batch (unused here).

    Returns
    -------
    torch.Tensor
        Concatenation (dim 0) of the forecasts of all window chunks.
    """
    # Restore the forecasting horizon. This used to be the comparison
    # `self.h == self.h_backup`, whose result was discarded, so the horizon
    # was never actually reset before predicting.
    self.h = self.h_backup
    self.quantiles = self.loss.quantiles

    # TODO: Hack to compute number of windows
    windows = self._create_windows(batch, step="predict")
    n_windows = len(windows["temporal"])
    y_idx = batch["y_idx"]

    # Number of windows in batch
    windows_batch_size = self.inference_windows_batch_size
    if windows_batch_size < 0:
        windows_batch_size = n_windows
    n_batches = int(np.ceil(n_windows / windows_batch_size))

    y_hats = []
    for i in range(n_batches):
        # Create and normalize windows [Ws, L+H, C]
        w_idxs = np.arange(
            i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
        )
        windows = self._create_windows(batch, step="predict", w_idxs=w_idxs)
        windows = self._normalization(windows=windows, y_idx=y_idx)

        # Parse windows
        (
            insample_y,
            insample_mask,
            _,
            _,
            _,
            futr_exog,
            stat_exog,
        ) = self._parse_windows(batch, windows)
        windows_batch = dict(
            insample_y=insample_y,  # [Ws, L]
            insample_mask=insample_mask,  # [Ws, L]
            futr_exog=futr_exog,  # [Ws, L+H]
            stat_exog=stat_exog,
            y_idx=y_idx,
        )

        # Model predictions. The Monte Carlo forward pass already returns
        # y_hat with the mean and the quantiles.
        y_hats.append(self(windows_batch))

    return torch.cat(y_hats, dim=0)


def train_forward(self, windows_batch):
    """Single (non-recursive) pass used for training.

    Flattens the lag window (plus the aligned future exogenous values and
    the static exogenous values, when configured) into one feature vector
    per series, runs it through the network and maps the result to the
    distribution arguments expected by ``self.loss``.

    Parameters
    ----------
    windows_batch : dict
        Must contain ``'insample_y'`` [B, L], ``'futr_exog'`` [B, L + h, F]
        and ``'stat_exog'`` [B, S].

    Returns
    -------
    The output of ``self.loss.domain_map``.
    """
    # Parse windows_batch
    x_t = windows_batch["insample_y"].unsqueeze(-1)  # [B, L, 1]
    futr_exog = windows_batch["futr_exog"]  # [B, L + h, F]
    stat_exog = windows_batch["stat_exog"]  # [B, S]

    batch_size, seq_len = x_t.shape[:2]  # B = batch_size, L = seq_len

    # Concatenate x_t with the future exogenous values of the lag window
    if self.futr_exog_size > 0:
        futr_exog_t = futr_exog[:, :seq_len]  # [B, L + h, F] -> [B, L, F]
        x_t = torch.cat((x_t, futr_exog_t), dim=2)  # -> [B, L, 1 + F]

    x_t = x_t.reshape(batch_size, -1)  # [B, L, 1 + F] -> [B, L * (1 + F)]

    # Concatenate x_t with static exogenous
    if self.stat_exog_size > 0:
        x_t = torch.cat((x_t, stat_exog), dim=1)  # -> [B, L * (1 + F) + S]

    # Run through DeepNPTSNetwork
    h_t = self.deepnptsnetwork(x_t)  # [B, L * (1 + F) + S] -> [B, hidden_size]
    o_t = self.output_layer(h_t)  # [B, hidden_size] -> [B, L + 1]

    output = self._domain_map(o_t, windows_batch["insample_y"])  # -> [B, 3 * L]
    output = self.loss.domain_map(output)  # -> ([B, L], [B, L], [B, L])

    return output


def forward(self, windows_batch):
    """Recursive Monte Carlo forecast over ``self.h`` steps.

    Every series is repeated ``self.trajectory_samples`` times. At each step
    one value is sampled per trajectory, summarized into the mean and the
    requested quantiles, then re-normalized and appended to the lag window
    that feeds the next step.

    Parameters
    ----------
    windows_batch : dict
        Must contain ``'insample_y'`` [B, L], ``'futr_exog'`` [B, L + h, F],
        ``'stat_exog'`` [B, S] and ``'y_idx'``.

    Returns
    -------
    torch.Tensor
        Forecasts of shape [B, h, 1 + Q]; index 0 of the last dimension is
        the sample mean, the remaining entries are the quantiles in
        ``self.quantiles``.
    """
    # Parse windows_batch
    insample_y_t = windows_batch["insample_y"].unsqueeze(-1)  # [B, L, 1]
    futr_exog = windows_batch["futr_exog"]  # [B, L + h, F]
    stat_exog = windows_batch["stat_exog"]  # [B, S]
    y_idx = windows_batch["y_idx"]

    batch_size, seq_len = insample_y_t.shape[:2]  # B = batch_size, L = seq_len
    n_samples = self.trajectory_samples
    device = insample_y_t.device
    dtype = insample_y_t.dtype

    # Repeat every input once per trajectory sample: [B, ...] -> [B * n_samples, ...]
    insample_y_t = torch.repeat_interleave(insample_y_t, repeats=n_samples, dim=0)
    if self.futr_exog_size > 0:
        futr_exog = torch.repeat_interleave(futr_exog, repeats=n_samples, dim=0)
    if self.stat_exog_size > 0:
        stat_exog = torch.repeat_interleave(stat_exog, repeats=n_samples, dim=0)

    def _build_input(y_window, start):
        # Flatten the lag window starting at time index `start` into
        # [B * n_samples, L * (1 + F) + S].
        x = y_window  # [B * n_samples, L, 1]
        if self.futr_exog_size > 0:
            x = torch.cat((x, futr_exog[:, start : start + seq_len]), dim=2)
        x = x.reshape(batch_size * n_samples, -1)
        if self.stat_exog_size > 0:
            x = torch.cat((x, stat_exog), dim=1)
        return x

    x_t = _build_input(insample_y_t, 0)

    # Scales for inverse normalization
    y_scale = torch.repeat_interleave(
        self.scaler.x_scale[:, :, y_idx], repeats=n_samples, dim=0
    )
    y_loc = torch.repeat_interleave(
        self.scaler.x_shift[:, :, y_idx], repeats=n_samples, dim=0
    )

    # Create forecasts tensor
    forecasts = torch.zeros(
        (batch_size, self.h, len(self.quantiles) + 1), device=device, dtype=dtype
    )

    # Recursive predictions
    for t in range(self.h):
        # Run input through DeepNPTSNetwork
        h_t = self.deepnptsnetwork(x_t)  # -> [B * n_samples, hidden_size]
        o_t = self.output_layer(h_t)  # -> [B * n_samples, L (+ 1)]
        output = self._domain_map(o_t, insample_y_t.squeeze(-1))
        output = self.loss.domain_map(output)

        # Inverse normalization
        distr_args = self.loss.scale_decouple(
            output=output, loc=y_loc, scale=y_scale
        )

        # Sample and create probabilistic outputs
        samples_t_flat, _, _ = self.loss.sample(distr_args=distr_args, num_samples=1)

        # reshape(-1) rather than squeeze(): squeeze() would also drop the
        # leading dimension when B * n_samples == 1.
        samples_t_flat = samples_t_flat.reshape(-1)  # [B * n_samples]
        samples_t = samples_t_flat.reshape(batch_size, n_samples)  # [B, n_samples]

        forecasts[:, t, 0] = torch.mean(samples_t, dim=-1)  # [B]
        quantiles_t = torch.quantile(
            input=samples_t, q=self.quantiles, dim=-1
        )  # [B, n_samples] -> [Q, B]
        forecasts[:, t, 1:] = quantiles_t.permute(1, 0)

        if t + 1 == self.h:
            # Nothing consumes the next input after the final step.
            break

        # Re-normalize the sampled values and append them as the newest lag
        insample_y_t_next = self.scaler.scaler(
            samples_t_flat, y_loc.reshape(-1), y_scale.reshape(-1)
        )  # [B * n_samples]
        insample_y_t_next = insample_y_t_next.reshape(-1, 1, 1)  # [B * n_samples, 1, 1]
        insample_y_t = torch.cat(
            [insample_y_t[:, 1:], insample_y_t_next], dim=1
        )  # [B * n_samples, L, 1]

        # The lag window now covers time indices t + 1 .. t + L, so the
        # exogenous slice must start at t + 1 as well. It previously started
        # at t, which paired every lag with the previous step's exogenous
        # values.
        x_t = _build_input(insample_y_t, t + 1)

    return forecasts
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`hidden_size`: int=32, hidden size of dense layers.
\n", + "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + "`dropout`: float=0.5, dropout.
\n", + "`n_layers`: int=2, number of dense layers.
\n", + "`trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`MQLoss()`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + "`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", + "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + "**References**
\n", + "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L20){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DeepNPTS\n", + "\n", + "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", + "> dropout:float=0.5, n_layers:int=2, trajectory_samples:int=100,\n", + "> futr_exog_list=None, hist_exog_list=None, stat_exog_list=None,\n", + "> exclude_insample_y=False, loss=GMM(), valid_loss=MQLoss(),\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32,\n", + "> valid_batch_size:Optional[int]=None,\n", + "> windows_batch_size:int=1024,\n", + "> inference_windows_batch_size:int=-1,\n", + "> start_padding_enabled=False, step_size:int=1,\n", + "> scaler_type:str='standard', random_seed:int=1,\n", + "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", + "> optimizer_kwargs=None, **trainer_kwargs)\n", + "\n", + "DeepNPTS\n", + "\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", + "\n", + "**Parameters:**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`hidden_size`: int=32, hidden size of dense layers.
\n", + "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + "`dropout`: float=0.5, dropout.
\n", + "`n_layers`: int=2, number of dense layers.
\n", + "`trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`MQLoss()`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + "`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", + "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + "**References**
\n", + "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(DeepNPTS, title_level=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeepNPTS.fit\n", + "\n", + "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ], + "text/plain": [ + "---\n", + "\n", + "### DeepNPTS.fit\n", + "\n", + "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeepNPTS.predict\n", + "\n", + "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", + "> **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ], + "text/plain": [ + "---\n", + "\n", + "### DeepNPTS.predict\n", + "\n", + "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", + "> **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from neuralforecast import NeuralForecast\n", + "from neuralforecast.losses.pytorch import MQLoss, DistributionLoss, GMM\n", + "from neuralforecast.tsdataset import TimeSeriesDataset\n", + "from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 1\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b74158f17d254e4884139ee5c48e5706", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | | 0/? 
[00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#| eval: false\n", + "import pandas as pd\n", + "import pytorch_lightning as pl\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from neuralforecast import NeuralForecast\n", + "#from neuralforecast.models import DeepAR\n", + "from neuralforecast.losses.pytorch import DistributionLoss, HuberMQLoss\n", + "from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic\n", + "\n", + "#AirPassengersPanel['y'] = AirPassengersPanel['y'] + 10\n", + "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n", + "\n", + "nf = NeuralForecast(\n", + " models=[DeepNPTS(h=12,\n", + " input_size=12,\n", + " trajectory_samples=100,\n", + " loss=GMM(),\n", + " # learning_rate=1e-5,\n", + " n_layers = 2,\n", + " dropout=0.0,\n", + " stat_exog_list=['airline1'],\n", + " futr_exog_list=['trend'],\n", + " max_steps=1000,\n", + " val_check_steps=10,\n", + " early_stop_patience_steps=3,\n", + " scaler_type='robust',\n", + " enable_progress_bar=True),\n", + " ],\n", + " freq='M'\n", + ")\n", + "nf.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n", + "Y_hat_df = nf.predict(futr_df=Y_test_df)\n", + "\n", + "# Plot quantile predictions\n", + "Y_hat_df = Y_hat_df.reset_index(drop=False).drop(columns=['unique_id','ds'])\n", + "plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n", + "plot_df = pd.concat([Y_train_df, plot_df])\n", + "\n", + "plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n", + "plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n", + "plt.plot(plot_df['ds'], plot_df['DeepNPTS'], c='red', label='mean')\n", + "plt.plot(plot_df['ds'], plot_df['DeepNPTS-median'], c='blue', label='median')\n", + "plt.fill_between(x=plot_df['ds'][-12:], \n", + " y1=plot_df['DeepNPTS-lo-90'][-12:].values, \n", + " 
y2=plot_df['DeepNPTS-hi-90'][-12:].values,\n", + " alpha=0.4, label='level 90')\n", + "plt.legend()\n", + "plt.grid()\n", + "plt.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/neuralforecast/_modidx.py b/neuralforecast/_modidx.py index 208162bd1..3efc57b96 100644 --- a/neuralforecast/_modidx.py +++ b/neuralforecast/_modidx.py @@ -508,6 +508,24 @@ 'neuralforecast/models/deepar.py'), 'neuralforecast.models.deepar.DeepAR.validation_step': ( 'models.deepar.html#deepar.validation_step', 'neuralforecast/models/deepar.py')}, + 'neuralforecast.models.deepnpts': { 'neuralforecast.models.deepnpts.DeepNPTS': ( 'models.deepnpts.html#deepnpts', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.__init__': ( 'models.deepnpts.html#deepnpts.__init__', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS._domain_map': ( 'models.deepnpts.html#deepnpts._domain_map', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS._init_weights': ( 'models.deepnpts.html#deepnpts._init_weights', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.forward': ( 'models.deepnpts.html#deepnpts.forward', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.predict_step': ( 'models.deepnpts.html#deepnpts.predict_step', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.train_forward': ( 'models.deepnpts.html#deepnpts.train_forward', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.training_step': ( 
'models.deepnpts.html#deepnpts.training_step', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.validation_step': ( 'models.deepnpts.html#deepnpts.validation_step', + 'neuralforecast/models/deepnpts.py')}, 'neuralforecast.models.dilated_rnn': { 'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer': ( 'models.dilated_rnn.html#attentivelstmlayer', 'neuralforecast/models/dilated_rnn.py'), 'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer.__init__': ( 'models.dilated_rnn.html#attentivelstmlayer.__init__', diff --git a/neuralforecast/common/_scalers.py b/neuralforecast/common/_scalers.py index 15ddb3bd4..bef76f7e9 100644 --- a/neuralforecast/common/_scalers.py +++ b/neuralforecast/common/_scalers.py @@ -313,8 +313,8 @@ def identity_statistics(x, mask, dim=-1, eps=1e-6): shape = list(x.shape) shape[dim] = 1 - x_shift = torch.zeros(shape) - x_scale = torch.ones(shape) + x_shift = torch.zeros(shape, device=x.device) + x_scale = torch.ones(shape, device=x.device) return x_shift, x_scale diff --git a/neuralforecast/core.py b/neuralforecast/core.py index f0a52224b..3c2ce1942 100644 --- a/neuralforecast/core.py +++ b/neuralforecast/core.py @@ -56,6 +56,7 @@ MLPMultivariate, iTransformer, BiTCN, + DeepNPTS, ) # %% ../nbs/core.ipynb 5 @@ -164,6 +165,8 @@ def _insample_times( "autoitransformer": iTransformer, "bitcn": BiTCN, "autobitcn": BiTCN, + "deepnpts": DeepNPTS, + "autodeepnpts": DeepNPTS, } # %% ../nbs/core.ipynb 8 diff --git a/neuralforecast/losses/pytorch.py b/neuralforecast/losses/pytorch.py index d7f29c83b..2e5ede2f5 100644 --- a/neuralforecast/losses/pytorch.py +++ b/neuralforecast/losses/pytorch.py @@ -1166,17 +1166,20 @@ def __init__( # If True, predict_step will return Distribution's parameters self.return_params = return_params if self.return_params: - self.param_names = [f"-lambda-{i}" for i in range(1, n_components + 1)] + lambda_names = [f"-lambda-{i}" for i in range(1, n_components + 1)] + weight_names = [f"-weight-{i}" 
for i in range(1, n_components + 1)] + self.param_names = [i for j in zip(lambda_names, weight_names) for i in j] self.output_names = self.output_names + self.param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = n_components + self.outputsize_multiplier = 2 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - return (output,) # , weights + lambdas, weights = output.chunk(2, dim=-1) + return (lambdas, weights) def scale_decouple( self, @@ -1190,13 +1193,15 @@ def scale_decouple( variance and residual location based on anchoring `loc`, `scale`. Also adds domain protection to the distribution parameters. """ - lambdas = output[0] + lambdas, weights = output + weights = F.softmax(weights, dim=-1) + if (loc is not None) and (scale is not None): loc = loc.view(lambdas.size(dim=0), 1, -1) scale = scale.view(lambdas.size(dim=0), 1, -1) lambdas = (lambdas * scale) + loc lambdas = F.softplus(lambdas) - return (lambdas,) + return (lambdas, weights) def sample(self, distr_args, num_samples=None): """ @@ -1218,15 +1223,10 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - lambdas = distr_args[0] + lambdas, weights = distr_args B, H, K = lambdas.size() Q = len(self.quantiles) - # Sample K ~ Mult(weights) - # shared across B, H - # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) - weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device) - # Avoid loop, vectorize weights = weights.reshape(-1, K) lambdas = lambdas.flatten() @@ -1267,7 +1267,7 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: @@ -1276,11 +1276,9 @@ def neglog_likelihood( mask = mask * ((y > 0) * 1) eps = 1e-10 - lambdas = distr_args[0] 
+ lambdas, weights = distr_args B, H, K = lambdas.size() - weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device) - y = y[:, :, None] mask = mask[:, :, None] @@ -1307,7 +1305,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): @@ -1369,18 +1367,22 @@ def __init__( if self.return_params: mu_names = [f"-mu-{i}" for i in range(1, n_components + 1)] std_names = [f"-std-{i}" for i in range(1, n_components + 1)] - mu_std_names = [i for j in zip(mu_names, std_names) for i in j] - self.output_names = self.output_names + mu_std_names + weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] + self.param_names = [ + i for j in zip(mu_names, std_names, weight_names) for i in j + ] + self.output_names = self.output_names + self.param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 2 * n_components + self.outputsize_multiplier = 3 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - means, stds = torch.tensor_split(output, 2, dim=-1) - return (means, stds) + means, stds, weights = output.chunk(3, dim=-1) + + return (means, stds, weights) def scale_decouple( self, @@ -1395,14 +1397,16 @@ def scale_decouple( variance and residual location based on anchoring `loc`, `scale`. Also adds domain protection to the distribution parameters. 
""" - means, stds = output + means, stds, weights = output stds = F.softplus(stds) + weights = F.softmax(weights, dim=-1) if (loc is not None) and (scale is not None): loc = loc.view(means.size(dim=0), 1, -1) scale = scale.view(means.size(dim=0), 1, -1) means = (means * scale) + loc stds = (stds + eps) * scale - return (means, stds) + + return (means, stds, weights) def sample(self, distr_args, num_samples=None): """ @@ -1424,17 +1428,11 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - means, stds = distr_args + means, stds, weights = distr_args B, H, K = means.size() Q = len(self.quantiles) assert means.shape == stds.shape - # Sample K ~ Mult(weights) - # shared across B, H - # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) - - weights = (1 / K) * torch.ones_like(means, device=means.device) - # Avoid loop, vectorize weights = weights.reshape(-1, K) means = means.flatten() @@ -1475,18 +1473,16 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: mask = torch.ones_like(y) - means, stds = distr_args + means, stds, weights = distr_args B, H, K = means.size() - weights = (1 / K) * torch.ones_like(means, device=means.device) - y = y[:, :, None] mask = mask[:, :, None] @@ -1514,7 +1510,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): @@ -1572,25 +1568,29 @@ def __init__( f"-total_count-{i}" for i in range(1, n_components + 1) ] probs_names = [f"-probs-{i}" for i in range(1, n_components + 1)] - param_names = [i for j in zip(total_count_names, probs_names) for i in j] - self.output_names = self.output_names + 
param_names + weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] + self.param_names = [ + i for j in zip(total_count_names, probs_names, weight_names) for i in j + ] + self.output_names = self.output_names + self.param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 2 * n_components + self.outputsize_multiplier = 3 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - mu, alpha = torch.tensor_split(output, 2, dim=-1) - return (mu, alpha) + mu, alpha, weights = output.chunk(3, dim=-1) + + return mu, alpha, weights def scale_decouple( self, output, loc: Optional[torch.Tensor] = None, scale: Optional[torch.Tensor] = None, - eps: float = 0.2, + eps: float = 1e-6, ): """Scale Decouple @@ -1599,9 +1599,10 @@ def scale_decouple( Also adds domain protection to the distribution parameters. """ # Efficient NBinomial parametrization - mu, alpha = output - mu = F.softplus(mu) + 1e-8 - alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts + mu, alpha, weights = output + mu = F.softplus(mu) + eps + alpha = F.softplus(alpha) + eps # alpha = 1/total_counts + weights = F.softmax(weights, dim=-1) if (loc is not None) and (scale is not None): loc = loc.view(mu.size(dim=0), 1, -1) mu *= loc @@ -1611,8 +1612,9 @@ def scale_decouple( # => probs = mu / (total_count + mu) # => probs = mu / [total_count * (1 + mu * (1/total_count))] total_count = 1.0 / alpha - probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 - return (total_count, probs) + probs = mu * alpha / (1.0 + mu * alpha) + probs = torch.clamp(probs, eps, 1 - eps) + return (total_count, probs, weights) def sample(self, distr_args, num_samples=None): """ @@ -1634,17 +1636,11 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - total_count, probs = distr_args + total_count, probs, weights = distr_args B, H, K = total_count.size() Q = len(self.quantiles) 
assert total_count.shape == probs.shape - # Sample K ~ Mult(weights) - # shared across B, H - # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) - - weights = (1 / K) * torch.ones_like(probs, device=probs.device) - # Avoid loop, vectorize weights = weights.reshape(-1, K) total_count = total_count.flatten() @@ -1686,18 +1682,16 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: mask = torch.ones_like(y) - total_count, probs = distr_args + total_count, probs, weights = distr_args B, H, K = total_count.size() - weights = (1 / K) * torch.ones_like(probs, device=probs.device) - y = y[:, :, None] mask = mask[:, :, None] @@ -1728,7 +1722,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): diff --git a/neuralforecast/models/__init__.py b/neuralforecast/models/__init__.py index d4a6ead9d..b4e7153f1 100644 --- a/neuralforecast/models/__init__.py +++ b/neuralforecast/models/__init__.py @@ -3,6 +3,7 @@ 'TFT', 'VanillaTransformer', 'Informer', 'Autoformer', 'PatchTST', 'FEDformer', 'StemGNN', 'HINT', 'TimesNet', 'TimeLLM', 'TSMixer', 'TSMixerx', 'MLPMultivariate', 'iTransformer', 'BiTCN', + 'DeepNPTS' ] from .rnn import RNN @@ -32,4 +33,4 @@ from .mlpmultivariate import MLPMultivariate from .itransformer import iTransformer from .bitcn import BiTCN - +from .deepnpts import DeepNPTS diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py new file mode 100644 index 000000000..d4da85974 --- /dev/null +++ b/neuralforecast/models/deepnpts.py @@ -0,0 +1,557 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/models.deepnpts.ipynb. 
+ +# %% auto 0 +__all__ = ['DeepNPTS'] + +# %% ../../nbs/models.deepnpts.ipynb 3 +import numpy as np + +import torch +import torch.nn as nn +import neuralforecast.losses.pytorch as losses +from typing import Optional +from functools import partial + + +from ..common._base_windows import BaseWindows +from ..losses.pytorch import MQLoss, GMM, PMM, NBMM + +# %% ../../nbs/models.deepnpts.ipynb 7 +class DeepNPTS(BaseWindows): + """DeepNPTS + + Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. + + **Parameters:**
+ `h`: int, Forecast horizon.
+ `input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
+ `hidden_size`: int=32, hidden size of dense layers.
+ `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
+ `dropout`: float=0.1, dropout.
+ `n_layers`: int=2, number of dense layers.
+ `trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
+ `stat_exog_list`: str list, static exogenous columns.
+ `hist_exog_list`: str list, historic exogenous columns. Not supported by DeepNPTS: must be None, otherwise an exception is raised.
+ `futr_exog_list`: str list, future exogenous columns.
+ `exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True. Not supported by DeepNPTS: must be False, otherwise an exception is raised.
+ `loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
+ `valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
+ `max_steps`: int=1000, maximum number of training steps.
+ `learning_rate`: float=1e-5, Learning rate between (0, 1).
+ `num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
+ `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
+ `val_check_steps`: int=100, Number of training steps between every validation loss check.
+ `batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+ `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
+ `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `step_size`: int=1, step size between each window of temporal data.
+ `scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
+ `random_seed`: int, random_seed for pytorch initializer and numpy generators.
+ `num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
+ `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
+ `alias`: str, optional, Custom name of the model.
+ `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
+ `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
+ `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
+ + **References**
+ - [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). "Deep Non-Parametric Time Series Forecaster". arXiv.](https://arxiv.org/abs/2312.14657)
+ + """ + + # Class attributes + SAMPLING_TYPE = "windows" + + def __init__( + self, + h, + input_size: int = -1, + hidden_size: int = 32, + batch_norm: bool = True, + dropout: float = 0.1, + n_layers: int = 2, + trajectory_samples: int = 100, + futr_exog_list=None, + hist_exog_list=None, + stat_exog_list=None, + exclude_insample_y=False, + loss=GMM(), + valid_loss=MQLoss(level=[80, 90]), + max_steps: int = 1000, + learning_rate: float = 1e-5, + num_lr_decays: int = 3, + early_stop_patience_steps: int = -1, + val_check_steps: int = 100, + batch_size: int = 32, + valid_batch_size: Optional[int] = None, + windows_batch_size: int = 1024, + inference_windows_batch_size: int = -1, + start_padding_enabled=False, + step_size: int = 1, + scaler_type: str = "standard", + random_seed: int = 1, + num_workers_loader=0, + drop_last_loader=False, + optimizer=None, + optimizer_kwargs=None, + **trainer_kwargs + ): + + if hist_exog_list is not None: + raise Exception("DeepNPTS does not support historical exogenous variables.") + + if exclude_insample_y: + raise Exception("DeepNPTS has no possibility for excluding y.") + + supported_losses = (losses.GMM, losses.PMM, losses.NBMM) + + if not isinstance(loss, supported_losses): + raise Exception("DeepNPTS only supports GMM, PMM or NBMM as loss function.") + + if not isinstance(valid_loss, losses.MQLoss): + raise Exception("DeepNPTS only supports MQLoss as validation loss.") + + # Overwrite n_components, it has to be the input_size in DeepNPTS + loss.n_components = input_size + + # Inherit BaseWindows class + super(DeepNPTS, self).__init__( + h=h, + input_size=input_size, + futr_exog_list=futr_exog_list, + hist_exog_list=hist_exog_list, + stat_exog_list=stat_exog_list, + exclude_insample_y=exclude_insample_y, + loss=loss, + valid_loss=valid_loss, + max_steps=max_steps, + learning_rate=learning_rate, + num_lr_decays=num_lr_decays, + early_stop_patience_steps=early_stop_patience_steps, + val_check_steps=val_check_steps, + 
batch_size=batch_size, + windows_batch_size=windows_batch_size, + valid_batch_size=valid_batch_size, + inference_windows_batch_size=inference_windows_batch_size, + start_padding_enabled=start_padding_enabled, + step_size=step_size, + scaler_type=scaler_type, + num_workers_loader=num_workers_loader, + drop_last_loader=drop_last_loader, + random_seed=random_seed, + optimizer=optimizer, + optimizer_kwargs=optimizer_kwargs, + **trainer_kwargs + ) + + self.h = h + self.h_backup = self.h # Used because h=1 during training + self.use_softmax = True + self.hidden_size = hidden_size + self.dropout = dropout + self.trajectory_samples = trajectory_samples + + self.futr_exog_size = len(self.futr_exog_list) + self.stat_exog_size = len(self.stat_exog_list) + + input_dim = input_size * (1 + self.futr_exog_size) + self.stat_exog_size + # Create DeepNPTSNetwork + modules = [] + for i in range(n_layers): + modules.append(nn.Linear(input_dim if i == 0 else hidden_size, hidden_size)) + modules.append(nn.ReLU()) + if batch_norm: + modules.append(nn.BatchNorm1d(hidden_size)) + if dropout > 0.0: + modules.append(nn.Dropout(dropout)) + + self.deepnptsnetwork = nn.Sequential(*modules) + self.deepnptsnetwork.apply(partial(self._init_weights, scale=0.07)) + + # Add output layers for Mixture distribution + output_modules = [] + if dropout > 0.0: + output_modules.append(nn.Dropout(self.dropout)) + + if isinstance(loss, GMM): + output_modules.append(nn.Linear(hidden_size, input_size + 1)) + elif isinstance(loss, PMM): + output_modules.append(nn.Linear(hidden_size, input_size)) + elif isinstance(loss, NBMM): + output_modules.append(nn.Linear(hidden_size, input_size)) + + self.output_layer = nn.Sequential(*output_modules) + self.output_layer.apply(self._init_weights) + + @staticmethod + def _init_weights(module, scale=1.0): + if type(module) == nn.Linear: + nn.init.uniform_(module.weight, -scale, scale) + nn.init.zeros_(module.bias) + + def _domain_map(self, o_t, insample_y): + if 
isinstance(self.loss, GMM): + weights = o_t[:, :-1] # [B, L + 1] -> [B, L] + kernel_width = o_t[:, -1:] # [B, L + 1] -> [B, 1] + kernel_width = torch.repeat_interleave( + input=kernel_width, repeats=weights.shape[1], dim=-1 + ) # [B, 1] -> [B, L] + output = torch.cat( + [insample_y, kernel_width, weights], dim=-1 + ) # [B, L] + [B, L] + [B, L] = [B, 3 * L] + output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * L] + elif isinstance(self.loss, PMM): + weights = o_t # [B, L] -> [B, L] + output = torch.cat( + [insample_y, weights], dim=-1 + ) # [B, L] + [B, L] = [B, 2 * L] + output = output.unsqueeze(1) # [B, 2 * L] = [B, 1, 2 * L] + elif isinstance(self.loss, NBMM): + weights = torch.ones_like(o_t) # [B, L] -> [B, L] + output = torch.cat( + [insample_y, o_t, weights], dim=-1 + ) # [B, L] + [B, L] + [B, L] = [B, 3 * L] + output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * + + else: + raise NotImplementedError + + return output + + # Override BaseWindows method + def training_step(self, batch, batch_idx): + + # Only train one-step ahead + self.h = 1 + self.quantiles = self.loss.quantiles + + # Create and normalize windows [Ws, L+H, C] + y_idx = batch["y_idx"] + windows = self._create_windows(batch, step="train") + original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, y_idx]) + windows = self._normalization(windows=windows, y_idx=y_idx) + + # Parse windows + ( + insample_y, + insample_mask, + outsample_y, + outsample_mask, + _, + futr_exog, + stat_exog, + ) = self._parse_windows(batch, windows) + + windows_batch = dict( + insample_y=insample_y, # [Ws, L] + insample_mask=insample_mask, # [Ws, L] + futr_exog=futr_exog, # [Ws, L+H] + hist_exog=None, + stat_exog=stat_exog, # [Ws, 1] + y_idx=y_idx, # [Ws, 1] + ) + + # Model Predictions + output = self.train_forward(windows_batch) + + _, y_loc, y_scale = self._inv_normalization( + y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx + ) + # outsample_y = original_insample_y + outsample_y = 
original_outsample_y + distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale) + loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask) + + if torch.isnan(loss): + print("Model Parameters", self.hparams) + print("insample_y", torch.isnan(insample_y).sum()) + print("outsample_y", torch.isnan(outsample_y).sum()) + print("output", torch.isnan(output).sum()) + raise Exception("Loss is NaN, training stopped.") + + self.log("train_loss", loss, prog_bar=True, on_epoch=True) + self.train_trajectories.append((self.global_step, float(loss))) + + self.h = self.h_backup + + return loss + + # Override BaseWindows method + def validation_step(self, batch, batch_idx): + + self.h = self.h_backup + self.quantiles = self.valid_loss.quantiles + + if self.val_size == 0: + return np.nan + + # TODO: Hack to compute number of windows + windows = self._create_windows(batch, step="val") + n_windows = len(windows["temporal"]) + y_idx = batch["y_idx"] + + # Number of windows in batch + windows_batch_size = self.inference_windows_batch_size + if windows_batch_size < 0: + windows_batch_size = n_windows + n_batches = int(np.ceil(n_windows / windows_batch_size)) + + valid_losses = [] + batch_sizes = [] + for i in range(n_batches): + # Create and normalize windows [Ws, L+H, C] + w_idxs = np.arange( + i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows) + ) + windows = self._create_windows(batch, step="val", w_idxs=w_idxs) + original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, 0]) + windows = self._normalization(windows=windows, y_idx=y_idx) + + # Parse windows + ( + insample_y, + insample_mask, + _, + outsample_mask, + _, + futr_exog, + stat_exog, + ) = self._parse_windows(batch, windows) + + windows_batch = dict( + insample_y=insample_y, # [Ws, L] + insample_mask=insample_mask, # [Ws, L] + futr_exog=futr_exog, # [Ws, L+H] + hist_exog=None, # [Ws, L] + stat_exog=stat_exog, + y_idx=y_idx, + ) # [Ws, 1] + + # Model 
Predictions + output_batch = self(windows_batch) + # Monte Carlo already returns y_hat with mean and quantiles + output_batch = output_batch[:, :, 1:] # Remove mean + valid_loss_batch = self.valid_loss( + y=original_outsample_y, y_hat=output_batch, mask=outsample_mask + ) + valid_losses.append(valid_loss_batch) + batch_sizes.append(len(output_batch)) + + valid_loss = torch.stack(valid_losses) + batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device) + valid_loss = torch.sum(valid_loss * batch_sizes) / torch.sum(batch_sizes) + + if torch.isnan(valid_loss): + raise Exception("Loss is NaN, training stopped.") + + self.log("valid_loss", valid_loss, prog_bar=True, on_epoch=True) + self.validation_step_outputs.append(valid_loss) + return valid_loss + + # Override BaseWindows method + def predict_step(self, batch, batch_idx): + + self.h == self.h_backup + self.quantiles = self.loss.quantiles + + # TODO: Hack to compute number of windows + windows = self._create_windows(batch, step="predict") + n_windows = len(windows["temporal"]) + y_idx = batch["y_idx"] + + # Number of windows in batch + windows_batch_size = self.inference_windows_batch_size + if windows_batch_size < 0: + windows_batch_size = n_windows + n_batches = int(np.ceil(n_windows / windows_batch_size)) + + y_hats = [] + for i in range(n_batches): + # Create and normalize windows [Ws, L+H, C] + w_idxs = np.arange( + i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows) + ) + windows = self._create_windows(batch, step="predict", w_idxs=w_idxs) + windows = self._normalization(windows=windows, y_idx=y_idx) + + # Parse windows + insample_y, insample_mask, _, _, _, futr_exog, stat_exog = ( + self._parse_windows(batch, windows) + ) + windows_batch = dict( + insample_y=insample_y, # [Ws, L] + insample_mask=insample_mask, # [Ws, L] + futr_exog=futr_exog, # [Ws, L+H] + stat_exog=stat_exog, + y_idx=y_idx, + ) + + # Model Predictions + y_hat = self(windows_batch) + # Monte Carlo already returns y_hat 
with mean and quantiles + y_hats.append(y_hat) + y_hat = torch.cat(y_hats, dim=0) + return y_hat + + def train_forward(self, windows_batch): + # Parse windows_batch + x_t = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] + futr_exog = windows_batch["futr_exog"] # [B, L + h, F] + stat_exog = windows_batch["stat_exog"] # [B, S] + + batch_size, seq_len = x_t.shape[:2] # B = batch_size, L = seq_len + + # Concatenate x_t with future exogenous + if self.futr_exog_size > 0: + futr_exog_t = futr_exog[:, :seq_len] # [B, L + h, F] -> [B, L, F] + x_t = torch.cat( + (x_t, futr_exog_t), dim=2 + ) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] + + x_t = x_t.reshape(batch_size, -1) # [B, L, 1 + F] -> [B, L * (1 + F)] + + # Concatenate x_t with static exogenous + if self.stat_exog_size > 0: + x_t = torch.cat( + (x_t, stat_exog), dim=1 + ) # [B, L * (1 + F)] + [B, S] -> [B, L * (1 + F) + S] + + # Run through DeepNPTSNetwork + h_t = self.deepnptsnetwork(x_t) # [B, L * (1 + F) + S] -> [B, hidden_size] + o_t = self.output_layer(h_t) # [B, hidden_size] -> [B, L + 1] + + output = self._domain_map( + o_t, windows_batch["insample_y"] + ) # [B, L + 1], [B, L] -> [B, 3 * L] + output = self.loss.domain_map( + output + ) # [B, 3 * L] -> ([B, L], [B, L], [B, L]) + + return output + + def forward(self, windows_batch): + # Parse windows_batch + insample_y_t = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] + futr_exog = windows_batch["futr_exog"] # [B, L + h, F] + stat_exog = windows_batch["stat_exog"] # [B, S] + y_idx = windows_batch["y_idx"] + + batch_size, seq_len = insample_y_t.shape[:2] # B = batch_size, L = seq_len + device = insample_y_t.device + dtype = insample_y_t.dtype + + # Repeat insample_y for trajectory samples + insample_y_t = torch.repeat_interleave( + input=insample_y_t, repeats=self.trajectory_samples, dim=0 + ) # [B, L, 1] -> [B * n_samples, L, 1] + + # Input x_t is insample_y at time t + x_t = insample_y_t + + # Repeat futr_exog if available for trajectory samples and 
add to x_t + if self.futr_exog_size > 0: + futr_exog = torch.repeat_interleave( + input=futr_exog, repeats=self.trajectory_samples, dim=0 + ) # [B, L + h, F] -> [B * n_samples, L + h, F] + x_t = torch.cat( + (x_t, futr_exog[:, :seq_len]), dim=2 + ) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] + + x_t = x_t.reshape( + batch_size * self.trajectory_samples, -1 + ) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)] + + # Repeat stat_exog if available for trajectory samples and add to x_t + if self.stat_exog_size > 0: + stat_exog = torch.repeat_interleave( + input=stat_exog, repeats=self.trajectory_samples, dim=0 + ) # [B, S] -> [B * n_samples, S] + x_t = torch.cat( + (x_t, stat_exog), dim=1 + ) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S] + + # Scales for inverse normalization + y_scale = self.scaler.x_scale[:, :, y_idx] + y_loc = self.scaler.x_shift[:, :, y_idx] + y_scale = torch.repeat_interleave( + input=y_scale, repeats=self.trajectory_samples, dim=0 + ) + y_loc = torch.repeat_interleave( + input=y_loc, repeats=self.trajectory_samples, dim=0 + ) + # Create forecasts tensor + forecasts = torch.zeros( + (batch_size, self.h, len(self.quantiles) + 1), device=device, dtype=dtype + ) + + # Recursive predictions + for t in range(self.h): + # Run input throught DeepNPTSNetwork + h_t = self.deepnptsnetwork( + x_t + ) # [B * n_samples, L * (1 + F) + S] -> [B, hidden_size] + o_t = self.output_layer( + h_t + ) # [B * n_samples, hidden_size] -> [B * n_samples, L (+ 1)] + output = self._domain_map( + o_t, insample_y_t.squeeze(-1) + ) # [B * n_samples, L + 1], [B * n_samples, L] -> [B * n_samples, 3 * L] + output = self.loss.domain_map( + output + ) # [B * n_samples, 3 * L] -> ([B * n_samples, L], [B * n_samples, L], [B * n_samples, L]) + + # Inverse normalization + distr_args = self.loss.scale_decouple( + output=output, loc=y_loc, scale=y_scale + ) + + # Sample and create probabilistic outputs + 
samples_t_flat, _, _ = self.loss.sample( + distr_args=distr_args, num_samples=1 + ) + + samples_t_flat = samples_t_flat.squeeze() + samples_t = samples_t_flat.reshape( + batch_size, self.trajectory_samples + ) # [B * n_samples] -> [B, n_samples] + + samples_t_mean = torch.mean(samples_t, dim=-1) # [B, n_samples] -> [B] + quantiles_t = torch.quantile( + input=samples_t, q=self.quantiles, dim=-1 + ) # [B, n_samples] -> [Q, B] + forecasts[:, t, 0] = samples_t_mean + forecasts[:, t, 1:] = quantiles_t.permute(1, 0) + + insample_y_t_next = self.scaler.scaler( + samples_t_flat, y_loc.squeeze(), y_scale.squeeze() + ) # [B * n_samples] -> [B * n_samples] + insample_y_t_next = insample_y_t_next.unsqueeze(-1).unsqueeze( + -1 + ) # [B * n_samples] -> [B * n_samples, 1, 1] + + # Update insample_y_t + insample_y_t = torch.cat( + [insample_y_t[:, 1:], insample_y_t_next], dim=1 + ) # [B * n_samples, L - 1, 1] + [B * n_samples, 1, 1] -> [B * n_samples, L, 1] + + # Update input + x_t = insample_y_t + # Concatenate x_t with future exogenous + if self.futr_exog_size > 0: + x_t = torch.cat( + (x_t, futr_exog[:, t : seq_len + t]), dim=2 + ) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] + + x_t = x_t.reshape( + batch_size * self.trajectory_samples, -1 + ) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)] + + # Concatenate x_t with static exogenous + if self.stat_exog_size > 0: + x_t = torch.cat( + (x_t, stat_exog), dim=1 + ) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S] + + return forecasts From 17d1ef0007c4e8a00127b7c6ea937b74f23bbdb4 Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Mon, 22 Apr 2024 23:01:57 +0200 Subject: [PATCH 02/11] deepnpts_simple --- nbs/core.ipynb | 3 +- nbs/losses.pytorch.ipynb | 1714 ++--------------------------- nbs/models.deepnpts.ipynb | 869 +-------------- nbs/models.ipynb | 147 +-- neuralforecast/_modidx.py | 14 +- neuralforecast/losses/pytorch.py | 118 +- 
neuralforecast/models/__init__.py | 3 +- neuralforecast/models/deepnpts.py | 454 +------- 8 files changed, 297 insertions(+), 3025 deletions(-) diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 710fcd0b4..42c5321f1 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -90,7 +90,7 @@ " Informer, Autoformer, FEDformer,\n", " StemGNN, PatchTST, TimesNet, TimeLLM, TSMixer, TSMixerx,\n", " MLPMultivariate, iTransformer,\n", - " BiTCN, DeepNPTS\n", + " BiTCN, DeepNPTS,\n", ")" ] }, @@ -234,6 +234,7 @@ " 'itransformer': iTransformer, 'autoitransformer': iTransformer,\n", " 'bitcn': BiTCN, 'autobitcn': BiTCN,\n", " 'deepnpts': DeepNPTS, 'autodeepnpts': DeepNPTS,\n", + "\n", "}" ] }, diff --git a/nbs/losses.pytorch.ipynb b/nbs/losses.pytorch.ipynb index 36adfaabd..387da910d 100644 --- a/nbs/losses.pytorch.ipynb +++ b/nbs/losses.pytorch.ipynb @@ -67,7 +67,7 @@ " Normal, \n", " StudentT, \n", " Poisson,\n", - " NegativeBinomial\n", + " NegativeBinomial,\n", ")\n", "\n", "from torch.distributions import constraints" @@ -244,61 +244,7 @@ "execution_count": null, "id": "1d004cd0", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAE.__init__\n", - "\n", - "> MAE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Absolute Error\n", - "\n", - "Calculates Mean Absolute Error between\n", - "`y` and `y_hat`. MAE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the\n", - "deviation of the prediction and the true\n", - "value at a given time and averages these devations\n", - "over the length of the series.\n", - "\n", - "$$ \\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} |y_{\\tau} - \\hat{y}_{\\tau}| $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAE.__init__\n", - "\n", - "> MAE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Absolute Error\n", - "\n", - "Calculates Mean Absolute Error between\n", - "`y` and `y_hat`. MAE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the\n", - "deviation of the prediction and the true\n", - "value at a given time and averages these devations\n", - "over the length of the series.\n", - "\n", - "$$ \\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} |y_{\\tau} - \\hat{y}_{\\tau}| $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MAE, name='MAE.__init__', title_level=3)" ] @@ -308,51 +254,7 @@ "execution_count": null, "id": "0a20a273", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L106){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAE.__call__\n", - "\n", - "> MAE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mae`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L106){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAE.__call__\n", - "\n", - "> MAE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mae`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MAE.__call__, name='MAE.__call__', title_level=3)" ] @@ -426,61 +328,7 @@ "execution_count": null, "id": "e8c65b82", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L126){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MSE.__init__\n", - "\n", - "> MSE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Squared Error\n", - "\n", - "Calculates Mean Squared Error between\n", - "`y` and `y_hat`. MSE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the \n", - "squared deviation of the prediction and the true\n", - "value at a given time, and averages these devations\n", - "over the length of the series.\n", - "\n", - "$$ \\mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L126){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MSE.__init__\n", - "\n", - "> MSE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Squared Error\n", - "\n", - "Calculates Mean Squared Error between\n", - "`y` and `y_hat`. MSE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the \n", - "squared deviation of the prediction and the true\n", - "value at a given time, and averages these devations\n", - "over the length of the series.\n", - "\n", - "$$ \\mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MSE, name='MSE.__init__', title_level=3)" ] @@ -490,51 +338,7 @@ "execution_count": null, "id": "b0126a7f", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L147){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MSE.__call__\n", - "\n", - "> MSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mse`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L147){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MSE.__call__\n", - "\n", - "> MSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mse`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MSE.__call__, name='MSE.__call__', title_level=3)" ] @@ -612,67 +416,7 @@ "execution_count": null, "id": "d961d383", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L167){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### RMSE.__init__\n", - "\n", - "> RMSE.__init__ (horizon_weight=None)\n", - "\n", - "Root Mean Squared Error\n", - "\n", - "Calculates Root Mean Squared Error between\n", - "`y` and `y_hat`. RMSE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the squared deviation\n", - "of the prediction and the observed value at a given time and\n", - "averages these devations over the length of the series.\n", - "Finally the RMSE will be in the same scale\n", - "as the original time series so its comparison with other\n", - "series is possible only if they share a common scale. \n", - "RMSE has a direct connection to the L2 norm.\n", - "\n", - "$$ \\mathrm{RMSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\sqrt{\\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2}} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L167){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### RMSE.__init__\n", - "\n", - "> RMSE.__init__ (horizon_weight=None)\n", - "\n", - "Root Mean Squared Error\n", - "\n", - "Calculates Root Mean Squared Error between\n", - "`y` and `y_hat`. RMSE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the squared deviation\n", - "of the prediction and the observed value at a given time and\n", - "averages these devations over the length of the series.\n", - "Finally the RMSE will be in the same scale\n", - "as the original time series so its comparison with other\n", - "series is possible only if they share a common scale. \n", - "RMSE has a direct connection to the L2 norm.\n", - "\n", - "$$ \\mathrm{RMSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\sqrt{\\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2}} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(RMSE, name='RMSE.__init__', title_level=3)" ] @@ -682,51 +426,7 @@ "execution_count": null, "id": "d398d3e3", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L191){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### RMSE.__call__\n", - "\n", - "> RMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`rmse`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L191){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### RMSE.__call__\n", - "\n", - "> RMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`rmse`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(RMSE.__call__, name='RMSE.__call__', title_level=3)" ] @@ -817,69 +517,7 @@ "execution_count": null, "id": "174e8042", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L212){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAPE.__init__\n", - "\n", - "> MAPE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Absolute Percentage Error\n", - "\n", - "Calculates Mean Absolute Percentage Error between\n", - "`y` and `y_hat`. MAPE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the percentual deviation\n", - "of the prediction and the observed value at a given time and\n", - "averages these devations over the length of the series.\n", - "The closer to zero an observed value is, the higher penalty MAPE loss\n", - "assigns to the corresponding error.\n", - "\n", - "$$ \\mathrm{MAPE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L212){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAPE.__init__\n", - "\n", - "> MAPE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Absolute Percentage Error\n", - "\n", - "Calculates Mean Absolute Percentage Error between\n", - "`y` and `y_hat`. MAPE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the percentual deviation\n", - "of the prediction and the observed value at a given time and\n", - "averages these devations over the length of the series.\n", - "The closer to zero an observed value is, the higher penalty MAPE loss\n", - "assigns to the corresponding error.\n", - "\n", - "$$ \\mathrm{MAPE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MAPE, name='MAPE.__init__', title_level=3)" ] @@ -889,51 +527,7 @@ "execution_count": null, "id": "da63f136", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L237){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAPE.__call__\n", - "\n", - "> MAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mape`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L237){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAPE.__call__\n", - "\n", - "> MAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mape`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MAPE.__call__, name='MAPE.__call__', title_level=3)" ] @@ -1015,73 +609,7 @@ "execution_count": null, "id": "dee99fb8", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L259){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### SMAPE.__init__\n", - "\n", - "> SMAPE.__init__ (horizon_weight=None)\n", - "\n", - "Symmetric Mean Absolute Percentage Error\n", - "\n", - "Calculates Symmetric Mean Absolute Percentage Error between\n", - "`y` and `y_hat`. SMAPE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the relative deviation\n", - "of the prediction and the observed value scaled by the sum of the\n", - "absolute values for the prediction and observed value at a\n", - "given time, then averages these devations over the length\n", - "of the series. This allows the SMAPE to have bounds between\n", - "0% and 200% which is desireble compared to normal MAPE that\n", - "may be undetermined when the target is zero.\n", - "\n", - "$$ \\mathrm{sMAPE}_{2}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|+|\\hat{y}_{\\tau}|} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L259){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### SMAPE.__init__\n", - "\n", - "> SMAPE.__init__ (horizon_weight=None)\n", - "\n", - "Symmetric Mean Absolute Percentage Error\n", - "\n", - "Calculates Symmetric Mean Absolute Percentage Error between\n", - "`y` and `y_hat`. SMAPE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the relative deviation\n", - "of the prediction and the observed value scaled by the sum of the\n", - "absolute values for the prediction and observed value at a\n", - "given time, then averages these devations over the length\n", - "of the series. This allows the SMAPE to have bounds between\n", - "0% and 200% which is desireble compared to normal MAPE that\n", - "may be undetermined when the target is zero.\n", - "\n", - "$$ \\mathrm{sMAPE}_{2}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|+|\\hat{y}_{\\tau}|} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(SMAPE, name='SMAPE.__init__', title_level=3)" ] @@ -1091,51 +619,7 @@ "execution_count": null, "id": "db62a845", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L286){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### SMAPE.__call__\n", - "\n", - "> SMAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`smape`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L286){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### SMAPE.__call__\n", - "\n", - "> SMAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`smape`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(SMAPE.__call__, name='SMAPE.__call__', title_level=3)" ] @@ -1222,71 +706,7 @@ "execution_count": null, "id": "b6a4cf21", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L308){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MASE.__init__\n", - "\n", - "> MASE.__init__ (seasonality:int, horizon_weight=None)\n", - "\n", - "Mean Absolute Scaled Error \n", - "Calculates the Mean Absolute Scaled Error between\n", - "`y` and `y_hat`. MASE measures the relative prediction\n", - "accuracy of a forecasting method by comparinng the mean absolute errors\n", - "of the prediction and the observed value against the mean\n", - "absolute errors of the seasonal naive model.\n", - "The MASE partially composed the Overall Weighted Average (OWA), \n", - "used in the M4 Competition.\n", - "\n", - "$$ \\mathrm{MASE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{\\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau})} $$\n", - "\n", - "**Parameters:**
\n", - "`seasonality`: int. Main frequency of the time series; Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Rob J. Hyndman, & Koehler, A. B. \"Another look at measures of forecast accuracy\".](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", - "[Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, \"The M4 Competition: 100,000 time series and 61 forecasting methods\".](https://www.sciencedirect.com/science/article/pii/S0169207019301128)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L308){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MASE.__init__\n", - "\n", - "> MASE.__init__ (seasonality:int, horizon_weight=None)\n", - "\n", - "Mean Absolute Scaled Error \n", - "Calculates the Mean Absolute Scaled Error between\n", - "`y` and `y_hat`. MASE measures the relative prediction\n", - "accuracy of a forecasting method by comparinng the mean absolute errors\n", - "of the prediction and the observed value against the mean\n", - "absolute errors of the seasonal naive model.\n", - "The MASE partially composed the Overall Weighted Average (OWA), \n", - "used in the M4 Competition.\n", - "\n", - "$$ \\mathrm{MASE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{\\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau})} $$\n", - "\n", - "**Parameters:**
\n", - "`seasonality`: int. Main frequency of the time series; Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Rob J. Hyndman, & Koehler, A. B. \"Another look at measures of forecast accuracy\".](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", - "[Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, \"The M4 Competition: 100,000 time series and 61 forecasting methods\".](https://www.sciencedirect.com/science/article/pii/S0169207019301128)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MASE, name='MASE.__init__', title_level=3)" ] @@ -1296,53 +716,7 @@ "execution_count": null, "id": "32a2c11b", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L335){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MASE.__call__\n", - "\n", - "> MASE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> y_insample:torch.Tensor, mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor (batch_size, output_size), Actual values.
\n", - "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", - "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mase`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L335){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MASE.__call__\n", - "\n", - "> MASE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> y_insample:torch.Tensor, mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor (batch_size, output_size), Actual values.
\n", - "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", - "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mase`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MASE.__call__, name='MASE.__call__', title_level=3)" ] @@ -1429,69 +803,7 @@ "execution_count": null, "id": "edeb6f9a", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L364){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### relMSE.__init__\n", - "\n", - "> relMSE.__init__ (y_train, horizon_weight=None)\n", - "\n", - "Relative Mean Squared Error\n", - "Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n", - "as an alternative to percentage errors, to avoid measure unstability.\n", - "$$ \\mathrm{relMSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{\\hat{y}}^{naive1}) =\n", - "\\frac{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}}^{naive1})} $$\n", - "\n", - "**Parameters:**
\n", - "`y_train`: numpy array, Training values.
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "- [Hyndman, R. J and Koehler, A. B. (2006).\n", - " \"Another look at measures of forecast accuracy\",\n", - " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", - "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", - " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", - " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L364){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### relMSE.__init__\n", - "\n", - "> relMSE.__init__ (y_train, horizon_weight=None)\n", - "\n", - "Relative Mean Squared Error\n", - "Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n", - "as an alternative to percentage errors, to avoid measure unstability.\n", - "$$ \\mathrm{relMSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{\\hat{y}}^{naive1}) =\n", - "\\frac{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}}^{naive1})} $$\n", - "\n", - "**Parameters:**
\n", - "`y_train`: numpy array, Training values.
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "- [Hyndman, R. J and Koehler, A. B. (2006).\n", - " \"Another look at measures of forecast accuracy\",\n", - " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", - "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", - " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", - " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(relMSE, name='relMSE.__init__', title_level=3)" ] @@ -1501,53 +813,7 @@ "execution_count": null, "id": "a317b5c5", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L391){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### relMSE.__call__\n", - "\n", - "> relMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor (batch_size, output_size), Actual values.
\n", - "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", - "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`relMSE`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L391){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### relMSE.__call__\n", - "\n", - "> relMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor (batch_size, output_size), Actual values.
\n", - "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", - "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`relMSE`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(relMSE.__call__, name='relMSE.__call__', title_level=3)" ] @@ -1632,67 +898,7 @@ "execution_count": null, "id": "70bd46d9", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L418){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### QuantileLoss.__init__\n", - "\n", - "> QuantileLoss.__init__ (q, horizon_weight=None)\n", - "\n", - "Quantile Loss\n", - "\n", - "Computes the quantile loss between `y` and `y_hat`.\n", - "QL measures the deviation of a quantile forecast.\n", - "By weighting the absolute deviation in a non symmetric way, the\n", - "loss pays more attention to under or over estimation.\n", - "A common value for q is 0.5 for the deviation from the median (Pinball loss).\n", - "\n", - "$$ \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q)}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\Big( (1-q)\\,( \\hat{y}^{(q)}_{\\tau} - y_{\\tau} )_{+} + q\\,( y_{\\tau} - \\hat{y}^{(q)}_{\\tau} )_{+} \\Big) $$\n", - "\n", - "**Parameters:**
\n", - "`q`: float, between 0 and 1. The slope of the quantile loss, in the context of quantile regression, the q determines the conditional quantile level.
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L418){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### QuantileLoss.__init__\n", - "\n", - "> QuantileLoss.__init__ (q, horizon_weight=None)\n", - "\n", - "Quantile Loss\n", - "\n", - "Computes the quantile loss between `y` and `y_hat`.\n", - "QL measures the deviation of a quantile forecast.\n", - "By weighting the absolute deviation in a non symmetric way, the\n", - "loss pays more attention to under or over estimation.\n", - "A common value for q is 0.5 for the deviation from the median (Pinball loss).\n", - "\n", - "$$ \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q)}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\Big( (1-q)\\,( \\hat{y}^{(q)}_{\\tau} - y_{\\tau} )_{+} + q\\,( y_{\\tau} - \\hat{y}^{(q)}_{\\tau} )_{+} \\Big) $$\n", - "\n", - "**Parameters:**
\n", - "`q`: float, between 0 and 1. The slope of the quantile loss, in the context of quantile regression, the q determines the conditional quantile level.
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(QuantileLoss, name='QuantileLoss.__init__', title_level=3)" ] @@ -1702,51 +908,7 @@ "execution_count": null, "id": "0b1588e9", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L445){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### QuantileLoss.__call__\n", - "\n", - "> QuantileLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`quantile_loss`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L445){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### QuantileLoss.__call__\n", - "\n", - "> QuantileLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`quantile_loss`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(QuantileLoss.__call__, name='QuantileLoss.__call__', title_level=3)" ] @@ -1918,87 +1080,7 @@ "execution_count": null, "id": "8f42ec82", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L494){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MQLoss.__init__\n", - "\n", - "> MQLoss.__init__ (level=[80, 90], quantiles=None, horizon_weight=None)\n", - "\n", - "Multi-Quantile loss\n", - "\n", - "Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", - "MQL calculates the average multi-quantile Loss for\n", - "a given set of quantiles, based on the absolute \n", - "difference between predicted quantiles and observed values.\n", - "\n", - "$$ \\mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\\hat{y}}^{(q_{1})}_{\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q_{i})}_{\\tau}) $$\n", - "\n", - "The limit behavior of MQL allows to measure the accuracy \n", - "of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with \n", - "the continuous ranked probability score (CRPS). This can be achieved \n", - "through a numerical integration technique, that discretizes the quantiles \n", - "and treats the CRPS integral with a left Riemann approximation, averaging over \n", - "uniformly distanced quantiles. \n", - "\n", - "$$ \\mathrm{CRPS}(y_{\\tau}, \\mathbf{\\hat{F}}_{\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\tau}, \\hat{y}^{(q)}_{\\tau}) dq $$\n", - "\n", - "**Parameters:**
\n", - "`level`: int list [0,100]. Probability levels for prediction intervals (Defaults median).\n", - "`quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", - "[James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L494){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MQLoss.__init__\n", - "\n", - "> MQLoss.__init__ (level=[80, 90], quantiles=None, horizon_weight=None)\n", - "\n", - "Multi-Quantile loss\n", - "\n", - "Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", - "MQL calculates the average multi-quantile Loss for\n", - "a given set of quantiles, based on the absolute \n", - "difference between predicted quantiles and observed values.\n", - "\n", - "$$ \\mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\\hat{y}}^{(q_{1})}_{\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q_{i})}_{\\tau}) $$\n", - "\n", - "The limit behavior of MQL allows to measure the accuracy \n", - "of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with \n", - "the continuous ranked probability score (CRPS). This can be achieved \n", - "through a numerical integration technique, that discretizes the quantiles \n", - "and treats the CRPS integral with a left Riemann approximation, averaging over \n", - "uniformly distanced quantiles. \n", - "\n", - "$$ \\mathrm{CRPS}(y_{\\tau}, \\mathbf{\\hat{F}}_{\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\tau}, \\hat{y}^{(q)}_{\\tau}) dq $$\n", - "\n", - "**Parameters:**
\n", - "`level`: int list [0,100]. Probability levels for prediction intervals (Defaults median).\n", - "`quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", - "[James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MQLoss, name='MQLoss.__init__', title_level=3)" ] @@ -2008,51 +1090,7 @@ "execution_count": null, "id": "bac2237a", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L568){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MQLoss.__call__\n", - "\n", - "> MQLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mqloss`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L568){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MQLoss.__call__\n", - "\n", - "> MQLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mqloss`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MQLoss.__call__, name='MQLoss.__call__', title_level=3)" ] @@ -2071,17 +1109,7 @@ "execution_count": null, "id": "da37f2ef", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", - "Parameter containing:\n", - "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" - ] - } - ], + "outputs": [], "source": [ "# | hide\n", "# Unit tests to check MQLoss' stored quantiles\n", @@ -2626,99 +1654,7 @@ "execution_count": null, "id": "a462101b", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L913){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.__init__\n", - "\n", - "> DistributionLoss.__init__ (distribution, level=[80, 90], quantiles=None,\n", - "> num_samples=1000, return_params=False,\n", - "> **distribution_kwargs)\n", - "\n", - "DistributionLoss\n", - "\n", - "This PyTorch module wraps the `torch.distribution` classes allowing it to \n", - "interact with NeuralForecast models modularly. It shares the negative \n", - "log-likelihood as the optimization objective and a sample method to \n", - "generate empirically the quantiles defined by the `level` list.\n", - "\n", - "Additionally, it implements a distribution transformation that factorizes the\n", - "scale-dependent likelihood parameters into a base scale and a multiplier \n", - "efficiently learnable within the network's non-linearities operating ranges.\n", - "\n", - "Available distributions:
\n", - "- Poisson
\n", - "- Normal
\n", - "- StudentT
\n", - "- NegativeBinomial
\n", - "- Tweedie
\n", - "- Bernoulli (Temporal Classifiers)\n", - "\n", - "**Parameters:**
\n", - "`distribution`: str, identifier of a torch.distributions.Distribution class.
\n", - "`level`: float list [0,100], confidence levels for prediction intervals.
\n", - "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", - "`num_samples`: int=500, number of samples for the empirical quantiles.
\n", - "`return_params`: bool=False, wether or not return the Distribution parameters.

\n", - "\n", - "**References:**
\n", - "- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
\n", - "- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020).\n", - " \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L913){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.__init__\n", - "\n", - "> DistributionLoss.__init__ (distribution, level=[80, 90], quantiles=None,\n", - "> num_samples=1000, return_params=False,\n", - "> **distribution_kwargs)\n", - "\n", - "DistributionLoss\n", - "\n", - "This PyTorch module wraps the `torch.distribution` classes allowing it to \n", - "interact with NeuralForecast models modularly. It shares the negative \n", - "log-likelihood as the optimization objective and a sample method to \n", - "generate empirically the quantiles defined by the `level` list.\n", - "\n", - "Additionally, it implements a distribution transformation that factorizes the\n", - "scale-dependent likelihood parameters into a base scale and a multiplier \n", - "efficiently learnable within the network's non-linearities operating ranges.\n", - "\n", - "Available distributions:
\n", - "- Poisson
\n", - "- Normal
\n", - "- StudentT
\n", - "- NegativeBinomial
\n", - "- Tweedie
\n", - "- Bernoulli (Temporal Classifiers)\n", - "\n", - "**Parameters:**
\n", - "`distribution`: str, identifier of a torch.distributions.Distribution class.
\n", - "`level`: float list [0,100], confidence levels for prediction intervals.
\n", - "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", - "`num_samples`: int=500, number of samples for the empirical quantiles.
\n", - "`return_params`: bool=False, wether or not return the Distribution parameters.

\n", - "\n", - "**References:**
\n", - "- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
\n", - "- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020).\n", - " \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DistributionLoss, name='DistributionLoss.__init__', title_level=3)" ] @@ -2728,65 +1664,7 @@ "execution_count": null, "id": "d8c367f8", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1040){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.sample\n", - "\n", - "> DistributionLoss.sample (distr_args:torch.Tensor,\n", - "> num_samples:Optional[int]=None)\n", - "\n", - "Construct the empirical quantiles from the estimated Distribution,\n", - "sampling from it `num_samples` independently.\n", - "\n", - "**Parameters**
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`num_samples`: int=500, overwrite number of samples for the empirical quantiles.
\n", - "\n", - "**Returns**
\n", - "`samples`: tensor, shape [B,H,`num_samples`].
\n", - "`quantiles`: tensor, empirical quantiles defined by `levels`.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1040){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.sample\n", - "\n", - "> DistributionLoss.sample (distr_args:torch.Tensor,\n", - "> num_samples:Optional[int]=None)\n", - "\n", - "Construct the empirical quantiles from the estimated Distribution,\n", - "sampling from it `num_samples` independently.\n", - "\n", - "**Parameters**
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`num_samples`: int=500, overwrite number of samples for the empirical quantiles.
\n", - "\n", - "**Returns**
\n", - "`samples`: tensor, shape [B,H,`num_samples`].
\n", - "`quantiles`: tensor, empirical quantiles defined by `levels`.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DistributionLoss.sample, name='DistributionLoss.sample', title_level=3)" ] @@ -2796,75 +1674,7 @@ "execution_count": null, "id": "04e32679", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1083){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.__call__\n", - "\n", - "> DistributionLoss.__call__ (y:torch.Tensor, distr_args:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "Computes the negative log-likelihood objective function. \n", - "To estimate the following predictive distribution:\n", - "\n", - "$$\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta) \\quad \\mathrm{and} \\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta))$$\n", - "\n", - "where $\\theta$ represents the distributions parameters. It aditionally \n", - "summarizes the objective signal using a weighted average using the `mask` tensor. \n", - "\n", - "**Parameters**
\n", - "`y`: tensor, Actual values.
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns**
\n", - "`loss`: scalar, weighted loss function against which backpropagation will be performed.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1083){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.__call__\n", - "\n", - "> DistributionLoss.__call__ (y:torch.Tensor, distr_args:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "Computes the negative log-likelihood objective function. \n", - "To estimate the following predictive distribution:\n", - "\n", - "$$\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta) \\quad \\mathrm{and} \\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta))$$\n", - "\n", - "where $\\theta$ represents the distributions parameters. It aditionally \n", - "summarizes the objective signal using a weighted average using the `mask` tensor. \n", - "\n", - "**Parameters**
\n", - "`y`: tensor, Actual values.
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns**
\n", - "`loss`: scalar, weighted loss function against which backpropagation will be performed.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DistributionLoss.__call__, name='DistributionLoss.__call__', title_level=3)" ] @@ -2874,17 +1684,7 @@ "execution_count": null, "id": "14a7e381", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", - "Parameter containing:\n", - "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" - ] - } - ], + "outputs": [], "source": [ "# | hide\n", "# Unit tests to check DistributionLoss' stored quantiles\n", @@ -2964,42 +1764,35 @@ " # If True, predict_step will return Distribution's parameters\n", " self.return_params = return_params\n", " if self.return_params:\n", - " lambda_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n", - " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", - " self.param_names = [i for j in zip(lambda_names, weight_names) for i in j]\n", + " self.param_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n", " self.output_names = self.output_names + self.param_names\n", "\n", " # Add first output entry for the sample_mean\n", " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = 2 * n_components\n", + " self.outputsize_multiplier = n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " lambdas, weights = output.chunk(2, dim=-1)\n", - " return (lambdas, weights)\n", - "\n", - " def scale_decouple(\n", - " self,\n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None,\n", - " ):\n", - " \"\"\"Scale Decouple\n", + " return (output,)#, weights\n", + " \n", + " def scale_decouple(self, \n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None):\n", + " \"\"\" Scale Decouple\n", 
"\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", - " lambdas, weights = output\n", - " weights = F.softmax(weights, dim=-1)\n", - "\n", + " lambdas = output[0]\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(lambdas.size(dim=0), 1, -1)\n", " scale = scale.view(lambdas.size(dim=0), 1, -1)\n", " lambdas = (lambdas * scale) + loc\n", " lambdas = F.softplus(lambdas)\n", - " return (lambdas, weights)\n", + " return (lambdas,)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -3021,10 +1814,15 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", "\n", - " lambdas, weights = distr_args\n", + " lambdas = distr_args[0]\n", " B, H, K = lambdas.size()\n", " Q = len(self.quantiles)\n", "\n", + " # Sample K ~ Mult(weights)\n", + " # shared across B, H\n", + " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", + " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n", + "\n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", " lambdas = lambdas.flatten() \n", @@ -3062,7 +1860,7 @@ " \n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", " if mask is None: \n", " mask = (y > 0) * 1\n", @@ -3070,9 +1868,11 @@ " mask = mask * ((y > 0) * 1)\n", "\n", " eps = 1e-10\n", - " lambdas, weights = distr_args\n", + " lambdas = distr_args[0]\n", " B, H, K = lambdas.size()\n", "\n", + " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n", + "\n", " y = y[:,:,None]\n", " mask = mask[:,:,None]\n", "\n", @@ -3097,7 +1897,7 @@ " return loss\n", "\n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, 
torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)\n" @@ -3108,83 +1908,7 @@ "execution_count": null, "id": "62d7daba", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.__init__\n", - "\n", - "> PMM.__init__ (n_components=10, level=[80, 90], quantiles=None,\n", - "> num_samples=1000, return_params=False,\n", - "> batch_correlation=False, horizon_correlation=False)\n", - "\n", - "Poisson Mixture Mesh\n", - "\n", - "This Poisson Mixture statistical model assumes independence across groups of \n", - "data $\\mathcal{G}=\\{[g_{i}]\\}$, and estimates relationships within the group.\n", - "\n", - "$$ \\mathrm{P}\\left(\\mathbf{y}_{[b][t+1:t+H]}\\right) = \n", - "\\prod_{ [g_{i}] \\in \\mathcal{G}} \\mathrm{P} \\left(\\mathbf{y}_{[g_{i}][\\tau]} \\right) =\n", - "\\prod_{\\beta\\in[g_{i}]} \n", - "\\left(\\sum_{k=1}^{K} w_k \\prod_{(\\beta,\\tau) \\in [g_i][t+1:t+H]} \\mathrm{Poisson}(y_{\\beta,\\tau}, \\hat{\\lambda}_{\\beta,\\tau,k}) \\right)$$\n", - "\n", - "**Parameters:**
\n", - "`n_components`: int=10, the number of mixture components.
\n", - "`level`: float list [0,100], confidence levels for prediction intervals.
\n", - "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", - "`return_params`: bool=False, wether or not return the Distribution parameters.
\n", - "`batch_correlation`: bool=False, wether or not model batch correlations.
\n", - "`horizon_correlation`: bool=False, wether or not model horizon correlations.
\n", - "\n", - "**References:**
\n", - "[Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", - "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. Submitted to the International \n", - "Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.__init__\n", - "\n", - "> PMM.__init__ (n_components=10, level=[80, 90], quantiles=None,\n", - "> num_samples=1000, return_params=False,\n", - "> batch_correlation=False, horizon_correlation=False)\n", - "\n", - "Poisson Mixture Mesh\n", - "\n", - "This Poisson Mixture statistical model assumes independence across groups of \n", - "data $\\mathcal{G}=\\{[g_{i}]\\}$, and estimates relationships within the group.\n", - "\n", - "$$ \\mathrm{P}\\left(\\mathbf{y}_{[b][t+1:t+H]}\\right) = \n", - "\\prod_{ [g_{i}] \\in \\mathcal{G}} \\mathrm{P} \\left(\\mathbf{y}_{[g_{i}][\\tau]} \\right) =\n", - "\\prod_{\\beta\\in[g_{i}]} \n", - "\\left(\\sum_{k=1}^{K} w_k \\prod_{(\\beta,\\tau) \\in [g_i][t+1:t+H]} \\mathrm{Poisson}(y_{\\beta,\\tau}, \\hat{\\lambda}_{\\beta,\\tau,k}) \\right)$$\n", - "\n", - "**Parameters:**
\n", - "`n_components`: int=10, the number of mixture components.
\n", - "`level`: float list [0,100], confidence levels for prediction intervals.
\n", - "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", - "`return_params`: bool=False, wether or not return the Distribution parameters.
\n", - "`batch_correlation`: bool=False, wether or not model batch correlations.
\n", - "`horizon_correlation`: bool=False, wether or not model horizon correlations.
\n", - "\n", - "**References:**
\n", - "[Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", - "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. Submitted to the International \n", - "Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(PMM, name='PMM.__init__', title_level=3)" ] @@ -3194,63 +1918,7 @@ "execution_count": null, "id": "fa8da65c", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1206){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.sample\n", - "\n", - "> PMM.sample (distr_args, num_samples=None)\n", - "\n", - "Construct the empirical quantiles from the estimated Distribution,\n", - "sampling from it `num_samples` independently.\n", - "\n", - "**Parameters**
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`num_samples`: int=500, overwrites number of samples for the empirical quantiles.
\n", - "\n", - "**Returns**
\n", - "`samples`: tensor, shape [B,H,`num_samples`].
\n", - "`quantiles`: tensor, empirical quantiles defined by `levels`.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1206){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.sample\n", - "\n", - "> PMM.sample (distr_args, num_samples=None)\n", - "\n", - "Construct the empirical quantiles from the estimated Distribution,\n", - "sampling from it `num_samples` independently.\n", - "\n", - "**Parameters**
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`num_samples`: int=500, overwrites number of samples for the empirical quantiles.
\n", - "\n", - "**Returns**
\n", - "`samples`: tensor, shape [B,H,`num_samples`].
\n", - "`quantiles`: tensor, empirical quantiles defined by `levels`.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(PMM.sample, name='PMM.sample', title_level=3)" ] @@ -3260,39 +1928,7 @@ "execution_count": null, "id": "ba75717c", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1305){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.__call__\n", - "\n", - "> PMM.__call__ (y:torch.Tensor, distr_args:Tuple[torch.Tensor],\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "Call self as a function." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1305){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.__call__\n", - "\n", - "> PMM.__call__ (y:torch.Tensor, distr_args:Tuple[torch.Tensor],\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "Call self as a function." 
- ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(PMM.__call__, name='PMM.__call__', title_level=3)" ] @@ -3311,17 +1947,7 @@ "execution_count": null, "id": "e4a20e21", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", - "Parameter containing:\n", - "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" - ] - } - ], + "outputs": [], "source": [ "# | hide\n", "# Unit tests to check PMM's stored quantiles\n", @@ -3345,43 +1971,11 @@ "execution_count": null, "id": "a56a2fbe", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "lambdas.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "samples.shape (N,H,num_samples) torch.Size([2, 2, 1000])\n", - "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", - "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| hide\n", - "# Create single mixture and broadcast to N, H, K\n", - "weights = torch.ones((2,3))[None, :, :]\n", + "# Create single mixture and broadcast to N,H,K\n", + "weights = torch.ones((1,3))[None, :, :]\n", "lambdas = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :]\n", "\n", "# Create repetitions for the batch dimension N.\n", @@ -3393,7 +1987,7 @@ "print('lambdas.shape (N,H,K) \\t', lambdas.shape)\n", "\n", "distr = PMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (lambdas, weights)\n", + "distr_args = (lambdas,)\n", "samples, sample_mean, quants = distr.sample(distr_args)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", @@ -3498,44 +2092,38 @@ " if self.return_params:\n", " mu_names = [f\"-mu-{i}\" for i in range(1, n_components + 1)]\n", " std_names = [f\"-std-{i}\" for i in range(1, n_components + 1)]\n", - " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", - " self.param_names = [i for j in zip(mu_names, std_names, weight_names) for i in j]\n", - " self.output_names = self.output_names + self.param_names\n", + " mu_std_names = [i for j in zip(mu_names, std_names) for i in j]\n", + " self.output_names = self.output_names + mu_std_names\n", "\n", " # Add first output entry for the sample_mean\n", " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = 3 * n_components\n", + " self.outputsize_multiplier = 2 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " means, stds, weights = output.chunk(3, dim=-1)\n", - "\n", - " return (means, stds, weights)\n", + " means, stds = torch.tensor_split(output, 2, dim=-1)\n", + " return (means, stds)\n", "\n", - " def scale_decouple(\n", - " self,\n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None,\n", - " eps: 
float = 0.2,\n", - " ):\n", - " \"\"\"Scale Decouple\n", + " def scale_decouple(self, \n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " eps: float=0.2):\n", + " \"\"\" Scale Decouple\n", "\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", - " means, stds, weights = output\n", + " means, stds = output\n", " stds = F.softplus(stds)\n", - " weights = F.softmax(weights, dim=-1)\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(means.size(dim=0), 1, -1)\n", - " scale = scale.view(means.size(dim=0), 1, -1)\n", + " scale = scale.view(means.size(dim=0), 1, -1) \n", " means = (means * scale) + loc\n", " stds = (stds + eps) * scale\n", - "\n", - " return (means, stds, weights)\n", + " return (means, stds)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -3557,11 +2145,17 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", " \n", - " means, stds, weights = distr_args\n", + " means, stds = distr_args\n", " B, H, K = means.size()\n", " Q = len(self.quantiles)\n", " assert means.shape == stds.shape\n", "\n", + " # Sample K ~ Mult(weights)\n", + " # shared across B, H\n", + " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", + " \n", + " weights = (1/K) * torch.ones_like(means, device=means.device)\n", + " \n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", " means = means.flatten()\n", @@ -3601,15 +2195,17 @@ "\n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " if mask is None: \n", " mask = torch.ones_like(y)\n", " \n", - " means, stds, weights 
= distr_args\n", + " means, stds = distr_args\n", " B, H, K = means.size()\n", - " \n", + " \n", + " weights = (1/K) * torch.ones_like(means, device=means.device)\n", + " \n", " y = y[:,:, None]\n", " mask = mask[:,:,None]\n", " \n", @@ -3632,7 +2228,7 @@ " return loss\n", " \n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)" @@ -3682,17 +2278,7 @@ "execution_count": null, "id": "8ebe4250", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", - "Parameter containing:\n", - "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" - ] - } - ], + "outputs": [], "source": [ "# | hide\n", "# Unit tests to check PMM's stored quantiles\n", @@ -3716,40 +2302,7 @@ "execution_count": null, "id": "684d2382", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "means.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "stds.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "samples.shape (N,H,num_samples) torch.Size([2, 2, 1000])\n", - "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", - "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| hide\n", "# Create single mixture and broadcast to N,H,K\n", @@ -3766,7 +2319,7 @@ "print('stds.shape (N,H,K) \\t', stds.shape)\n", "\n", "distr = GMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (means, stds, weights)\n", + "distr_args = (means, stds)\n", "samples, sample_mean, quants = distr.sample(distr_args)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", @@ -3866,55 +2419,47 @@ " # If True, predict_step will return Distribution's parameters\n", " self.return_params = return_params\n", " if self.return_params:\n", - " total_count_names = [\n", - " f\"-total_count-{i}\" for i in range(1, n_components + 1)\n", - " ]\n", + " total_count_names = [f\"-total_count-{i}\" for i in range(1, n_components + 1)]\n", " probs_names = [f\"-probs-{i}\" for i in range(1, n_components + 1)]\n", - " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", - " self.param_names = [i for j in zip(total_count_names, probs_names, weight_names) for i in j]\n", - " self.output_names = self.output_names + self.param_names\n", + " param_names = [i for j in zip(total_count_names, probs_names) for i in j]\n", + " self.output_names = self.output_names + param_names\n", "\n", " # Add first output entry for the sample_mean\n", - " self.output_names.insert(0, \"\")\n", + " self.output_names.insert(0, \"\") \n", "\n", - " self.outputsize_multiplier = 3 * n_components\n", + " self.outputsize_multiplier = 2 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " mu, alpha, weights = output.chunk(3, dim=-1)\n", + " mu, alpha = torch.tensor_split(output, 2, dim=-1)\n", + " return (mu, alpha)\n", "\n", - " return mu, alpha, weights\n", - "\n", - " def scale_decouple(\n", - " self,\n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = 
None,\n", - " eps: float = 1e-6,\n", - " ):\n", - " \"\"\"Scale Decouple\n", + " def scale_decouple(self, \n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " eps: float=0.2):\n", + " \"\"\" Scale Decouple\n", "\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", " # Efficient NBinomial parametrization\n", - " mu, alpha, weights = output\n", - " mu = F.softplus(mu) + eps\n", - " alpha = F.softplus(alpha) + eps # alpha = 1/total_counts\n", - " weights = F.softmax(weights, dim=-1)\n", + " mu, alpha = output\n", + " mu = F.softplus(mu) + 1e-8\n", + " alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(mu.size(dim=0), 1, -1)\n", " mu *= loc\n", - " alpha /= loc + 1.0\n", + " alpha /= (loc + 1.)\n", "\n", " # mu = total_count * (probs/(1-probs))\n", " # => probs = mu / (total_count + mu)\n", " # => probs = mu / [total_count * (1 + mu * (1/total_count))]\n", " total_count = 1.0 / alpha\n", - " probs = (mu * alpha / (1.0 + mu * alpha))\n", - " probs = torch.clamp(probs, eps, 1 - eps)\n", - " return (total_count, probs, weights)\n", + " probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 \n", + " return (total_count, probs)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -3936,10 +2481,16 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", " \n", - " total_count, probs, weights = distr_args\n", + " total_count, probs = distr_args\n", " B, H, K = total_count.size()\n", " Q = len(self.quantiles)\n", " assert total_count.shape == probs.shape\n", + "\n", + " # Sample K ~ Mult(weights)\n", + " # shared across B, H\n", + " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", + " \n", + " weights = (1/K) * 
torch.ones_like(probs, device=probs.device)\n", " \n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", @@ -3982,15 +2533,17 @@ "\n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " if mask is None: \n", " mask = torch.ones_like(y)\n", " \n", - " total_count, probs, weights = distr_args\n", + " total_count, probs = distr_args\n", " B, H, K = total_count.size()\n", " \n", + " weights = (1/K) * torch.ones_like(probs, device=probs.device)\n", + " \n", " y = y[:,:, None]\n", " mask = mask[:,:,None]\n", "\n", @@ -4014,7 +2567,7 @@ " return loss\n", " \n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)" @@ -4055,40 +2608,7 @@ "execution_count": null, "id": "b67e2931", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "counts.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "probs.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "samples.shape (N,H,num_samples) torch.Size([2, 2, 2000])\n", - "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", - "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| hide\n", "# Create single mixture and broadcast to N,H,K\n", @@ -4105,7 +2625,7 @@ "print('probs.shape (N,H,K) \\t', probs.shape)\n", "\n", "model = NBMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (counts, probs, weights)\n", + "distr_args = (counts, probs)\n", "samples, sample_mean, quants = model.sample(distr_args, num_samples=2000)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb index 6bafac332..c1852c18a 100644 --- a/nbs/models.deepnpts.ipynb +++ b/nbs/models.deepnpts.ipynb @@ -22,7 +22,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a non-parametric baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a tunable strategy. This strategy is learned by exploiting the information across multiple related time series. This model provides a strong, simple baseline for time series forecasting.\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a non-parametric baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a tunable strategy. This strategy is learned by exploiting the information across multiple related time series. This model provides a strong, simple baseline for time series forecasting. \n", "\n", "\n", "**References**
\n", @@ -30,13 +30,10 @@ "\n", "\n", ":::{.callout-warning collapse=\"false\"}\n", - "#### Exogenous Variables, Losses, and Parameters Availability\n", + "#### Losses\n", "\n", - "Given the sampling procedure during inference, DeepNPTS only supports `DistributionLoss` as training loss.\n", + "This implementation differs from the original work in that a weighted sum of the empirical distribution is returned as forecast, rather than a sampled distributional output. Consequently, DeepNPTS only supports point losses as training loss.\n", "\n", - "Note that DeepNPTS generates a non-parametric forecast distribution using Monte Carlo. We use this sampling procedure also during validation to make it closer to the inference procedure. Therefore, only the `MQLoss` is available for validation.\n", - "\n", - "Aditionally, Monte Carlo implies that historic exogenous variables are not available for the model.\n", ":::" ] }, @@ -47,17 +44,15 @@ "outputs": [], "source": [ "#| export\n", - "import numpy as np\n", - "\n", "import torch\n", "import torch.nn as nn\n", + "import torch.nn.functional as F\n", "import neuralforecast.losses.pytorch as losses\n", "from typing import Optional\n", - "from functools import partial\n", "\n", "\n", "from neuralforecast.common._base_windows import BaseWindows\n", - "from neuralforecast.losses.pytorch import MQLoss, GMM, PMM, NBMM\n" + "from neuralforecast.losses.pytorch import MAE\n" ] }, { @@ -102,7 +97,7 @@ "class DeepNPTS(BaseWindows):\n", " \"\"\" DeepNPTS\n", "\n", - " Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", + " Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. 
This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", "\n", " **Parameters:**
\n", " `h`: int, Forecast horizon.
\n", @@ -111,7 +106,6 @@ " `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", " `dropout`: float=0.1, dropout.
\n", " `n_layers`: int=2, number of dense layers.
\n", - " `trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", " `stat_exog_list`: str list, static exogenous columns.
\n", " `hist_exog_list`: str list, historic exogenous columns.
\n", " `futr_exog_list`: str list, future exogenous columns.
\n", @@ -152,15 +146,14 @@ " batch_norm: bool = True,\n", " dropout: float = 0.1,\n", " n_layers: int = 2,\n", - " trajectory_samples: int = 100,\n", " futr_exog_list = None,\n", " hist_exog_list = None,\n", " stat_exog_list = None,\n", " exclude_insample_y = False,\n", - " loss = GMM(),\n", - " valid_loss = MQLoss(level=[80, 90]),\n", + " loss = MAE(),\n", + " valid_loss = MAE(),\n", " max_steps: int = 1000,\n", - " learning_rate: float = 1e-5,\n", + " learning_rate: float = 1e-3,\n", " num_lr_decays: int = 3,\n", " early_stop_patience_steps: int =-1,\n", " val_check_steps: int = 100,\n", @@ -178,25 +171,12 @@ " optimizer_kwargs = None,\n", " **trainer_kwargs):\n", "\n", - " if hist_exog_list is not None:\n", - " raise Exception('DeepNPTS does not support historical exogenous variables.')\n", - "\n", " if exclude_insample_y:\n", " raise Exception('DeepNPTS has no possibility for excluding y.')\n", - " \n", - " supported_losses = (losses.GMM,\n", - " losses.PMM,\n", - " losses.NBMM)\n", "\n", - " if not isinstance(loss, supported_losses):\n", - " raise Exception('DeepNPTS only supports GMM, PMM or NBMM as loss function.') \n", - " \n", - " if not isinstance(valid_loss, losses.MQLoss):\n", - " raise Exception('DeepNPTS only supports MQLoss as validation loss.')\n", + " if not isinstance(loss, losses.BasePointLoss):\n", + " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function.') \n", " \n", - " # Overwrite n_components, it has to be the input_size in DeepNPTS\n", - " loss.n_components = input_size\n", - " \n", " # Inherit BaseWindows class\n", " super(DeepNPTS, self).__init__(h=h,\n", " input_size=input_size,\n", @@ -226,16 +206,15 @@ " **trainer_kwargs)\n", "\n", " self.h = h\n", - " self.h_backup = self.h # Used because h=1 during training\n", - " self.use_softmax = True\n", " self.hidden_size = hidden_size\n", " self.dropout = dropout\n", - " self.trajectory_samples = trajectory_samples\n", "\n", " self.futr_exog_size = 
len(self.futr_exog_list)\n", " self.stat_exog_size = len(self.stat_exog_list)\n", + " self.hist_exog_size = len(self.hist_exog_list)\n", "\n", - " input_dim = input_size * (1 + self.futr_exog_size) + self.stat_exog_size\n", + " input_dim = input_size * (1 + self.futr_exog_size + self.hist_exog_size) + self.stat_exog_size + self.h * self.futr_exog_size\n", + " \n", " # Create DeepNPTSNetwork\n", " modules = [] \n", " for i in range(n_layers):\n", @@ -246,503 +225,57 @@ " if dropout > 0.0:\n", " modules.append(nn.Dropout(dropout))\n", "\n", + " modules.append(nn.Linear(hidden_size, input_size * self.h))\n", " self.deepnptsnetwork = nn.Sequential(*modules)\n", - " self.deepnptsnetwork.apply(partial(self._init_weights, scale=0.07))\n", - "\n", - " # Add output layers for Mixture distribution \n", - " output_modules = []\n", - " if dropout > 0.0:\n", - " output_modules.append(nn.Dropout(self.dropout))\n", - " \n", - " if isinstance(loss, GMM):\n", - " output_modules.append(nn.Linear(hidden_size, input_size + 1))\n", - " elif isinstance(loss, PMM):\n", - " output_modules.append(nn.Linear(hidden_size, input_size))\n", - " elif isinstance(loss, NBMM):\n", - " output_modules.append(nn.Linear(hidden_size, input_size))\n", - "\n", - " self.output_layer = nn.Sequential(*output_modules)\n", - " self.output_layer.apply(self._init_weights)\n", - "\n", - "\n", - " @staticmethod\n", - " def _init_weights(module, scale=1.0):\n", - " if type(module) == nn.Linear:\n", - " nn.init.uniform_(module.weight, -scale, scale)\n", - " nn.init.zeros_(module.bias)\n", - "\n", - " def _domain_map(self, o_t, insample_y):\n", - " if isinstance(self.loss, GMM):\n", - " weights = o_t[:, :-1] # [B, L + 1] -> [B, L]\n", - " kernel_width = o_t[:, -1:] # [B, L + 1] -> [B, 1]\n", - " kernel_width = torch.repeat_interleave(input=kernel_width,\n", - " repeats=weights.shape[1],\n", - " dim=-1) # [B, 1] -> [B, L]\n", - " output = torch.cat([insample_y, kernel_width, weights], dim=-1) # [B, L] + [B, L] + [B, 
L] = [B, 3 * L]\n", - " output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * L]\n", - " elif isinstance(self.loss, PMM):\n", - " weights = o_t # [B, L] -> [B, L]\n", - " output = torch.cat([insample_y, weights], dim=-1) # [B, L] + [B, L] = [B, 2 * L]\n", - " output = output.unsqueeze(1) # [B, 2 * L] = [B, 1, 2 * L] \n", - " elif isinstance(self.loss, NBMM):\n", - " weights = torch.ones_like(o_t) # [B, L] -> [B, L]\n", - " output = torch.cat([insample_y, o_t, weights], dim=-1) # [B, L] + [B, L] + [B, L] = [B, 3 * L]\n", - " output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * \n", - "\n", - " else:\n", - " raise NotImplementedError\n", - " \n", - " return output\n", - "\n", - " # Override BaseWindows method\n", - " def training_step(self, batch, batch_idx):\n", - " \n", - " # Only train one-step ahead\n", - " self.h = 1\n", - " self.quantiles = self.loss.quantiles\n", - "\n", - " # Create and normalize windows [Ws, L+H, C]\n", - " y_idx = batch[\"y_idx\"]\n", - " windows = self._create_windows(batch, step=\"train\")\n", - " original_outsample_y = torch.clone(windows[\"temporal\"][:, -self.h :, y_idx])\n", - " windows = self._normalization(windows=windows, y_idx=y_idx)\n", - "\n", - " # Parse windows\n", - " (\n", - " insample_y,\n", - " insample_mask,\n", - " outsample_y,\n", - " outsample_mask,\n", - " _,\n", - " futr_exog,\n", - " stat_exog,\n", - " ) = self._parse_windows(batch, windows)\n", - "\n", - " windows_batch = dict(\n", - " insample_y=insample_y, # [Ws, L]\n", - " insample_mask=insample_mask, # [Ws, L]\n", - " futr_exog=futr_exog, # [Ws, L+H]\n", - " hist_exog=None, \n", - " stat_exog=stat_exog, # [Ws, 1]\n", - " y_idx=y_idx # [Ws, 1]\n", - " ) \n", - "\n", - " # Model Predictions\n", - " output = self.train_forward(windows_batch)\n", - "\n", - " _, y_loc, y_scale = self._inv_normalization(\n", - " y_hat=outsample_y, \n", - " temporal_cols=batch[\"temporal_cols\"], \n", - " y_idx=y_idx\n", - " )\n", - " # outsample_y = original_insample_y\n", - " 
outsample_y = original_outsample_y\n", - " distr_args = self.loss.scale_decouple(\n", - " output=output, loc=y_loc, scale=y_scale\n", - " )\n", - " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n", - "\n", - " if torch.isnan(loss):\n", - " print(\"Model Parameters\", self.hparams)\n", - " print(\"insample_y\", torch.isnan(insample_y).sum())\n", - " print(\"outsample_y\", torch.isnan(outsample_y).sum())\n", - " print(\"output\", torch.isnan(output).sum())\n", - " raise Exception(\"Loss is NaN, training stopped.\")\n", - "\n", - " self.log(\"train_loss\", loss, prog_bar=True, on_epoch=True)\n", - " self.train_trajectories.append((self.global_step, float(loss)))\n", - "\n", - " self.h = self.h_backup \n", - " \n", - " return loss\n", - "\n", - " # Override BaseWindows method\n", - " def validation_step(self, batch, batch_idx):\n", - "\n", - " self.h = self.h_backup\n", - " self.quantiles = self.valid_loss.quantiles\n", - "\n", - " if self.val_size == 0:\n", - " return np.nan\n", - "\n", - " # TODO: Hack to compute number of windows\n", - " windows = self._create_windows(batch, step=\"val\")\n", - " n_windows = len(windows[\"temporal\"])\n", - " y_idx = batch[\"y_idx\"]\n", - "\n", - " # Number of windows in batch\n", - " windows_batch_size = self.inference_windows_batch_size\n", - " if windows_batch_size < 0:\n", - " windows_batch_size = n_windows\n", - " n_batches = int(np.ceil(n_windows / windows_batch_size))\n", - "\n", - " valid_losses = []\n", - " batch_sizes = []\n", - " for i in range(n_batches):\n", - " # Create and normalize windows [Ws, L+H, C]\n", - " w_idxs = np.arange(\n", - " i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)\n", - " )\n", - " windows = self._create_windows(batch, step=\"val\", w_idxs=w_idxs)\n", - " original_outsample_y = torch.clone(windows[\"temporal\"][:, -self.h:, 0])\n", - " windows = self._normalization(windows=windows, y_idx=y_idx)\n", - "\n", - " # Parse windows\n", - " (\n", - " 
insample_y,\n", - " insample_mask,\n", - " _,\n", - " outsample_mask,\n", - " _,\n", - " futr_exog,\n", - " stat_exog,\n", - " ) = self._parse_windows(batch, windows)\n", - " \n", - " windows_batch = dict(\n", - " insample_y=insample_y, # [Ws, L]\n", - " insample_mask=insample_mask, # [Ws, L]\n", - " futr_exog=futr_exog, # [Ws, L+H]\n", - " hist_exog=None, # [Ws, L]\n", - " stat_exog=stat_exog,\n", - " y_idx=y_idx,\n", - " ) # [Ws, 1]\n", - "\n", - " # Model Predictions\n", - " output_batch = self(windows_batch)\n", - " # Monte Carlo already returns y_hat with mean and quantiles\n", - " output_batch = output_batch[:,:, 1:] # Remove mean\n", - " valid_loss_batch = self.valid_loss(y=original_outsample_y, y_hat=output_batch, mask=outsample_mask)\n", - " valid_losses.append(valid_loss_batch)\n", - " batch_sizes.append(len(output_batch))\n", - "\n", - " valid_loss = torch.stack(valid_losses)\n", - " batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)\n", - " valid_loss = torch.sum(valid_loss * batch_sizes) / torch.sum(batch_sizes)\n", - "\n", - " if torch.isnan(valid_loss):\n", - " raise Exception(\"Loss is NaN, training stopped.\")\n", - "\n", - " self.log(\"valid_loss\", valid_loss, prog_bar=True, on_epoch=True)\n", - " self.validation_step_outputs.append(valid_loss)\n", - " return valid_loss\n", - "\n", - " # Override BaseWindows method\n", - " def predict_step(self, batch, batch_idx):\n", - "\n", - " self.h == self.h_backup\n", - " self.quantiles = self.loss.quantiles\n", - "\n", - " # TODO: Hack to compute number of windows\n", - " windows = self._create_windows(batch, step='predict')\n", - " n_windows = len(windows['temporal'])\n", - " y_idx = batch['y_idx']\n", - "\n", - " # Number of windows in batch\n", - " windows_batch_size = self.inference_windows_batch_size\n", - " if windows_batch_size < 0:\n", - " windows_batch_size = n_windows\n", - " n_batches = int(np.ceil(n_windows/windows_batch_size))\n", - "\n", - " y_hats = []\n", - " for i in 
range(n_batches):\n", - " # Create and normalize windows [Ws, L+H, C]\n", - " w_idxs = np.arange(i*windows_batch_size, \n", - " min((i+1)*windows_batch_size, n_windows))\n", - " windows = self._create_windows(batch, step='predict', w_idxs=w_idxs)\n", - " windows = self._normalization(windows=windows, y_idx=y_idx)\n", - "\n", - " # Parse windows\n", - " insample_y, insample_mask, _, _, _, futr_exog, stat_exog = self._parse_windows(batch, windows)\n", - " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n", - " insample_mask=insample_mask, # [Ws, L]\n", - " futr_exog=futr_exog, # [Ws, L+H]\n", - " stat_exog=stat_exog,\n", - " y_idx=y_idx)\n", - " \n", - " # Model Predictions\n", - " y_hat = self(windows_batch)\n", - " # Monte Carlo already returns y_hat with mean and quantiles\n", - " y_hats.append(y_hat)\n", - " y_hat = torch.cat(y_hats, dim=0)\n", - " return y_hat\n", - "\n", - " def train_forward(self, windows_batch):\n", - " # Parse windows_batch\n", - " x_t = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", - " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n", - " stat_exog = windows_batch['stat_exog'] # [B, S]\n", - "\n", - " batch_size, seq_len = x_t.shape[:2] # B = batch_size, L = seq_len\n", - "\n", - " # Concatenate x_t with future exogenous\n", - " if self.futr_exog_size > 0: \n", - " futr_exog_t = futr_exog[:, :seq_len] # [B, L + h, F] -> [B, L, F]\n", - " x_t = torch.cat((x_t, futr_exog_t), dim=2) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] \n", - " \n", - " x_t = x_t.reshape(batch_size, -1) # [B, L, 1 + F] -> [B, L * (1 + F)]\n", - "\n", - " # Concatenate x_t with static exogenous\n", - " if self.stat_exog_size > 0:\n", - " x_t = torch.cat((x_t, stat_exog), dim=1) # [B, L * (1 + F)] + [B, S] -> [B, L * (1 + F) + S]\n", - "\n", - " # Run through DeepNPTSNetwork\n", - " h_t = self.deepnptsnetwork(x_t) # [B, L * (1 + F) + S] -> [B, hidden_size]\n", - " o_t = self.output_layer(h_t) # [B, hidden_size] -> [B, L + 1]\n", - "\n", - " output 
= self._domain_map(o_t, windows_batch['insample_y']) # [B, L + 1], [B, L] -> [B, 3 * L]\n", - " output = self.loss.domain_map(output) # [B, 3 * L] -> ([B, L], [B, L], [B, L])\n", - "\n", - " return output\n", "\n", " def forward(self, windows_batch):\n", " # Parse windows_batch\n", - " insample_y_t = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", + " x = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", + " hist_exog = windows_batch['hist_exog'] # [B, L, X]\n", " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n", " stat_exog = windows_batch['stat_exog'] # [B, S]\n", - " y_idx = windows_batch['y_idx']\n", - "\n", - " batch_size, seq_len = insample_y_t.shape[:2] # B = batch_size, L = seq_len\n", - " device = insample_y_t.device\n", - " dtype = insample_y_t.dtype\n", - "\n", - " # Repeat insample_y for trajectory samples\n", - " insample_y_t = torch.repeat_interleave(input=insample_y_t, \n", - " repeats=self.trajectory_samples, \n", - " dim=0) # [B, L, 1] -> [B * n_samples, L, 1]\n", - " \n", - " # Input x_t is insample_y at time t\n", - " x_t = insample_y_t\n", "\n", - " # Repeat futr_exog if available for trajectory samples and add to x_t \n", + " batch_size, seq_len = x.shape[:2] # B = batch_size, L = seq_len\n", + " insample_y = windows_batch['insample_y'].unsqueeze(-1) \n", + " \n", + " # Concatenate x_t with future exogenous of input\n", " if self.futr_exog_size > 0: \n", - " futr_exog = torch.repeat_interleave(input=futr_exog, \n", - " repeats=self.trajectory_samples, \n", - " dim=0) # [B, L + h, F] -> [B * n_samples, L + h, F] \n", - " x_t = torch.cat((x_t, futr_exog[:, :seq_len]), dim=2) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] \n", + " x = torch.cat((x, futr_exog[:, :seq_len]), dim=2) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] \n", " \n", - " x_t = x_t.reshape(batch_size * self.trajectory_samples, -1) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)]\n", + " # Concatenate x_t with 
historic exogenous\n", + " if self.hist_exog_size > 0: \n", + " x = torch.cat((x, hist_exog), dim=2) # [B, L, 1 + F] + [B, L, X] -> [B, L, 1 + F + X] \n", "\n", - " # Repeat stat_exog if available for trajectory samples and add to x_t\n", - " if self.stat_exog_size > 0:\n", - " stat_exog = torch.repeat_interleave(\n", - " input=stat_exog, \n", - " repeats=self.trajectory_samples, \n", - " dim=0) # [B, S] -> [B * n_samples, S] \n", - " x_t = torch.cat((x_t, stat_exog), dim=1) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S]\n", + " x = x.reshape(batch_size, -1) # [B, L, 1 + F + X] -> [B, L * (1 + F + X)]\n", "\n", - " # Scales for inverse normalization\n", - " y_scale = self.scaler.x_scale[:, :, y_idx]\n", - " y_loc = self.scaler.x_shift[:, :, y_idx]\n", - " y_scale = torch.repeat_interleave(input=y_scale, \n", - " repeats=self.trajectory_samples, \n", - " dim=0)\n", - " y_loc = torch.repeat_interleave(input=y_loc, \n", - " repeats=self.trajectory_samples, \n", - " dim=0)\n", - " # Create forecasts tensor\n", - " forecasts = torch.zeros((batch_size, \n", - " self.h,\n", - " len(self.quantiles) + 1), \n", - " device=device, \n", - " dtype=dtype)\n", - " \n", - " # Recursive predictions\n", - " for t in range(self.h):\n", - " # Run input throught DeepNPTSNetwork\n", - " h_t = self.deepnptsnetwork(x_t) # [B * n_samples, L * (1 + F) + S] -> [B, hidden_size]\n", - " o_t = self.output_layer(h_t) # [B * n_samples, hidden_size] -> [B * n_samples, L (+ 1)]\n", - " output = self._domain_map(o_t, insample_y_t.squeeze(-1)) # [B * n_samples, L + 1], [B * n_samples, L] -> [B * n_samples, 3 * L]\n", - " output = self.loss.domain_map(output) # [B * n_samples, 3 * L] -> ([B * n_samples, L], [B * n_samples, L], [B * n_samples, L])\n", - "\n", - " # Inverse normalization\n", - " distr_args = self.loss.scale_decouple(output=output, \n", - " loc=y_loc, \n", - " scale=y_scale)\n", + " # Concatenate x with static exogenous\n", + " if 
self.stat_exog_size > 0:\n", + " x = torch.cat((x, stat_exog), dim=1) # [B, L * (1 + F + X)] + [B, S] -> [B, L * (1 + F + X) + S]\n", "\n", - " # Sample and create probabilistic outputs\n", - " samples_t_flat, _, _ = self.loss.sample(distr_args=distr_args, \n", - " num_samples=1)\n", + " # Concatenate x_t with future exogenous of horizon\n", + " if self.futr_exog_size > 0:\n", + " futr_exog = futr_exog[:, seq_len:] # [B, L + h, F] -> [B, h, F]\n", + " futr_exog = futr_exog.reshape(batch_size, -1) # [B, L + h, F] -> [B, h * F]\n", + " x = torch.cat((x, futr_exog), dim=1) # [B, L * (1 + F + X) + S] + [B, h * F] -> [B, L * (1 + F + X) + S + h * F] \n", "\n", - " samples_t_flat = samples_t_flat.squeeze()\n", - " samples_t = samples_t_flat.reshape(batch_size, \n", - " self.trajectory_samples) # [B * n_samples] -> [B, n_samples] \n", - " \n", - " samples_t_mean = torch.mean(samples_t, dim=-1) # [B, n_samples] -> [B] \n", - " quantiles_t = torch.quantile(input=samples_t, \n", - " q=self.quantiles, \n", - " dim=-1) # [B, n_samples] -> [Q, B]\n", - " forecasts[:, t, 0] = samples_t_mean\n", - " forecasts[:, t, 1:] = quantiles_t.permute(1, 0)\n", + " # Run through DeepNPTSNetwork\n", + " weights = self.deepnptsnetwork(x) # [B, L * (1 + F + X) + S + h * F] -> [B, L * h]\n", "\n", - " insample_y_t_next = self.scaler.scaler(samples_t_flat, \n", - " y_loc.squeeze(), \n", - " y_scale.squeeze()) # [B * n_samples] -> [B * n_samples]\n", - " insample_y_t_next = insample_y_t_next.unsqueeze(-1)\\\n", - " .unsqueeze(-1) # [B * n_samples] -> [B * n_samples, 1, 1]\n", + " # Apply softmax for weighted input predictions\n", + " weights = weights.reshape(batch_size, seq_len, -1) # [B, L * h] -> [B, L, h]\n", + " x = F.softmax(weights, dim=1) * insample_y # [B, L, h] * [B, L, 1] = [B, L, h]\n", + " output = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1]\n", "\n", - " # Update insample_y_t \n", - " insample_y_t = torch.cat([insample_y_t[:, 1:], \n", - " insample_y_t_next], \n", - " 
dim=1) # [B * n_samples, L - 1, 1] + [B * n_samples, 1, 1] -> [B * n_samples, L, 1]\n", - " \n", - " # Update input\n", - " x_t = insample_y_t\n", - " # Concatenate x_t with future exogenous\n", - " if self.futr_exog_size > 0: \n", - " x_t = torch.cat((x_t, \n", - " futr_exog[:, t:seq_len + t]), \n", - " dim=2) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] \n", - " \n", - " x_t = x_t.reshape(batch_size * self.trajectory_samples\n", - " , -1) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)]\n", + " forecast = self.loss.domain_map(output) # [B, h, 1] -> [B, h, 1]\n", "\n", - " # Concatenate x_t with static exogenous\n", - " if self.stat_exog_size > 0:\n", - " x_t = torch.cat((x_t, stat_exog), dim=1) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S]\n", - " \n", - " return forecasts\n", - "\n" + " return forecast" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L20){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DeepNPTS\n", - "\n", - "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", - "> dropout:float=0.5, n_layers:int=2, trajectory_samples:int=100,\n", - "> futr_exog_list=None, hist_exog_list=None, stat_exog_list=None,\n", - "> exclude_insample_y=False, loss=GMM(), valid_loss=MQLoss(),\n", - "> max_steps:int=1000, learning_rate:float=0.001,\n", - "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", - "> val_check_steps:int=100, batch_size:int=32,\n", - "> valid_batch_size:Optional[int]=None,\n", - "> windows_batch_size:int=1024,\n", - "> inference_windows_batch_size:int=-1,\n", - "> start_padding_enabled=False, step_size:int=1,\n", - "> scaler_type:str='standard', random_seed:int=1,\n", - "> num_workers_loader=0, 
drop_last_loader=False, optimizer=None,\n", - "> optimizer_kwargs=None, **trainer_kwargs)\n", - "\n", - "DeepNPTS\n", - "\n", - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", - "\n", - "**Parameters:**
\n", - "`h`: int, Forecast horizon.
\n", - "`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", - "`hidden_size`: int=32, hidden size of dense layers.
\n", - "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", - "`dropout`: float=0.1, dropout.
\n", - "`n_layers`: int=2, number of dense layers.
\n", - "`trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", - "`stat_exog_list`: str list, static exogenous columns.
\n", - "`hist_exog_list`: str list, historic exogenous columns.
\n", - "`futr_exog_list`: str list, future exogenous columns.
\n", - "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", - "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`max_steps`: int=1000, maximum number of training steps.
\n", - "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", - "`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n", - "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", - "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", - "`batch_size`: int=32, number of different series in each batch.
\n", - "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", - "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", - "`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", - "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", - "`step_size`: int=1, step size between each window of temporal data.
\n", - "`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", - "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", - "`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", - "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", - "`alias`: str, optional, Custom name of the model.
\n", - "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", - "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", - "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", - "\n", - "**References**
\n", - "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L20){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DeepNPTS\n", - "\n", - "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", - "> dropout:float=0.5, n_layers:int=2, trajectory_samples:int=100,\n", - "> futr_exog_list=None, hist_exog_list=None, stat_exog_list=None,\n", - "> exclude_insample_y=False, loss=GMM(), valid_loss=MQLoss(),\n", - "> max_steps:int=1000, learning_rate:float=0.001,\n", - "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", - "> val_check_steps:int=100, batch_size:int=32,\n", - "> valid_batch_size:Optional[int]=None,\n", - "> windows_batch_size:int=1024,\n", - "> inference_windows_batch_size:int=-1,\n", - "> start_padding_enabled=False, step_size:int=1,\n", - "> scaler_type:str='standard', random_seed:int=1,\n", - "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", - "> optimizer_kwargs=None, **trainer_kwargs)\n", - "\n", - "DeepNPTS\n", - "\n", - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", - "\n", - "**Parameters:**
\n", - "`h`: int, Forecast horizon.
\n", - "`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", - "`hidden_size`: int=32, hidden size of dense layers.
\n", - "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", - "`dropout`: float=0.1, dropout.
\n", - "`n_layers`: int=2, number of dense layers.
\n", - "`trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", - "`stat_exog_list`: str list, static exogenous columns.
\n", - "`hist_exog_list`: str list, historic exogenous columns.
\n", - "`futr_exog_list`: str list, future exogenous columns.
\n", - "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", - "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`max_steps`: int=1000, maximum number of training steps.
\n", - "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", - "`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n", - "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", - "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", - "`batch_size`: int=32, number of different series in each batch.
\n", - "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", - "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", - "`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", - "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", - "`step_size`: int=1, step size between each window of temporal data.
\n", - "`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", - "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", - "`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", - "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", - "`alias`: str, optional, Custom name of the model.
\n", - "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", - "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", - "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", - "\n", - "**References**
\n", - "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS, title_level=3)" ] @@ -751,73 +284,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "### DeepNPTS.fit\n", - "\n", - "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", - "> distributed_config=None)\n", - "\n", - "Fit.\n", - "\n", - "The `fit` method, optimizes the neural network's weights using the\n", - "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", - "and the `loss` function as defined during the initialization.\n", - "Within `fit` we use a PyTorch Lightning `Trainer` that\n", - "inherits the initialization's `self.trainer_kwargs`, to customize\n", - "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", - "\n", - "The method is designed to be compatible with SKLearn-like classes\n", - "and in particular to be compatible with the StatsForecast library.\n", - "\n", - "By default the `model` is not saving training checkpoints to protect\n", - "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`val_size`: int, validation size for temporal cross-validation.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`test_size`: int, test size for temporal cross-validation.
" - ], - "text/plain": [ - "---\n", - "\n", - "### DeepNPTS.fit\n", - "\n", - "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", - "> distributed_config=None)\n", - "\n", - "Fit.\n", - "\n", - "The `fit` method, optimizes the neural network's weights using the\n", - "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", - "and the `loss` function as defined during the initialization.\n", - "Within `fit` we use a PyTorch Lightning `Trainer` that\n", - "inherits the initialization's `self.trainer_kwargs`, to customize\n", - "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", - "\n", - "The method is designed to be compatible with SKLearn-like classes\n", - "and in particular to be compatible with the StatsForecast library.\n", - "\n", - "By default the `model` is not saving training checkpoints to protect\n", - "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`val_size`: int, validation size for temporal cross-validation.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`test_size`: int, test size for temporal cross-validation.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" ] @@ -826,53 +293,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "### DeepNPTS.predict\n", - "\n", - "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", - "> **data_module_kwargs)\n", - "\n", - "Predict.\n", - "\n", - "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`test_size`: int=None, test size for temporal cross-validation.
\n", - "`step_size`: int=1, Step size between each window.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." - ], - "text/plain": [ - "---\n", - "\n", - "### DeepNPTS.predict\n", - "\n", - "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", - "> **data_module_kwargs)\n", - "\n", - "Predict.\n", - "\n", - "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`test_size`: int=None, test size for temporal cross-validation.
\n", - "`step_size`: int=1, Step size between each window.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" ] @@ -891,194 +312,26 @@ "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", "from neuralforecast import NeuralForecast\n", - "from neuralforecast.losses.pytorch import MQLoss, DistributionLoss, GMM\n", - "from neuralforecast.tsdataset import TimeSeriesDataset\n", - "from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic" + "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Seed set to 1\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b74158f17d254e4884139ee5c48e5706", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Sanity Checking: | | 0/? 
[00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| eval: false\n", - "import pandas as pd\n", - "import pytorch_lightning as pl\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from neuralforecast import NeuralForecast\n", - "#from neuralforecast.models import DeepAR\n", - "from neuralforecast.losses.pytorch import DistributionLoss, HuberMQLoss\n", - "from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic\n", - "\n", - "#AirPassengersPanel['y'] = AirPassengersPanel['y'] + 10\n", "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n", "\n", "nf = NeuralForecast(\n", " models=[DeepNPTS(h=12,\n", - " input_size=12,\n", - " trajectory_samples=100,\n", - " loss=GMM(),\n", - " # learning_rate=1e-5,\n", - " n_layers = 2,\n", - " dropout=0.0,\n", + " input_size=24,\n", " stat_exog_list=['airline1'],\n", " futr_exog_list=['trend'],\n", " max_steps=1000,\n", @@ -1100,29 +353,9 @@ "plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n", "plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n", "plt.plot(plot_df['ds'], plot_df['DeepNPTS'], c='red', label='mean')\n", - "plt.plot(plot_df['ds'], plot_df['DeepNPTS-median'], c='blue', label='median')\n", - "plt.fill_between(x=plot_df['ds'][-12:], \n", - " y1=plot_df['DeepNPTS-lo-90'][-12:].values, \n", - " y2=plot_df['DeepNPTS-hi-90'][-12:].values,\n", - " alpha=0.4, label='level 90')\n", - "plt.legend()\n", "plt.grid()\n", "plt.plot()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/nbs/models.ipynb b/nbs/models.ipynb index d48214601..9e437cea8 100644 --- a/nbs/models.ipynb +++ b/nbs/models.ipynb @@ -1133,18 +1133,7 @@ 
"execution_count": null, "id": "95850f3c", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-04-06 10:40:24,017\tINFO worker.py:1724 -- Started a local Ray instance.\n", - "2024-04-06 10:40:25,556\tINFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.\n", - "2024-04-06 10:40:25,559\tINFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949\n", - "Seed set to 1\n" - ] - } - ], + "outputs": [], "source": [ "%%capture\n", "# Use your own config or AutoNHITS.default_config\n", @@ -1164,139 +1153,7 @@ "execution_count": null, "id": "7c905530", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\ray\\tune\\integration\\pytorch_lightning.py:194: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\utilities\\parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\utilities\\parsing.py:199: Attribute 'valid_loss' is an instance of `nn.Module` and is already saved during checkpointing. 
It is recommended to ignore them using `self.save_hyperparameters(ignore=['valid_loss'])`.\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m Seed set to 11\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m GPU available: True (cuda), used: True\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m TPU available: False, using: 0 TPU cores\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m IPU available: False, using: 0 IPUs\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m HPU available: False, using: 0 HPUs\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m `Trainer(val_check_interval=1)` was configured so validation will run after every batch.\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m Missing logger folder: C:\\Users\\ospra\\ray_results\\_train_tune_2024-04-06_10-40-40\\_train_tune_4d1da_00000\\lightning_logs\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m | Name | Type | Params\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m -----------------------------------------------\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 0 | padder_train | ConstantPad1d | 0 \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 1 | loss | MAE | 0 \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 2 | scaler | TemporalNorm | 0 \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 3 | lin_hist | Linear | 16 \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 4 | drop_hist | Dropout | 0 \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 5 
| net_bwd | Sequential | 944 \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 6 | feature_lin | Linear | 9 \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 7 | temporal_lin | Linear | 156 \n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m -----------------------------------------------\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 1.1 K Trainable params\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 0 Non-trainable params\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 1.1 K Total params\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m 0.004 Total estimated model params size (MB)\n", - "\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sanity Checking: | | 0/? 
[00:00 0) * 1) eps = 1e-10 - lambdas, weights = distr_args + lambdas = distr_args[0] B, H, K = lambdas.size() + weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device) + y = y[:, :, None] mask = mask[:, :, None] @@ -1305,7 +1313,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor], mask: Union[torch.Tensor, None] = None, ): @@ -1367,22 +1375,18 @@ def __init__( if self.return_params: mu_names = [f"-mu-{i}" for i in range(1, n_components + 1)] std_names = [f"-std-{i}" for i in range(1, n_components + 1)] - weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] - self.param_names = [ - i for j in zip(mu_names, std_names, weight_names) for i in j - ] - self.output_names = self.output_names + self.param_names + mu_std_names = [i for j in zip(mu_names, std_names) for i in j] + self.output_names = self.output_names + mu_std_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 3 * n_components + self.outputsize_multiplier = 2 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - means, stds, weights = output.chunk(3, dim=-1) - - return (means, stds, weights) + means, stds = torch.tensor_split(output, 2, dim=-1) + return (means, stds) def scale_decouple( self, @@ -1397,16 +1401,14 @@ def scale_decouple( variance and residual location based on anchoring `loc`, `scale`. Also adds domain protection to the distribution parameters. 
""" - means, stds, weights = output + means, stds = output stds = F.softplus(stds) - weights = F.softmax(weights, dim=-1) if (loc is not None) and (scale is not None): loc = loc.view(means.size(dim=0), 1, -1) scale = scale.view(means.size(dim=0), 1, -1) means = (means * scale) + loc stds = (stds + eps) * scale - - return (means, stds, weights) + return (means, stds) def sample(self, distr_args, num_samples=None): """ @@ -1428,11 +1430,17 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - means, stds, weights = distr_args + means, stds = distr_args B, H, K = means.size() Q = len(self.quantiles) assert means.shape == stds.shape + # Sample K ~ Mult(weights) + # shared across B, H + # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) + + weights = (1 / K) * torch.ones_like(means, device=means.device) + # Avoid loop, vectorize weights = weights.reshape(-1, K) means = means.flatten() @@ -1473,16 +1481,18 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: mask = torch.ones_like(y) - means, stds, weights = distr_args + means, stds = distr_args B, H, K = means.size() + weights = (1 / K) * torch.ones_like(means, device=means.device) + y = y[:, :, None] mask = mask[:, :, None] @@ -1510,7 +1520,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): @@ -1568,29 +1578,25 @@ def __init__( f"-total_count-{i}" for i in range(1, n_components + 1) ] probs_names = [f"-probs-{i}" for i in range(1, n_components + 1)] - weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] - self.param_names = [ - i for j in 
zip(total_count_names, probs_names, weight_names) for i in j - ] - self.output_names = self.output_names + self.param_names + param_names = [i for j in zip(total_count_names, probs_names) for i in j] + self.output_names = self.output_names + param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 3 * n_components + self.outputsize_multiplier = 2 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - mu, alpha, weights = output.chunk(3, dim=-1) - - return mu, alpha, weights + mu, alpha = torch.tensor_split(output, 2, dim=-1) + return (mu, alpha) def scale_decouple( self, output, loc: Optional[torch.Tensor] = None, scale: Optional[torch.Tensor] = None, - eps: float = 1e-6, + eps: float = 0.2, ): """Scale Decouple @@ -1599,10 +1605,9 @@ def scale_decouple( Also adds domain protection to the distribution parameters. """ # Efficient NBinomial parametrization - mu, alpha, weights = output - mu = F.softplus(mu) + eps - alpha = F.softplus(alpha) + eps # alpha = 1/total_counts - weights = F.softmax(weights, dim=-1) + mu, alpha = output + mu = F.softplus(mu) + 1e-8 + alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts if (loc is not None) and (scale is not None): loc = loc.view(mu.size(dim=0), 1, -1) mu *= loc @@ -1612,9 +1617,8 @@ def scale_decouple( # => probs = mu / (total_count + mu) # => probs = mu / [total_count * (1 + mu * (1/total_count))] total_count = 1.0 / alpha - probs = mu * alpha / (1.0 + mu * alpha) - probs = torch.clamp(probs, eps, 1 - eps) - return (total_count, probs, weights) + probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 + return (total_count, probs) def sample(self, distr_args, num_samples=None): """ @@ -1636,11 +1640,17 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - total_count, probs, weights = distr_args + total_count, probs = distr_args B, H, K = total_count.size() Q = 
len(self.quantiles) assert total_count.shape == probs.shape + # Sample K ~ Mult(weights) + # shared across B, H + # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) + + weights = (1 / K) * torch.ones_like(probs, device=probs.device) + # Avoid loop, vectorize weights = weights.reshape(-1, K) total_count = total_count.flatten() @@ -1682,16 +1692,18 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: mask = torch.ones_like(y) - total_count, probs, weights = distr_args + total_count, probs = distr_args B, H, K = total_count.size() + weights = (1 / K) * torch.ones_like(probs, device=probs.device) + y = y[:, :, None] mask = mask[:, :, None] @@ -1722,7 +1734,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): diff --git a/neuralforecast/models/__init__.py b/neuralforecast/models/__init__.py index b4e7153f1..7e474ef62 100644 --- a/neuralforecast/models/__init__.py +++ b/neuralforecast/models/__init__.py @@ -3,7 +3,7 @@ 'TFT', 'VanillaTransformer', 'Informer', 'Autoformer', 'PatchTST', 'FEDformer', 'StemGNN', 'HINT', 'TimesNet', 'TimeLLM', 'TSMixer', 'TSMixerx', 'MLPMultivariate', 'iTransformer', 'BiTCN', - 'DeepNPTS' + 'DeepNPTS', 'DeepNPTSMultivariate' ] from .rnn import RNN @@ -34,3 +34,4 @@ from .itransformer import iTransformer from .bitcn import BiTCN from .deepnpts import DeepNPTS +from .deepnptsmultivariate import DeepNPTSMultivariate diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py index d4da85974..678f89c11 100644 --- a/neuralforecast/models/deepnpts.py +++ b/neuralforecast/models/deepnpts.py @@ -4,23 +4,21 @@ __all__ = ['DeepNPTS'] # 
%% ../../nbs/models.deepnpts.ipynb 3 -import numpy as np - import torch import torch.nn as nn +import torch.nn.functional as F import neuralforecast.losses.pytorch as losses from typing import Optional -from functools import partial from ..common._base_windows import BaseWindows -from ..losses.pytorch import MQLoss, GMM, PMM, NBMM +from ..losses.pytorch import MAE # %% ../../nbs/models.deepnpts.ipynb 7 class DeepNPTS(BaseWindows): """DeepNPTS - Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. + Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. **Parameters:**
`h`: int, Forecast horizon.
@@ -29,7 +27,6 @@ class DeepNPTS(BaseWindows): `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
`dropout`: float=0.1, dropout.
`n_layers`: int=2, number of dense layers.
- `trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
`stat_exog_list`: str list, static exogenous columns.
`hist_exog_list`: str list, historic exogenous columns.
`futr_exog_list`: str list, future exogenous columns.
@@ -72,15 +69,14 @@ def __init__( batch_norm: bool = True, dropout: float = 0.1, n_layers: int = 2, - trajectory_samples: int = 100, futr_exog_list=None, hist_exog_list=None, stat_exog_list=None, exclude_insample_y=False, - loss=GMM(), - valid_loss=MQLoss(level=[80, 90]), + loss=MAE(), + valid_loss=MAE(), max_steps: int = 1000, - learning_rate: float = 1e-5, + learning_rate: float = 1e-3, num_lr_decays: int = 3, early_stop_patience_steps: int = -1, val_check_steps: int = 100, @@ -99,22 +95,13 @@ def __init__( **trainer_kwargs ): - if hist_exog_list is not None: - raise Exception("DeepNPTS does not support historical exogenous variables.") - if exclude_insample_y: raise Exception("DeepNPTS has no possibility for excluding y.") - supported_losses = (losses.GMM, losses.PMM, losses.NBMM) - - if not isinstance(loss, supported_losses): - raise Exception("DeepNPTS only supports GMM, PMM or NBMM as loss function.") - - if not isinstance(valid_loss, losses.MQLoss): - raise Exception("DeepNPTS only supports MQLoss as validation loss.") - - # Overwrite n_components, it has to be the input_size in DeepNPTS - loss.n_components = input_size + if not isinstance(loss, losses.BasePointLoss): + raise Exception( + "DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function." 
+ ) # Inherit BaseWindows class super(DeepNPTS, self).__init__( @@ -147,16 +134,19 @@ def __init__( ) self.h = h - self.h_backup = self.h # Used because h=1 during training - self.use_softmax = True self.hidden_size = hidden_size self.dropout = dropout - self.trajectory_samples = trajectory_samples self.futr_exog_size = len(self.futr_exog_list) self.stat_exog_size = len(self.stat_exog_list) + self.hist_exog_size = len(self.hist_exog_list) + + input_dim = ( + input_size * (1 + self.futr_exog_size + self.hist_exog_size) + + self.stat_exog_size + + self.h * self.futr_exog_size + ) - input_dim = input_size * (1 + self.futr_exog_size) + self.stat_exog_size # Create DeepNPTSNetwork modules = [] for i in range(n_layers): @@ -167,391 +157,61 @@ def __init__( if dropout > 0.0: modules.append(nn.Dropout(dropout)) + modules.append(nn.Linear(hidden_size, input_size * self.h)) self.deepnptsnetwork = nn.Sequential(*modules) - self.deepnptsnetwork.apply(partial(self._init_weights, scale=0.07)) - - # Add output layers for Mixture distribution - output_modules = [] - if dropout > 0.0: - output_modules.append(nn.Dropout(self.dropout)) - - if isinstance(loss, GMM): - output_modules.append(nn.Linear(hidden_size, input_size + 1)) - elif isinstance(loss, PMM): - output_modules.append(nn.Linear(hidden_size, input_size)) - elif isinstance(loss, NBMM): - output_modules.append(nn.Linear(hidden_size, input_size)) - - self.output_layer = nn.Sequential(*output_modules) - self.output_layer.apply(self._init_weights) - - @staticmethod - def _init_weights(module, scale=1.0): - if type(module) == nn.Linear: - nn.init.uniform_(module.weight, -scale, scale) - nn.init.zeros_(module.bias) - - def _domain_map(self, o_t, insample_y): - if isinstance(self.loss, GMM): - weights = o_t[:, :-1] # [B, L + 1] -> [B, L] - kernel_width = o_t[:, -1:] # [B, L + 1] -> [B, 1] - kernel_width = torch.repeat_interleave( - input=kernel_width, repeats=weights.shape[1], dim=-1 - ) # [B, 1] -> [B, L] - output = torch.cat( - 
[insample_y, kernel_width, weights], dim=-1 - ) # [B, L] + [B, L] + [B, L] = [B, 3 * L] - output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * L] - elif isinstance(self.loss, PMM): - weights = o_t # [B, L] -> [B, L] - output = torch.cat( - [insample_y, weights], dim=-1 - ) # [B, L] + [B, L] = [B, 2 * L] - output = output.unsqueeze(1) # [B, 2 * L] = [B, 1, 2 * L] - elif isinstance(self.loss, NBMM): - weights = torch.ones_like(o_t) # [B, L] -> [B, L] - output = torch.cat( - [insample_y, o_t, weights], dim=-1 - ) # [B, L] + [B, L] + [B, L] = [B, 3 * L] - output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * - - else: - raise NotImplementedError - - return output - - # Override BaseWindows method - def training_step(self, batch, batch_idx): - - # Only train one-step ahead - self.h = 1 - self.quantiles = self.loss.quantiles - - # Create and normalize windows [Ws, L+H, C] - y_idx = batch["y_idx"] - windows = self._create_windows(batch, step="train") - original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, y_idx]) - windows = self._normalization(windows=windows, y_idx=y_idx) - - # Parse windows - ( - insample_y, - insample_mask, - outsample_y, - outsample_mask, - _, - futr_exog, - stat_exog, - ) = self._parse_windows(batch, windows) - - windows_batch = dict( - insample_y=insample_y, # [Ws, L] - insample_mask=insample_mask, # [Ws, L] - futr_exog=futr_exog, # [Ws, L+H] - hist_exog=None, - stat_exog=stat_exog, # [Ws, 1] - y_idx=y_idx, # [Ws, 1] - ) - - # Model Predictions - output = self.train_forward(windows_batch) - - _, y_loc, y_scale = self._inv_normalization( - y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx - ) - # outsample_y = original_insample_y - outsample_y = original_outsample_y - distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale) - loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask) - - if torch.isnan(loss): - print("Model Parameters", self.hparams) - print("insample_y", 
torch.isnan(insample_y).sum()) - print("outsample_y", torch.isnan(outsample_y).sum()) - print("output", torch.isnan(output).sum()) - raise Exception("Loss is NaN, training stopped.") - - self.log("train_loss", loss, prog_bar=True, on_epoch=True) - self.train_trajectories.append((self.global_step, float(loss))) - - self.h = self.h_backup - - return loss - - # Override BaseWindows method - def validation_step(self, batch, batch_idx): - - self.h = self.h_backup - self.quantiles = self.valid_loss.quantiles - - if self.val_size == 0: - return np.nan - - # TODO: Hack to compute number of windows - windows = self._create_windows(batch, step="val") - n_windows = len(windows["temporal"]) - y_idx = batch["y_idx"] - - # Number of windows in batch - windows_batch_size = self.inference_windows_batch_size - if windows_batch_size < 0: - windows_batch_size = n_windows - n_batches = int(np.ceil(n_windows / windows_batch_size)) - - valid_losses = [] - batch_sizes = [] - for i in range(n_batches): - # Create and normalize windows [Ws, L+H, C] - w_idxs = np.arange( - i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows) - ) - windows = self._create_windows(batch, step="val", w_idxs=w_idxs) - original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, 0]) - windows = self._normalization(windows=windows, y_idx=y_idx) - - # Parse windows - ( - insample_y, - insample_mask, - _, - outsample_mask, - _, - futr_exog, - stat_exog, - ) = self._parse_windows(batch, windows) - - windows_batch = dict( - insample_y=insample_y, # [Ws, L] - insample_mask=insample_mask, # [Ws, L] - futr_exog=futr_exog, # [Ws, L+H] - hist_exog=None, # [Ws, L] - stat_exog=stat_exog, - y_idx=y_idx, - ) # [Ws, 1] - - # Model Predictions - output_batch = self(windows_batch) - # Monte Carlo already returns y_hat with mean and quantiles - output_batch = output_batch[:, :, 1:] # Remove mean - valid_loss_batch = self.valid_loss( - y=original_outsample_y, y_hat=output_batch, mask=outsample_mask - ) - 
valid_losses.append(valid_loss_batch) - batch_sizes.append(len(output_batch)) - - valid_loss = torch.stack(valid_losses) - batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device) - valid_loss = torch.sum(valid_loss * batch_sizes) / torch.sum(batch_sizes) - - if torch.isnan(valid_loss): - raise Exception("Loss is NaN, training stopped.") - - self.log("valid_loss", valid_loss, prog_bar=True, on_epoch=True) - self.validation_step_outputs.append(valid_loss) - return valid_loss - # Override BaseWindows method - def predict_step(self, batch, batch_idx): - - self.h == self.h_backup - self.quantiles = self.loss.quantiles - - # TODO: Hack to compute number of windows - windows = self._create_windows(batch, step="predict") - n_windows = len(windows["temporal"]) - y_idx = batch["y_idx"] - - # Number of windows in batch - windows_batch_size = self.inference_windows_batch_size - if windows_batch_size < 0: - windows_batch_size = n_windows - n_batches = int(np.ceil(n_windows / windows_batch_size)) - - y_hats = [] - for i in range(n_batches): - # Create and normalize windows [Ws, L+H, C] - w_idxs = np.arange( - i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows) - ) - windows = self._create_windows(batch, step="predict", w_idxs=w_idxs) - windows = self._normalization(windows=windows, y_idx=y_idx) - - # Parse windows - insample_y, insample_mask, _, _, _, futr_exog, stat_exog = ( - self._parse_windows(batch, windows) - ) - windows_batch = dict( - insample_y=insample_y, # [Ws, L] - insample_mask=insample_mask, # [Ws, L] - futr_exog=futr_exog, # [Ws, L+H] - stat_exog=stat_exog, - y_idx=y_idx, - ) - - # Model Predictions - y_hat = self(windows_batch) - # Monte Carlo already returns y_hat with mean and quantiles - y_hats.append(y_hat) - y_hat = torch.cat(y_hats, dim=0) - return y_hat - - def train_forward(self, windows_batch): + def forward(self, windows_batch): # Parse windows_batch - x_t = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] + x = 
windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] + hist_exog = windows_batch["hist_exog"] # [B, L, X] futr_exog = windows_batch["futr_exog"] # [B, L + h, F] stat_exog = windows_batch["stat_exog"] # [B, S] - batch_size, seq_len = x_t.shape[:2] # B = batch_size, L = seq_len + batch_size, seq_len = x.shape[:2] # B = batch_size, L = seq_len + insample_y = windows_batch["insample_y"].unsqueeze(-1) - # Concatenate x_t with future exogenous + # Concatenate x_t with future exogenous of input if self.futr_exog_size > 0: - futr_exog_t = futr_exog[:, :seq_len] # [B, L + h, F] -> [B, L, F] - x_t = torch.cat( - (x_t, futr_exog_t), dim=2 + x = torch.cat( + (x, futr_exog[:, :seq_len]), dim=2 ) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] - x_t = x_t.reshape(batch_size, -1) # [B, L, 1 + F] -> [B, L * (1 + F)] + # Concatenate x_t with historic exogenous + if self.hist_exog_size > 0: + x = torch.cat( + (x, hist_exog), dim=2 + ) # [B, L, 1 + F] + [B, L, X] -> [B, L, 1 + F + X] - # Concatenate x_t with static exogenous - if self.stat_exog_size > 0: - x_t = torch.cat( - (x_t, stat_exog), dim=1 - ) # [B, L * (1 + F)] + [B, S] -> [B, L * (1 + F) + S] - - # Run through DeepNPTSNetwork - h_t = self.deepnptsnetwork(x_t) # [B, L * (1 + F) + S] -> [B, hidden_size] - o_t = self.output_layer(h_t) # [B, hidden_size] -> [B, L + 1] - - output = self._domain_map( - o_t, windows_batch["insample_y"] - ) # [B, L + 1], [B, L] -> [B, 3 * L] - output = self.loss.domain_map( - output - ) # [B, 3 * L] -> ([B, L], [B, L], [B, L]) - - return output - - def forward(self, windows_batch): - # Parse windows_batch - insample_y_t = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] - futr_exog = windows_batch["futr_exog"] # [B, L + h, F] - stat_exog = windows_batch["stat_exog"] # [B, S] - y_idx = windows_batch["y_idx"] + x = x.reshape(batch_size, -1) # [B, L, 1 + F + X] -> [B, L * (1 + F + X)] - batch_size, seq_len = insample_y_t.shape[:2] # B = batch_size, L = seq_len - device = insample_y_t.device - dtype = 
insample_y_t.dtype - - # Repeat insample_y for trajectory samples - insample_y_t = torch.repeat_interleave( - input=insample_y_t, repeats=self.trajectory_samples, dim=0 - ) # [B, L, 1] -> [B * n_samples, L, 1] - - # Input x_t is insample_y at time t - x_t = insample_y_t - - # Repeat futr_exog if available for trajectory samples and add to x_t - if self.futr_exog_size > 0: - futr_exog = torch.repeat_interleave( - input=futr_exog, repeats=self.trajectory_samples, dim=0 - ) # [B, L + h, F] -> [B * n_samples, L + h, F] - x_t = torch.cat( - (x_t, futr_exog[:, :seq_len]), dim=2 - ) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] - - x_t = x_t.reshape( - batch_size * self.trajectory_samples, -1 - ) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)] - - # Repeat stat_exog if available for trajectory samples and add to x_t + # Concatenate x with static exogenous if self.stat_exog_size > 0: - stat_exog = torch.repeat_interleave( - input=stat_exog, repeats=self.trajectory_samples, dim=0 - ) # [B, S] -> [B * n_samples, S] - x_t = torch.cat( - (x_t, stat_exog), dim=1 - ) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S] - - # Scales for inverse normalization - y_scale = self.scaler.x_scale[:, :, y_idx] - y_loc = self.scaler.x_shift[:, :, y_idx] - y_scale = torch.repeat_interleave( - input=y_scale, repeats=self.trajectory_samples, dim=0 - ) - y_loc = torch.repeat_interleave( - input=y_loc, repeats=self.trajectory_samples, dim=0 - ) - # Create forecasts tensor - forecasts = torch.zeros( - (batch_size, self.h, len(self.quantiles) + 1), device=device, dtype=dtype - ) + x = torch.cat( + (x, stat_exog), dim=1 + ) # [B, L * (1 + F + X)] + [B, S] -> [B, L * (1 + F + X) + S] - # Recursive predictions - for t in range(self.h): - # Run input throught DeepNPTSNetwork - h_t = self.deepnptsnetwork( - x_t - ) # [B * n_samples, L * (1 + F) + S] -> [B, hidden_size] - o_t = self.output_layer( - h_t - ) # [B * n_samples, 
hidden_size] -> [B * n_samples, L (+ 1)] - output = self._domain_map( - o_t, insample_y_t.squeeze(-1) - ) # [B * n_samples, L + 1], [B * n_samples, L] -> [B * n_samples, 3 * L] - output = self.loss.domain_map( - output - ) # [B * n_samples, 3 * L] -> ([B * n_samples, L], [B * n_samples, L], [B * n_samples, L]) - - # Inverse normalization - distr_args = self.loss.scale_decouple( - output=output, loc=y_loc, scale=y_scale - ) - - # Sample and create probabilistic outputs - samples_t_flat, _, _ = self.loss.sample( - distr_args=distr_args, num_samples=1 - ) - - samples_t_flat = samples_t_flat.squeeze() - samples_t = samples_t_flat.reshape( - batch_size, self.trajectory_samples - ) # [B * n_samples] -> [B, n_samples] - - samples_t_mean = torch.mean(samples_t, dim=-1) # [B, n_samples] -> [B] - quantiles_t = torch.quantile( - input=samples_t, q=self.quantiles, dim=-1 - ) # [B, n_samples] -> [Q, B] - forecasts[:, t, 0] = samples_t_mean - forecasts[:, t, 1:] = quantiles_t.permute(1, 0) - - insample_y_t_next = self.scaler.scaler( - samples_t_flat, y_loc.squeeze(), y_scale.squeeze() - ) # [B * n_samples] -> [B * n_samples] - insample_y_t_next = insample_y_t_next.unsqueeze(-1).unsqueeze( - -1 - ) # [B * n_samples] -> [B * n_samples, 1, 1] - - # Update insample_y_t - insample_y_t = torch.cat( - [insample_y_t[:, 1:], insample_y_t_next], dim=1 - ) # [B * n_samples, L - 1, 1] + [B * n_samples, 1, 1] -> [B * n_samples, L, 1] + # Concatenate x_t with future exogenous of horizon + if self.futr_exog_size > 0: + futr_exog = futr_exog[:, seq_len:] # [B, L + h, F] -> [B, h, F] + futr_exog = futr_exog.reshape( + batch_size, -1 + ) # [B, L + h, F] -> [B, h * F] + x = torch.cat( + (x, futr_exog), dim=1 + ) # [B, L * (1 + F + X) + S] + [B, h * F] -> [B, L * (1 + F + X) + S + h * F] - # Update input - x_t = insample_y_t - # Concatenate x_t with future exogenous - if self.futr_exog_size > 0: - x_t = torch.cat( - (x_t, futr_exog[:, t : seq_len + t]), dim=2 - ) # [B * n_samples, L, 1] + [B * 
n_samples, L, F] -> [B * n_samples, L, 1 + F] + # Run through DeepNPTSNetwork + weights = self.deepnptsnetwork( + x + ) # [B, L * (1 + F + X) + S + h * F] -> [B, L * h] - x_t = x_t.reshape( - batch_size * self.trajectory_samples, -1 - ) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)] + # Apply softmax for weighted input predictions + weights = weights.reshape(batch_size, seq_len, -1) # [B, L * h] -> [B, L, h] + x = ( + F.softmax(weights, dim=1) * insample_y + ) # [B, L, h] * [B, L, 1] = [B, L, h] + output = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1] - # Concatenate x_t with static exogenous - if self.stat_exog_size > 0: - x_t = torch.cat( - (x_t, stat_exog), dim=1 - ) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S] + forecast = self.loss.domain_map(output) # [B, h, 1] -> [B, h, 1] - return forecasts + return forecast From 7bfe7bcac0284801af9cbd1a20d4ea1d898b8da9 Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Mon, 6 May 2024 09:40:12 +0200 Subject: [PATCH 03/11] update_model_files --- nbs/models.deepnpts.ipynb | 567 +++++++++++++++++++++++++++++- nbs/models.ipynb | 139 ++++++++ neuralforecast/_modidx.py | 5 + neuralforecast/auto.py | 103 +++++- neuralforecast/models/__init__.py | 5 +- neuralforecast/models/deepnpts.py | 7 +- 6 files changed, 800 insertions(+), 26 deletions(-) diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb index c1852c18a..7b6cac9e0 100644 --- a/nbs/models.deepnpts.ipynb +++ b/nbs/models.deepnpts.ipynb @@ -32,7 +32,7 @@ ":::{.callout-warning collapse=\"false\"}\n", "#### Losses\n", "\n", - "This implementation differs from the original work in that a weighted sum of the empirical distribution is returned as forecast, rather than a sampled distributional output. 
Consequently, DeepNPTS only supports point losses as training loss.\n", + "This implementation differs from the original work in that a weighted sum of the empirical distribution is returned as forecast. Therefore, it only supports point losses.\n", "\n", ":::" ] @@ -160,7 +160,7 @@ " batch_size: int = 32,\n", " valid_batch_size: Optional[int] = None,\n", " windows_batch_size: int = 1024,\n", - " inference_windows_batch_size: int = -1,\n", + " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", " step_size: int = 1,\n", " scaler_type: str = 'standard',\n", @@ -177,6 +177,9 @@ " if not isinstance(loss, losses.BasePointLoss):\n", " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function.') \n", " \n", + " if not isinstance(valid_loss, losses.BasePointLoss):\n", + " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as valid loss function.') \n", + " \n", " # Inherit BaseWindows class\n", " super(DeepNPTS, self).__init__(h=h,\n", " input_size=input_size,\n", @@ -275,7 +278,139 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L18){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DeepNPTS\n", + "\n", + "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", + "> dropout:float=0.1, n_layers:int=2, futr_exog_list=None,\n", + "> hist_exog_list=None, stat_exog_list=None,\n", + "> exclude_insample_y=False, loss=MAE(), valid_loss=MAE(),\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32,\n", + "> valid_batch_size:Optional[int]=None,\n", + "> windows_batch_size:int=1024,\n", + "> 
inference_windows_batch_size:int=1024,\n", + "> start_padding_enabled=False, step_size:int=1,\n", + "> scaler_type:str='standard', random_seed:int=1,\n", + "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", + "> optimizer_kwargs=None, **trainer_kwargs)\n", + "\n", + "DeepNPTS\n", + "\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", + "\n", + "**Parameters:**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`input_size`: int, autoregressive input size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + "`dropout`: float=0.1, dropout.
\n", + "`n_layers`: int=2, number of dense layers.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True (not supported by DeepNPTS, which raises an exception when True).
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + "`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", + "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='standard', type of scaler for temporal inputs normalization, see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
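\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).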
\n", + "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L18){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DeepNPTS\n", + "\n", + "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", + "> dropout:float=0.1, n_layers:int=2, futr_exog_list=None,\n", + "> hist_exog_list=None, stat_exog_list=None,\n", + "> exclude_insample_y=False, loss=MAE(), valid_loss=MAE(),\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32,\n", + "> valid_batch_size:Optional[int]=None,\n", + "> windows_batch_size:int=1024,\n", + "> inference_windows_batch_size:int=1024,\n", + "> start_padding_enabled=False, step_size:int=1,\n", + "> scaler_type:str='standard', random_seed:int=1,\n", + "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", + "> optimizer_kwargs=None, **trainer_kwargs)\n", + "\n", + "DeepNPTS\n", + "\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", + "\n", + "**Parameters:**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`input_size`: int, autoregressive input size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + "`dropout`: float=0.1, dropout.
\n", + "`n_layers`: int=2, number of dense layers.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True (not supported by DeepNPTS, which raises an exception when True).
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + "`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", + "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='standard', type of scaler for temporal inputs normalization, see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
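\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).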
\n", + "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DeepNPTS, title_level=3)" ] @@ -284,7 +419,73 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeepNPTS.fit\n", + "\n", + "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ], + "text/plain": [ + "---\n", + "\n", + "### DeepNPTS.fit\n", + "\n", + "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" ] @@ -293,7 +494,53 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeepNPTS.predict\n", + "\n", + "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", + "> **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ], + "text/plain": [ + "---\n", + "\n", + "### DeepNPTS.predict\n", + "\n", + "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", + "> **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" ] @@ -323,7 +570,315 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 1\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a683239fc3e5435aad7174b0d136376d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | | 0/? [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| eval: false\n", "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds Date: Thu, 18 Apr 2024 15:00:51 +0200 Subject: [PATCH 04/11] deepnpts_firststab --- nbs/common.scalers.ipynb | 4 +- nbs/losses.pytorch.ipynb | 1712 +++++++++++++++++++++++++++-- nbs/models.deepnpts.ipynb | 1137 +++++++++++++++++++ neuralforecast/_modidx.py | 18 + neuralforecast/common/_scalers.py | 4 +- neuralforecast/core.py | 3 + neuralforecast/losses/pytorch.py | 110 +- neuralforecast/models/__init__.py | 4 +- neuralforecast/models/deepnpts.py | 557 ++++++++++ 9 files changed, 3369 insertions(+), 180 deletions(-) create mode 100644 nbs/models.deepnpts.ipynb create mode 100644 neuralforecast/models/deepnpts.py diff --git a/nbs/common.scalers.ipynb b/nbs/common.scalers.ipynb index c06fa0da2..921d5adaf 100644 --- a/nbs/common.scalers.ipynb +++ b/nbs/common.scalers.ipynb @@ -567,8 +567,8 @@ " shape = list(x.shape)\n", " shape[dim] = 1\n", "\n", - " x_shift = torch.zeros(shape)\n", - " x_scale = torch.ones(shape)\n", + " x_shift = torch.zeros(shape, device=x.device)\n", + " x_scale = torch.ones(shape, device=x.device)\n", "\n", " return 
x_shift, x_scale" ] diff --git a/nbs/losses.pytorch.ipynb b/nbs/losses.pytorch.ipynb index 55cd837b3..36adfaabd 100644 --- a/nbs/losses.pytorch.ipynb +++ b/nbs/losses.pytorch.ipynb @@ -244,7 +244,61 @@ "execution_count": null, "id": "1d004cd0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAE.__init__\n", + "\n", + "> MAE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Absolute Error\n", + "\n", + "Calculates Mean Absolute Error between\n", + "`y` and `y_hat`. MAE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the\n", + "deviation of the prediction and the true\n", + "value at a given time and averages these devations\n", + "over the length of the series.\n", + "\n", + "$$ \\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} |y_{\\tau} - \\hat{y}_{\\tau}| $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAE.__init__\n", + "\n", + "> MAE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Absolute Error\n", + "\n", + "Calculates Mean Absolute Error between\n", + "`y` and `y_hat`. MAE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the\n", + "deviation of the prediction and the true\n", + "value at a given time and averages these devations\n", + "over the length of the series.\n", + "\n", + "$$ \\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} |y_{\\tau} - \\hat{y}_{\\tau}| $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MAE, name='MAE.__init__', title_level=3)" ] @@ -254,7 +308,51 @@ "execution_count": null, "id": "0a20a273", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L106){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAE.__call__\n", + "\n", + "> MAE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mae`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L106){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAE.__call__\n", + "\n", + "> MAE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mae`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MAE.__call__, name='MAE.__call__', title_level=3)" ] @@ -328,7 +426,61 @@ "execution_count": null, "id": "e8c65b82", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L126){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MSE.__init__\n", + "\n", + "> MSE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Squared Error\n", + "\n", + "Calculates Mean Squared Error between\n", + "`y` and `y_hat`. MSE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the \n", + "squared deviation of the prediction and the true\n", + "value at a given time, and averages these devations\n", + "over the length of the series.\n", + "\n", + "$$ \\mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L126){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MSE.__init__\n", + "\n", + "> MSE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Squared Error\n", + "\n", + "Calculates Mean Squared Error between\n", + "`y` and `y_hat`. MSE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the \n", + "squared deviation of the prediction and the true\n", + "value at a given time, and averages these devations\n", + "over the length of the series.\n", + "\n", + "$$ \\mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MSE, name='MSE.__init__', title_level=3)" ] @@ -338,7 +490,51 @@ "execution_count": null, "id": "b0126a7f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L147){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MSE.__call__\n", + "\n", + "> MSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mse`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L147){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MSE.__call__\n", + "\n", + "> MSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mse`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MSE.__call__, name='MSE.__call__', title_level=3)" ] @@ -416,7 +612,67 @@ "execution_count": null, "id": "d961d383", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L167){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### RMSE.__init__\n", + "\n", + "> RMSE.__init__ (horizon_weight=None)\n", + "\n", + "Root Mean Squared Error\n", + "\n", + "Calculates Root Mean Squared Error between\n", + "`y` and `y_hat`. RMSE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the squared deviation\n", + "of the prediction and the observed value at a given time and\n", + "averages these devations over the length of the series.\n", + "Finally the RMSE will be in the same scale\n", + "as the original time series so its comparison with other\n", + "series is possible only if they share a common scale. \n", + "RMSE has a direct connection to the L2 norm.\n", + "\n", + "$$ \\mathrm{RMSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\sqrt{\\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2}} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L167){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### RMSE.__init__\n", + "\n", + "> RMSE.__init__ (horizon_weight=None)\n", + "\n", + "Root Mean Squared Error\n", + "\n", + "Calculates Root Mean Squared Error between\n", + "`y` and `y_hat`. RMSE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the squared deviation\n", + "of the prediction and the observed value at a given time and\n", + "averages these devations over the length of the series.\n", + "Finally the RMSE will be in the same scale\n", + "as the original time series so its comparison with other\n", + "series is possible only if they share a common scale. \n", + "RMSE has a direct connection to the L2 norm.\n", + "\n", + "$$ \\mathrm{RMSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\sqrt{\\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2}} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(RMSE, name='RMSE.__init__', title_level=3)" ] @@ -426,7 +682,51 @@ "execution_count": null, "id": "d398d3e3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L191){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### RMSE.__call__\n", + "\n", + "> RMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`rmse`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L191){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### RMSE.__call__\n", + "\n", + "> RMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`rmse`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(RMSE.__call__, name='RMSE.__call__', title_level=3)" ] @@ -517,7 +817,69 @@ "execution_count": null, "id": "174e8042", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L212){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAPE.__init__\n", + "\n", + "> MAPE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Absolute Percentage Error\n", + "\n", + "Calculates Mean Absolute Percentage Error between\n", + "`y` and `y_hat`. MAPE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the percentual deviation\n", + "of the prediction and the observed value at a given time and\n", + "averages these devations over the length of the series.\n", + "The closer to zero an observed value is, the higher penalty MAPE loss\n", + "assigns to the corresponding error.\n", + "\n", + "$$ \\mathrm{MAPE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L212){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAPE.__init__\n", + "\n", + "> MAPE.__init__ (horizon_weight=None)\n", + "\n", + "Mean Absolute Percentage Error\n", + "\n", + "Calculates Mean Absolute Percentage Error between\n", + "`y` and `y_hat`. MAPE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the percentual deviation\n", + "of the prediction and the observed value at a given time and\n", + "averages these devations over the length of the series.\n", + "The closer to zero an observed value is, the higher penalty MAPE loss\n", + "assigns to the corresponding error.\n", + "\n", + "$$ \\mathrm{MAPE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MAPE, name='MAPE.__init__', title_level=3)" ] @@ -527,7 +889,51 @@ "execution_count": null, "id": "da63f136", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L237){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAPE.__call__\n", + "\n", + "> MAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mape`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L237){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MAPE.__call__\n", + "\n", + "> MAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mape`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MAPE.__call__, name='MAPE.__call__', title_level=3)" ] @@ -609,7 +1015,73 @@ "execution_count": null, "id": "dee99fb8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L259){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### SMAPE.__init__\n", + "\n", + "> SMAPE.__init__ (horizon_weight=None)\n", + "\n", + "Symmetric Mean Absolute Percentage Error\n", + "\n", + "Calculates Symmetric Mean Absolute Percentage Error between\n", + "`y` and `y_hat`. SMAPE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the relative deviation\n", + "of the prediction and the observed value scaled by the sum of the\n", + "absolute values for the prediction and observed value at a\n", + "given time, then averages these devations over the length\n", + "of the series. This allows the SMAPE to have bounds between\n", + "0% and 200% which is desireble compared to normal MAPE that\n", + "may be undetermined when the target is zero.\n", + "\n", + "$$ \\mathrm{sMAPE}_{2}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|+|\\hat{y}_{\\tau}|} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L259){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### SMAPE.__init__\n", + "\n", + "> SMAPE.__init__ (horizon_weight=None)\n", + "\n", + "Symmetric Mean Absolute Percentage Error\n", + "\n", + "Calculates Symmetric Mean Absolute Percentage Error between\n", + "`y` and `y_hat`. SMAPE measures the relative prediction\n", + "accuracy of a forecasting method by calculating the relative deviation\n", + "of the prediction and the observed value scaled by the sum of the\n", + "absolute values for the prediction and observed value at a\n", + "given time, then averages these devations over the length\n", + "of the series. This allows the SMAPE to have bounds between\n", + "0% and 200% which is desireble compared to normal MAPE that\n", + "may be undetermined when the target is zero.\n", + "\n", + "$$ \\mathrm{sMAPE}_{2}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|+|\\hat{y}_{\\tau}|} $$\n", + "\n", + "**Parameters:**
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(SMAPE, name='SMAPE.__init__', title_level=3)" ] @@ -619,7 +1091,51 @@ "execution_count": null, "id": "db62a845", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L286){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### SMAPE.__call__\n", + "\n", + "> SMAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`smape`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L286){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### SMAPE.__call__\n", + "\n", + "> SMAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`smape`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(SMAPE.__call__, name='SMAPE.__call__', title_level=3)" ] @@ -706,7 +1222,71 @@ "execution_count": null, "id": "b6a4cf21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L308){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MASE.__init__\n", + "\n", + "> MASE.__init__ (seasonality:int, horizon_weight=None)\n", + "\n", + "Mean Absolute Scaled Error \n", + "Calculates the Mean Absolute Scaled Error between\n", + "`y` and `y_hat`. MASE measures the relative prediction\n", + "accuracy of a forecasting method by comparinng the mean absolute errors\n", + "of the prediction and the observed value against the mean\n", + "absolute errors of the seasonal naive model.\n", + "The MASE partially composed the Overall Weighted Average (OWA), \n", + "used in the M4 Competition.\n", + "\n", + "$$ \\mathrm{MASE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{\\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau})} $$\n", + "\n", + "**Parameters:**
\n", + "`seasonality`: int. Main frequency of the time series; Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Rob J. Hyndman, & Koehler, A. B. \"Another look at measures of forecast accuracy\".](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", + "[Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, \"The M4 Competition: 100,000 time series and 61 forecasting methods\".](https://www.sciencedirect.com/science/article/pii/S0169207019301128)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L308){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MASE.__init__\n", + "\n", + "> MASE.__init__ (seasonality:int, horizon_weight=None)\n", + "\n", + "Mean Absolute Scaled Error \n", + "Calculates the Mean Absolute Scaled Error between\n", + "`y` and `y_hat`. MASE measures the relative prediction\n", + "accuracy of a forecasting method by comparinng the mean absolute errors\n", + "of the prediction and the observed value against the mean\n", + "absolute errors of the seasonal naive model.\n", + "The MASE partially composed the Overall Weighted Average (OWA), \n", + "used in the M4 Competition.\n", + "\n", + "$$ \\mathrm{MASE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{\\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau})} $$\n", + "\n", + "**Parameters:**
\n", + "`seasonality`: int. Main frequency of the time series; Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Rob J. Hyndman, & Koehler, A. B. \"Another look at measures of forecast accuracy\".](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", + "[Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, \"The M4 Competition: 100,000 time series and 61 forecasting methods\".](https://www.sciencedirect.com/science/article/pii/S0169207019301128)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MASE, name='MASE.__init__', title_level=3)" ] @@ -716,7 +1296,53 @@ "execution_count": null, "id": "32a2c11b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L335){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MASE.__call__\n", + "\n", + "> MASE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> y_insample:torch.Tensor, mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor (batch_size, output_size), Actual values.
\n", + "`y_hat`: tensor (batch_size, output_size), Predicted values.
\n", + "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mase`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L335){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MASE.__call__\n", + "\n", + "> MASE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> y_insample:torch.Tensor, mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor (batch_size, output_size), Actual values.
\n", + "`y_hat`: tensor (batch_size, output_size), Predicted values.
\n", + "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mase`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MASE.__call__, name='MASE.__call__', title_level=3)" ] @@ -803,7 +1429,69 @@ "execution_count": null, "id": "edeb6f9a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L364){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### relMSE.__init__\n", + "\n", + "> relMSE.__init__ (y_train, horizon_weight=None)\n", + "\n", + "Relative Mean Squared Error\n", + "Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n", + "as an alternative to percentage errors, to avoid measure unstability.\n", + "$$ \\mathrm{relMSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{\\hat{y}}^{naive1}) =\n", + "\\frac{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}}^{naive1})} $$\n", + "\n", + "**Parameters:**
\n", + "`y_train`: numpy array, Training values.
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "- [Hyndman, R. J and Koehler, A. B. (2006).\n", + " \"Another look at measures of forecast accuracy\",\n", + " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", + "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", + " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L364){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### relMSE.__init__\n", + "\n", + "> relMSE.__init__ (y_train, horizon_weight=None)\n", + "\n", + "Relative Mean Squared Error\n", + "Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n", + "as an alternative to percentage errors, to avoid measure unstability.\n", + "$$ \\mathrm{relMSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{\\hat{y}}^{naive1}) =\n", + "\\frac{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}}^{naive1})} $$\n", + "\n", + "**Parameters:**
\n", + "`y_train`: numpy array, Training values.
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "- [Hyndman, R. J and Koehler, A. B. (2006).\n", + " \"Another look at measures of forecast accuracy\",\n", + " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", + "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", + " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(relMSE, name='relMSE.__init__', title_level=3)" ] @@ -813,7 +1501,53 @@ "execution_count": null, "id": "a317b5c5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L391){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### relMSE.__call__\n", + "\n", + "> relMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor (batch_size, output_size), Actual values.
\n", + "`y_hat`: tensor (batch_size, output_size), Predicted values.
\n", + "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`relMSE`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L391){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### relMSE.__call__\n", + "\n", + "> relMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor (batch_size, output_size), Actual values.
\n", + "`y_hat`: tensor (batch_size, output_size), Predicted values.
\n", + "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`relMSE`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(relMSE.__call__, name='relMSE.__call__', title_level=3)" ] @@ -898,7 +1632,67 @@ "execution_count": null, "id": "70bd46d9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L418){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### QuantileLoss.__init__\n", + "\n", + "> QuantileLoss.__init__ (q, horizon_weight=None)\n", + "\n", + "Quantile Loss\n", + "\n", + "Computes the quantile loss between `y` and `y_hat`.\n", + "QL measures the deviation of a quantile forecast.\n", + "By weighting the absolute deviation in a non symmetric way, the\n", + "loss pays more attention to under or over estimation.\n", + "A common value for q is 0.5 for the deviation from the median (Pinball loss).\n", + "\n", + "$$ \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q)}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\Big( (1-q)\\,( \\hat{y}^{(q)}_{\\tau} - y_{\\tau} )_{+} + q\\,( y_{\\tau} - \\hat{y}^{(q)}_{\\tau} )_{+} \\Big) $$\n", + "\n", + "**Parameters:**
\n", + "`q`: float, between 0 and 1. The slope of the quantile loss, in the context of quantile regression, the q determines the conditional quantile level.
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L418){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### QuantileLoss.__init__\n", + "\n", + "> QuantileLoss.__init__ (q, horizon_weight=None)\n", + "\n", + "Quantile Loss\n", + "\n", + "Computes the quantile loss between `y` and `y_hat`.\n", + "QL measures the deviation of a quantile forecast.\n", + "By weighting the absolute deviation in a non symmetric way, the\n", + "loss pays more attention to under or over estimation.\n", + "A common value for q is 0.5 for the deviation from the median (Pinball loss).\n", + "\n", + "$$ \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q)}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\Big( (1-q)\\,( \\hat{y}^{(q)}_{\\tau} - y_{\\tau} )_{+} + q\\,( y_{\\tau} - \\hat{y}^{(q)}_{\\tau} )_{+} \\Big) $$\n", + "\n", + "**Parameters:**
\n", + "`q`: float, between 0 and 1. The slope of the quantile loss, in the context of quantile regression, the q determines the conditional quantile level.
\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(QuantileLoss, name='QuantileLoss.__init__', title_level=3)" ] @@ -908,7 +1702,51 @@ "execution_count": null, "id": "0b1588e9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L445){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### QuantileLoss.__call__\n", + "\n", + "> QuantileLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`quantile_loss`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L445){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### QuantileLoss.__call__\n", + "\n", + "> QuantileLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies datapoints to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`quantile_loss`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(QuantileLoss.__call__, name='QuantileLoss.__call__', title_level=3)" ] @@ -1080,7 +1918,87 @@ "execution_count": null, "id": "8f42ec82", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L494){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MQLoss.__init__\n", + "\n", + "> MQLoss.__init__ (level=[80, 90], quantiles=None, horizon_weight=None)\n", + "\n", + "Multi-Quantile loss\n", + "\n", + "Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", + "MQL calculates the average multi-quantile Loss for\n", + "a given set of quantiles, based on the absolute \n", + "difference between predicted quantiles and observed values.\n", + "\n", + "$$ \\mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\\hat{y}}^{(q_{1})}_{\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q_{i})}_{\\tau}) $$\n", + "\n", + "The limit behavior of MQL allows to measure the accuracy \n", + "of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with \n", + "the continuous ranked probability score (CRPS). This can be achieved \n", + "through a numerical integration technique, that discretizes the quantiles \n", + "and treats the CRPS integral with a left Riemann approximation, averaging over \n", + "uniformly distanced quantiles. \n", + "\n", + "$$ \\mathrm{CRPS}(y_{\\tau}, \\mathbf{\\hat{F}}_{\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\tau}, \\hat{y}^{(q)}_{\\tau}) dq $$\n", + "\n", + "**Parameters:**
\n", + "`level`: int list [0,100]. Probability levels for prediction intervals (Defaults median).\n", + "`quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", + "[James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L494){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MQLoss.__init__\n", + "\n", + "> MQLoss.__init__ (level=[80, 90], quantiles=None, horizon_weight=None)\n", + "\n", + "Multi-Quantile loss\n", + "\n", + "Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", + "MQL calculates the average multi-quantile Loss for\n", + "a given set of quantiles, based on the absolute \n", + "difference between predicted quantiles and observed values.\n", + "\n", + "$$ \\mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\\hat{y}}^{(q_{1})}_{\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q_{i})}_{\\tau}) $$\n", + "\n", + "The limit behavior of MQL allows to measure the accuracy \n", + "of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with \n", + "the continuous ranked probability score (CRPS). This can be achieved \n", + "through a numerical integration technique, that discretizes the quantiles \n", + "and treats the CRPS integral with a left Riemann approximation, averaging over \n", + "uniformly distanced quantiles. \n", + "\n", + "$$ \\mathrm{CRPS}(y_{\\tau}, \\mathbf{\\hat{F}}_{\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\tau}, \\hat{y}^{(q)}_{\\tau}) dq $$\n", + "\n", + "**Parameters:**
\n", + "`level`: int list [0,100]. Probability levels for prediction intervals (Defaults median).\n", + "`quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.\n", + "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", + "\n", + "**References:**
\n", + "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", + "[James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MQLoss, name='MQLoss.__init__', title_level=3)" ] @@ -1090,7 +2008,51 @@ "execution_count": null, "id": "bac2237a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L568){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MQLoss.__call__\n", + "\n", + "> MQLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mqloss`: tensor (single value)." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L568){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### MQLoss.__call__\n", + "\n", + "> MQLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "**Parameters:**
\n", + "`y`: tensor, Actual values.
\n", + "`y_hat`: tensor, Predicted values.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns:**
\n", + "`mqloss`: tensor (single value)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(MQLoss.__call__, name='MQLoss.__call__', title_level=3)" ] @@ -1109,7 +2071,17 @@ "execution_count": null, "id": "da37f2ef", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", + "Parameter containing:\n", + "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" + ] + } + ], "source": [ "# | hide\n", "# Unit tests to check MQLoss' stored quantiles\n", @@ -1654,7 +2626,99 @@ "execution_count": null, "id": "a462101b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L913){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.__init__\n", + "\n", + "> DistributionLoss.__init__ (distribution, level=[80, 90], quantiles=None,\n", + "> num_samples=1000, return_params=False,\n", + "> **distribution_kwargs)\n", + "\n", + "DistributionLoss\n", + "\n", + "This PyTorch module wraps the `torch.distribution` classes allowing it to \n", + "interact with NeuralForecast models modularly. It shares the negative \n", + "log-likelihood as the optimization objective and a sample method to \n", + "generate empirically the quantiles defined by the `level` list.\n", + "\n", + "Additionally, it implements a distribution transformation that factorizes the\n", + "scale-dependent likelihood parameters into a base scale and a multiplier \n", + "efficiently learnable within the network's non-linearities operating ranges.\n", + "\n", + "Available distributions:
\n", + "- Poisson
\n", + "- Normal
\n", + "- StudentT
\n", + "- NegativeBinomial
\n", + "- Tweedie
\n", + "- Bernoulli (Temporal Classifiers)\n", + "\n", + "**Parameters:**
\n", + "`distribution`: str, identifier of a torch.distributions.Distribution class.
\n", + "`level`: float list [0,100], confidence levels for prediction intervals.
\n", + "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", + "`num_samples`: int=1000, number of samples for the empirical quantiles.
\n", + "`return_params`: bool=False, whether or not to return the Distribution parameters.

\n", + "\n", + "**References:**
\n", + "- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
\n", + "- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020).\n", + " \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L913){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.__init__\n", + "\n", + "> DistributionLoss.__init__ (distribution, level=[80, 90], quantiles=None,\n", + "> num_samples=1000, return_params=False,\n", + "> **distribution_kwargs)\n", + "\n", + "DistributionLoss\n", + "\n", + "This PyTorch module wraps the `torch.distribution` classes allowing it to \n", + "interact with NeuralForecast models modularly. It shares the negative \n", + "log-likelihood as the optimization objective and a sample method to \n", + "generate empirically the quantiles defined by the `level` list.\n", + "\n", + "Additionally, it implements a distribution transformation that factorizes the\n", + "scale-dependent likelihood parameters into a base scale and a multiplier \n", + "efficiently learnable within the network's non-linearities operating ranges.\n", + "\n", + "Available distributions:
\n", + "- Poisson
\n", + "- Normal
\n", + "- StudentT
\n", + "- NegativeBinomial
\n", + "- Tweedie
\n", + "- Bernoulli (Temporal Classifiers)\n", + "\n", + "**Parameters:**
\n", + "`distribution`: str, identifier of a torch.distributions.Distribution class.
\n", + "`level`: float list [0,100], confidence levels for prediction intervals.
\n", + "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", + "`num_samples`: int=1000, number of samples for the empirical quantiles.
\n", + "`return_params`: bool=False, whether or not to return the Distribution parameters.

\n", + "\n", + "**References:**
\n", + "- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
\n", + "- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020).\n", + " \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DistributionLoss, name='DistributionLoss.__init__', title_level=3)" ] @@ -1664,7 +2728,65 @@ "execution_count": null, "id": "d8c367f8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1040){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.sample\n", + "\n", + "> DistributionLoss.sample (distr_args:torch.Tensor,\n", + "> num_samples:Optional[int]=None)\n", + "\n", + "Construct the empirical quantiles from the estimated Distribution,\n", + "sampling from it `num_samples` independently.\n", + "\n", + "**Parameters**
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`num_samples`: Optional[int]=None, overwrite number of samples for the empirical quantiles.
\n", + "\n", + "**Returns**
\n", + "`samples`: tensor, shape [B,H,`num_samples`].
\n", + "`quantiles`: tensor, empirical quantiles defined by `levels`.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1040){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.sample\n", + "\n", + "> DistributionLoss.sample (distr_args:torch.Tensor,\n", + "> num_samples:Optional[int]=None)\n", + "\n", + "Construct the empirical quantiles from the estimated Distribution,\n", + "sampling from it `num_samples` independently.\n", + "\n", + "**Parameters**
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`num_samples`: Optional[int]=None, overwrite number of samples for the empirical quantiles.
\n", + "\n", + "**Returns**
\n", + "`samples`: tensor, shape [B,H,`num_samples`].
\n", + "`quantiles`: tensor, empirical quantiles defined by `levels`.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DistributionLoss.sample, name='DistributionLoss.sample', title_level=3)" ] @@ -1674,7 +2796,75 @@ "execution_count": null, "id": "04e32679", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1083){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.__call__\n", + "\n", + "> DistributionLoss.__call__ (y:torch.Tensor, distr_args:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "Computes the negative log-likelihood objective function. \n", + "To estimate the following predictive distribution:\n", + "\n", + "$$\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta) \\quad \\mathrm{and} \\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta))$$\n", + "\n", + "where $\\theta$ represents the distributions parameters. It aditionally \n", + "summarizes the objective signal using a weighted average using the `mask` tensor. \n", + "\n", + "**Parameters**
\n", + "`y`: tensor, Actual values.
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns**
\n", + "`loss`: scalar, weighted loss function against which backpropagation will be performed.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1083){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DistributionLoss.__call__\n", + "\n", + "> DistributionLoss.__call__ (y:torch.Tensor, distr_args:torch.Tensor,\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "Computes the negative log-likelihood objective function. \n", + "To estimate the following predictive distribution:\n", + "\n", + "$$\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta) \\quad \\mathrm{and} \\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta))$$\n", + "\n", + "where $\\theta$ represents the distributions parameters. It aditionally \n", + "summarizes the objective signal using a weighted average using the `mask` tensor. \n", + "\n", + "**Parameters**
\n", + "`y`: tensor, Actual values.
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`mask`: tensor, Specifies date stamps per series to consider in loss.
\n", + "\n", + "**Returns**
\n", + "`loss`: scalar, weighted loss function against which backpropagation will be performed.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DistributionLoss.__call__, name='DistributionLoss.__call__', title_level=3)" ] @@ -1684,7 +2874,17 @@ "execution_count": null, "id": "14a7e381", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", + "Parameter containing:\n", + "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" + ] + } + ], "source": [ "# | hide\n", "# Unit tests to check DistributionLoss' stored quantiles\n", @@ -1764,35 +2964,42 @@ " # If True, predict_step will return Distribution's parameters\n", " self.return_params = return_params\n", " if self.return_params:\n", - " self.param_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n", + " lambda_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n", + " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", + " self.param_names = [i for j in zip(lambda_names, weight_names) for i in j]\n", " self.output_names = self.output_names + self.param_names\n", "\n", " # Add first output entry for the sample_mean\n", " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = n_components\n", + " self.outputsize_multiplier = 2 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " return (output,)#, weights\n", - " \n", - " def scale_decouple(self, \n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None):\n", - " \"\"\" Scale Decouple\n", + " lambdas, weights = output.chunk(2, dim=-1)\n", + " return (lambdas, weights)\n", + "\n", + " def scale_decouple(\n", + " self,\n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " ):\n", + " \"\"\"Scale Decouple\n", "\n", " Stabilizes 
model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", - " lambdas = output[0]\n", + " lambdas, weights = output\n", + " weights = F.softmax(weights, dim=-1)\n", + "\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(lambdas.size(dim=0), 1, -1)\n", " scale = scale.view(lambdas.size(dim=0), 1, -1)\n", " lambdas = (lambdas * scale) + loc\n", " lambdas = F.softplus(lambdas)\n", - " return (lambdas,)\n", + " return (lambdas, weights)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -1814,15 +3021,10 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", "\n", - " lambdas = distr_args[0]\n", + " lambdas, weights = distr_args\n", " B, H, K = lambdas.size()\n", " Q = len(self.quantiles)\n", "\n", - " # Sample K ~ Mult(weights)\n", - " # shared across B, H\n", - " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", - " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n", - "\n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", " lambdas = lambdas.flatten() \n", @@ -1860,7 +3062,7 @@ " \n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", " if mask is None: \n", " mask = (y > 0) * 1\n", @@ -1868,11 +3070,9 @@ " mask = mask * ((y > 0) * 1)\n", "\n", " eps = 1e-10\n", - " lambdas = distr_args[0]\n", + " lambdas, weights = distr_args\n", " B, H, K = lambdas.size()\n", "\n", - " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n", - "\n", " y = y[:,:,None]\n", " mask = mask[:,:,None]\n", "\n", @@ -1897,7 +3097,7 @@ " return loss\n", "\n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor],\n", + " distr_args: 
Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)\n" @@ -1908,7 +3108,83 @@ "execution_count": null, "id": "62d7daba", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.__init__\n", + "\n", + "> PMM.__init__ (n_components=10, level=[80, 90], quantiles=None,\n", + "> num_samples=1000, return_params=False,\n", + "> batch_correlation=False, horizon_correlation=False)\n", + "\n", + "Poisson Mixture Mesh\n", + "\n", + "This Poisson Mixture statistical model assumes independence across groups of \n", + "data $\\mathcal{G}=\\{[g_{i}]\\}$, and estimates relationships within the group.\n", + "\n", + "$$ \\mathrm{P}\\left(\\mathbf{y}_{[b][t+1:t+H]}\\right) = \n", + "\\prod_{ [g_{i}] \\in \\mathcal{G}} \\mathrm{P} \\left(\\mathbf{y}_{[g_{i}][\\tau]} \\right) =\n", + "\\prod_{\\beta\\in[g_{i}]} \n", + "\\left(\\sum_{k=1}^{K} w_k \\prod_{(\\beta,\\tau) \\in [g_i][t+1:t+H]} \\mathrm{Poisson}(y_{\\beta,\\tau}, \\hat{\\lambda}_{\\beta,\\tau,k}) \\right)$$\n", + "\n", + "**Parameters:**
\n", + "`n_components`: int=10, the number of mixture components.
\n", + "`level`: float list [0,100], confidence levels for prediction intervals.
\n", + "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", + "`return_params`: bool=False, whether or not to return the Distribution parameters.
\n", + "`batch_correlation`: bool=False, whether or not to model batch correlations.
\n", + "`horizon_correlation`: bool=False, whether or not to model horizon correlations.
\n", + "\n", + "**References:**
\n", + "[Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. Submitted to the International \n", + "Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.__init__\n", + "\n", + "> PMM.__init__ (n_components=10, level=[80, 90], quantiles=None,\n", + "> num_samples=1000, return_params=False,\n", + "> batch_correlation=False, horizon_correlation=False)\n", + "\n", + "Poisson Mixture Mesh\n", + "\n", + "This Poisson Mixture statistical model assumes independence across groups of \n", + "data $\\mathcal{G}=\\{[g_{i}]\\}$, and estimates relationships within the group.\n", + "\n", + "$$ \\mathrm{P}\\left(\\mathbf{y}_{[b][t+1:t+H]}\\right) = \n", + "\\prod_{ [g_{i}] \\in \\mathcal{G}} \\mathrm{P} \\left(\\mathbf{y}_{[g_{i}][\\tau]} \\right) =\n", + "\\prod_{\\beta\\in[g_{i}]} \n", + "\\left(\\sum_{k=1}^{K} w_k \\prod_{(\\beta,\\tau) \\in [g_i][t+1:t+H]} \\mathrm{Poisson}(y_{\\beta,\\tau}, \\hat{\\lambda}_{\\beta,\\tau,k}) \\right)$$\n", + "\n", + "**Parameters:**
\n", + "`n_components`: int=10, the number of mixture components.
\n", + "`level`: float list [0,100], confidence levels for prediction intervals.
\n", + "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", + "`return_params`: bool=False, whether or not to return the Distribution parameters.
\n", + "`batch_correlation`: bool=False, whether or not to model batch correlations.
\n", + "`horizon_correlation`: bool=False, whether or not to model horizon correlations.
\n", + "\n", + "**References:**
\n", + "[Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. Submitted to the International \n", + "Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(PMM, name='PMM.__init__', title_level=3)" ] @@ -1918,7 +3194,63 @@ "execution_count": null, "id": "fa8da65c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1206){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.sample\n", + "\n", + "> PMM.sample (distr_args, num_samples=None)\n", + "\n", + "Construct the empirical quantiles from the estimated Distribution,\n", + "sampling from it `num_samples` independently.\n", + "\n", + "**Parameters**
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`num_samples`: int=None, overwrites number of samples for the empirical quantiles (uses `self.num_samples` when None).
\n", + "\n", + "**Returns**
\n", + "`samples`: tensor, shape [B,H,`num_samples`].
\n", + "`quantiles`: tensor, empirical quantiles defined by `levels`.
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1206){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.sample\n", + "\n", + "> PMM.sample (distr_args, num_samples=None)\n", + "\n", + "Construct the empirical quantiles from the estimated Distribution,\n", + "sampling from it `num_samples` independently.\n", + "\n", + "**Parameters**
\n", + "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", + "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", + " of the resulting distribution.
\n", + "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", + " of the resulting distribution.
\n", + "`num_samples`: int=500, overwrites number of samples for the empirical quantiles.
\n", + "\n", + "**Returns**
\n", + "`samples`: tensor, shape [B,H,`num_samples`].
\n", + "`quantiles`: tensor, empirical quantiles defined by `levels`.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(PMM.sample, name='PMM.sample', title_level=3)" ] @@ -1928,7 +3260,39 @@ "execution_count": null, "id": "ba75717c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1305){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.__call__\n", + "\n", + "> PMM.__call__ (y:torch.Tensor, distr_args:Tuple[torch.Tensor],\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "Call self as a function." + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1305){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### PMM.__call__\n", + "\n", + "> PMM.__call__ (y:torch.Tensor, distr_args:Tuple[torch.Tensor],\n", + "> mask:Optional[torch.Tensor]=None)\n", + "\n", + "Call self as a function." 
+ ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(PMM.__call__, name='PMM.__call__', title_level=3)" ] @@ -1947,7 +3311,17 @@ "execution_count": null, "id": "e4a20e21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", + "Parameter containing:\n", + "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" + ] + } + ], "source": [ "# | hide\n", "# Unit tests to check PMM's stored quantiles\n", @@ -1971,11 +3345,43 @@ "execution_count": null, "id": "a56a2fbe", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "lambdas.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "samples.shape (N,H,num_samples) torch.Size([2, 2, 1000])\n", + "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", + "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| hide\n", - "# Create single mixture and broadcast to N,H,K\n", - "weights = torch.ones((1,3))[None, :, :]\n", + "# Create single mixture and broadcast to N, H, K\n", + "weights = torch.ones((2,3))[None, :, :]\n", "lambdas = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :]\n", "\n", "# Create repetitions for the batch dimension N.\n", @@ -1987,7 +3393,7 @@ "print('lambdas.shape (N,H,K) \\t', lambdas.shape)\n", "\n", "distr = PMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (lambdas,)\n", + "distr_args = (lambdas, weights)\n", "samples, sample_mean, quants = distr.sample(distr_args)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", @@ -2092,38 +3498,44 @@ " if self.return_params:\n", " mu_names = [f\"-mu-{i}\" for i in range(1, n_components + 1)]\n", " std_names = [f\"-std-{i}\" for i in range(1, n_components + 1)]\n", - " mu_std_names = [i for j in zip(mu_names, std_names) for i in j]\n", - " self.output_names = self.output_names + mu_std_names\n", + " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", + " self.param_names = [i for j in zip(mu_names, std_names, weight_names) for i in j]\n", + " self.output_names = self.output_names + self.param_names\n", "\n", " # Add first output entry for the sample_mean\n", " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = 2 * n_components\n", + " self.outputsize_multiplier = 3 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " means, stds = torch.tensor_split(output, 2, dim=-1)\n", - " return (means, stds)\n", + " means, stds, weights = output.chunk(3, dim=-1)\n", + "\n", + " return (means, stds, weights)\n", "\n", - " def scale_decouple(self, \n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None,\n", - " eps: float=0.2):\n", - " \"\"\" 
Scale Decouple\n", + " def scale_decouple(\n", + " self,\n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " eps: float = 0.2,\n", + " ):\n", + " \"\"\"Scale Decouple\n", "\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", - " means, stds = output\n", + " means, stds, weights = output\n", " stds = F.softplus(stds)\n", + " weights = F.softmax(weights, dim=-1)\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(means.size(dim=0), 1, -1)\n", - " scale = scale.view(means.size(dim=0), 1, -1) \n", + " scale = scale.view(means.size(dim=0), 1, -1)\n", " means = (means * scale) + loc\n", " stds = (stds + eps) * scale\n", - " return (means, stds)\n", + "\n", + " return (means, stds, weights)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -2145,17 +3557,11 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", " \n", - " means, stds = distr_args\n", + " means, stds, weights = distr_args\n", " B, H, K = means.size()\n", " Q = len(self.quantiles)\n", " assert means.shape == stds.shape\n", "\n", - " # Sample K ~ Mult(weights)\n", - " # shared across B, H\n", - " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", - " \n", - " weights = (1/K) * torch.ones_like(means, device=means.device)\n", - " \n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", " means = means.flatten()\n", @@ -2195,17 +3601,15 @@ "\n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " if mask is None: \n", " mask = torch.ones_like(y)\n", " \n", - " means, stds = distr_args\n", + " means, 
stds, weights = distr_args\n", " B, H, K = means.size()\n", - " \n", - " weights = (1/K) * torch.ones_like(means, device=means.device)\n", - " \n", + " \n", " y = y[:,:, None]\n", " mask = mask[:,:,None]\n", " \n", @@ -2228,7 +3632,7 @@ " return loss\n", " \n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)" @@ -2278,7 +3682,17 @@ "execution_count": null, "id": "8ebe4250", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", + "Parameter containing:\n", + "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" + ] + } + ], "source": [ "# | hide\n", "# Unit tests to check PMM's stored quantiles\n", @@ -2302,7 +3716,40 @@ "execution_count": null, "id": "684d2382", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "means.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "stds.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "samples.shape (N,H,num_samples) torch.Size([2, 2, 1000])\n", + "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", + "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| hide\n", "# Create single mixture and broadcast to N,H,K\n", @@ -2319,7 +3766,7 @@ "print('stds.shape (N,H,K) \\t', stds.shape)\n", "\n", "distr = GMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (means, stds)\n", + "distr_args = (means, stds, weights)\n", "samples, sample_mean, quants = distr.sample(distr_args)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", @@ -2419,47 +3866,55 @@ " # If True, predict_step will return Distribution's parameters\n", " self.return_params = return_params\n", " if self.return_params:\n", - " total_count_names = [f\"-total_count-{i}\" for i in range(1, n_components + 1)]\n", + " total_count_names = [\n", + " f\"-total_count-{i}\" for i in range(1, n_components + 1)\n", + " ]\n", " probs_names = [f\"-probs-{i}\" for i in range(1, n_components + 1)]\n", - " param_names = [i for j in zip(total_count_names, probs_names) for i in j]\n", - " self.output_names = self.output_names + param_names\n", + " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", + " self.param_names = [i for j in zip(total_count_names, probs_names, weight_names) for i in j]\n", + " self.output_names = self.output_names + self.param_names\n", "\n", " # Add first output entry for the sample_mean\n", - " self.output_names.insert(0, \"\") \n", + " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = 2 * n_components\n", + " self.outputsize_multiplier = 3 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " mu, alpha = torch.tensor_split(output, 2, dim=-1)\n", - " return (mu, alpha)\n", + " mu, alpha, weights = output.chunk(3, dim=-1)\n", "\n", - " def scale_decouple(self, \n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None,\n", - " eps: float=0.2):\n", - " \"\"\" Scale Decouple\n", + " 
return mu, alpha, weights\n", + "\n", + " def scale_decouple(\n", + " self,\n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " eps: float = 1e-6,\n", + " ):\n", + " \"\"\"Scale Decouple\n", "\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", " # Efficient NBinomial parametrization\n", - " mu, alpha = output\n", - " mu = F.softplus(mu) + 1e-8\n", - " alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts\n", + " mu, alpha, weights = output\n", + " mu = F.softplus(mu) + eps\n", + " alpha = F.softplus(alpha) + eps # alpha = 1/total_counts\n", + " weights = F.softmax(weights, dim=-1)\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(mu.size(dim=0), 1, -1)\n", " mu *= loc\n", - " alpha /= (loc + 1.)\n", + " alpha /= loc + 1.0\n", "\n", " # mu = total_count * (probs/(1-probs))\n", " # => probs = mu / (total_count + mu)\n", " # => probs = mu / [total_count * (1 + mu * (1/total_count))]\n", " total_count = 1.0 / alpha\n", - " probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 \n", - " return (total_count, probs)\n", + " probs = (mu * alpha / (1.0 + mu * alpha))\n", + " probs = torch.clamp(probs, eps, 1 - eps)\n", + " return (total_count, probs, weights)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -2481,16 +3936,10 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", " \n", - " total_count, probs = distr_args\n", + " total_count, probs, weights = distr_args\n", " B, H, K = total_count.size()\n", " Q = len(self.quantiles)\n", " assert total_count.shape == probs.shape\n", - "\n", - " # Sample K ~ Mult(weights)\n", - " # shared across B, H\n", - " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", - " \n", - " weights = (1/K) * 
torch.ones_like(probs, device=probs.device)\n", " \n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", @@ -2533,17 +3982,15 @@ "\n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " if mask is None: \n", " mask = torch.ones_like(y)\n", " \n", - " total_count, probs = distr_args\n", + " total_count, probs, weights = distr_args\n", " B, H, K = total_count.size()\n", " \n", - " weights = (1/K) * torch.ones_like(probs, device=probs.device)\n", - " \n", " y = y[:,:, None]\n", " mask = mask[:,:,None]\n", "\n", @@ -2567,7 +4014,7 @@ " return loss\n", " \n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)" @@ -2608,7 +4055,40 @@ "execution_count": null, "id": "b67e2931", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "counts.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "probs.shape (N,H,K) \t torch.Size([2, 2, 3])\n", + "samples.shape (N,H,num_samples) torch.Size([2, 2, 2000])\n", + "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", + "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| hide\n", "# Create single mixture and broadcast to N,H,K\n", @@ -2625,7 +4105,7 @@ "print('probs.shape (N,H,K) \\t', probs.shape)\n", "\n", "model = NBMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (counts, probs)\n", + "distr_args = (counts, probs, weights)\n", "samples, sample_mean, quants = model.sample(distr_args, num_samples=2000)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb new file mode 100644 index 000000000..6bafac332 --- /dev/null +++ b/nbs/models.deepnpts.ipynb @@ -0,0 +1,1137 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp models.deepnpts" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DeepNPTS" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a non-parametric baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a tunable strategy. This strategy is learned by exploiting the information across multiple related time series. This model provides a strong, simple baseline for time series forecasting.\n", + "\n", + "\n", + "**References**
\n", + "[Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
\n", + "\n", + "\n", + ":::{.callout-warning collapse=\"false\"}\n", + "#### Exogenous Variables, Losses, and Parameters Availability\n", + "\n", + "Given the sampling procedure during inference, DeepNPTS only supports `DistributionLoss` as training loss.\n", + "\n", + "Note that DeepNPTS generates a non-parametric forecast distribution using Monte Carlo. We use this sampling procedure also during validation to make it closer to the inference procedure. Therefore, only the `MQLoss` is available for validation.\n", + "\n", + "Additionally, Monte Carlo implies that historic exogenous variables are not available for the model.\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numpy as np\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import neuralforecast.losses.pytorch as losses\n", + "from typing import Optional\n", + "from functools import partial\n", + "\n", + "\n", + "from neuralforecast.common._base_windows import BaseWindows\n", + "from neuralforecast.losses.pytorch import MQLoss, GMM, PMM, NBMM\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "import logging\n", + "import warnings\n", + "\n", + "from fastcore.test import test_eq\n", + "from nbdev.showdoc import show_doc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
DeepNPTS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class DeepNPTS(BaseWindows):\n", + " \"\"\" DeepNPTS\n", + "\n", + " Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", + "\n", + " **Parameters:**
\n", + " `h`: int, Forecast horizon.
\n", + " `input_size`: int, autoregressive input size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + " `hidden_size`: int=32, hidden size of dense layers.
\n", + " `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + " `dropout`: float=0.1, dropout.
\n", + " `n_layers`: int=2, number of dense layers.
\n", + " `trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", + " `stat_exog_list`: str list, static exogenous columns.
\n", + " `hist_exog_list`: str list, historic exogenous columns. Not supported by DeepNPTS: must be None, otherwise an exception is raised.
\n", + " `futr_exog_list`: str list, future exogenous columns.
\n", + " `exclude_insample_y`: bool=False, must remain False: DeepNPTS always uses the autoregressive features y[t-input_size:t] and raises an exception if True.
\n", + " `loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + " `valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + " `max_steps`: int=1000, maximum number of training steps.
\n", + " `learning_rate`: float=1e-5, Learning rate between (0, 1).
\n", + " `num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + " `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + " `val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + " `batch_size`: int=32, number of different series in each batch.
\n", + " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", + " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `step_size`: int=1, step size between each window of temporal data.
\n", + " `scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + " `num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + " `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + " `alias`: str, optional, Custom name of the model.
\n", + " `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + " `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + " `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + " **References**
\n", + " - [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
\n", + "\n", + " \"\"\"\n", + " # Class attributes\n", + " SAMPLING_TYPE = 'windows'\n", + " \n", + " def __init__(self,\n", + " h,\n", + " input_size: int = -1,\n", + " hidden_size: int = 32,\n", + " batch_norm: bool = True,\n", + " dropout: float = 0.1,\n", + " n_layers: int = 2,\n", + " trajectory_samples: int = 100,\n", + " futr_exog_list = None,\n", + " hist_exog_list = None,\n", + " stat_exog_list = None,\n", + " exclude_insample_y = False,\n", + " loss = GMM(),\n", + " valid_loss = MQLoss(level=[80, 90]),\n", + " max_steps: int = 1000,\n", + " learning_rate: float = 1e-5,\n", + " num_lr_decays: int = 3,\n", + " early_stop_patience_steps: int =-1,\n", + " val_check_steps: int = 100,\n", + " batch_size: int = 32,\n", + " valid_batch_size: Optional[int] = None,\n", + " windows_batch_size: int = 1024,\n", + " inference_windows_batch_size: int = -1,\n", + " start_padding_enabled = False,\n", + " step_size: int = 1,\n", + " scaler_type: str = 'standard',\n", + " random_seed: int = 1,\n", + " num_workers_loader = 0,\n", + " drop_last_loader = False,\n", + " optimizer = None,\n", + " optimizer_kwargs = None,\n", + " **trainer_kwargs):\n", + "\n", + " if hist_exog_list is not None:\n", + " raise Exception('DeepNPTS does not support historical exogenous variables.')\n", + "\n", + " if exclude_insample_y:\n", + " raise Exception('DeepNPTS has no possibility for excluding y.')\n", + " \n", + " supported_losses = (losses.GMM,\n", + " losses.PMM,\n", + " losses.NBMM)\n", + "\n", + " if not isinstance(loss, supported_losses):\n", + " raise Exception('DeepNPTS only supports GMM, PMM or NBMM as loss function.') \n", + " \n", + " if not isinstance(valid_loss, losses.MQLoss):\n", + " raise Exception('DeepNPTS only supports MQLoss as validation loss.')\n", + " \n", + " # Overwrite n_components, it has to be the input_size in DeepNPTS\n", + " loss.n_components = input_size\n", + " \n", + " # Inherit BaseWindows class\n", + " super(DeepNPTS, self).__init__(h=h,\n", + " 
input_size=input_size,\n", + " futr_exog_list=futr_exog_list,\n", + " hist_exog_list=hist_exog_list,\n", + " stat_exog_list=stat_exog_list,\n", + " exclude_insample_y = exclude_insample_y,\n", + " loss=loss,\n", + " valid_loss=valid_loss,\n", + " max_steps=max_steps,\n", + " learning_rate=learning_rate,\n", + " num_lr_decays=num_lr_decays,\n", + " early_stop_patience_steps=early_stop_patience_steps,\n", + " val_check_steps=val_check_steps,\n", + " batch_size=batch_size,\n", + " windows_batch_size=windows_batch_size,\n", + " valid_batch_size=valid_batch_size,\n", + " inference_windows_batch_size=inference_windows_batch_size,\n", + " start_padding_enabled=start_padding_enabled,\n", + " step_size=step_size,\n", + " scaler_type=scaler_type,\n", + " num_workers_loader=num_workers_loader,\n", + " drop_last_loader=drop_last_loader,\n", + " random_seed=random_seed,\n", + " optimizer=optimizer,\n", + " optimizer_kwargs=optimizer_kwargs,\n", + " **trainer_kwargs)\n", + "\n", + " self.h = h\n", + " self.h_backup = self.h # Used because h=1 during training\n", + " self.use_softmax = True\n", + " self.hidden_size = hidden_size\n", + " self.dropout = dropout\n", + " self.trajectory_samples = trajectory_samples\n", + "\n", + " self.futr_exog_size = len(self.futr_exog_list)\n", + " self.stat_exog_size = len(self.stat_exog_list)\n", + "\n", + " input_dim = input_size * (1 + self.futr_exog_size) + self.stat_exog_size\n", + " # Create DeepNPTSNetwork\n", + " modules = [] \n", + " for i in range(n_layers):\n", + " modules.append(nn.Linear(input_dim if i == 0 else hidden_size, hidden_size))\n", + " modules.append(nn.ReLU())\n", + " if batch_norm:\n", + " modules.append(nn.BatchNorm1d(hidden_size))\n", + " if dropout > 0.0:\n", + " modules.append(nn.Dropout(dropout))\n", + "\n", + " self.deepnptsnetwork = nn.Sequential(*modules)\n", + " self.deepnptsnetwork.apply(partial(self._init_weights, scale=0.07))\n", + "\n", + " # Add output layers for Mixture distribution \n", + " output_modules 
= []\n", + " if dropout > 0.0:\n", + " output_modules.append(nn.Dropout(self.dropout))\n", + " \n", + " if isinstance(loss, GMM):\n", + " output_modules.append(nn.Linear(hidden_size, input_size + 1))\n", + " elif isinstance(loss, PMM):\n", + " output_modules.append(nn.Linear(hidden_size, input_size))\n", + " elif isinstance(loss, NBMM):\n", + " output_modules.append(nn.Linear(hidden_size, input_size))\n", + "\n", + " self.output_layer = nn.Sequential(*output_modules)\n", + " self.output_layer.apply(self._init_weights)\n", + "\n", + "\n", + " @staticmethod\n", + " def _init_weights(module, scale=1.0):\n", + " if type(module) == nn.Linear:\n", + " nn.init.uniform_(module.weight, -scale, scale)\n", + " nn.init.zeros_(module.bias)\n", + "\n", + " def _domain_map(self, o_t, insample_y):\n", + " if isinstance(self.loss, GMM):\n", + " weights = o_t[:, :-1] # [B, L + 1] -> [B, L]\n", + " kernel_width = o_t[:, -1:] # [B, L + 1] -> [B, 1]\n", + " kernel_width = torch.repeat_interleave(input=kernel_width,\n", + " repeats=weights.shape[1],\n", + " dim=-1) # [B, 1] -> [B, L]\n", + " output = torch.cat([insample_y, kernel_width, weights], dim=-1) # [B, L] + [B, L] + [B, L] = [B, 3 * L]\n", + " output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * L]\n", + " elif isinstance(self.loss, PMM):\n", + " weights = o_t # [B, L] -> [B, L]\n", + " output = torch.cat([insample_y, weights], dim=-1) # [B, L] + [B, L] = [B, 2 * L]\n", + " output = output.unsqueeze(1) # [B, 2 * L] = [B, 1, 2 * L] \n", + " elif isinstance(self.loss, NBMM):\n", + " weights = torch.ones_like(o_t) # [B, L] -> [B, L]\n", + " output = torch.cat([insample_y, o_t, weights], dim=-1) # [B, L] + [B, L] + [B, L] = [B, 3 * L]\n", + " output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * \n", + "\n", + " else:\n", + " raise NotImplementedError\n", + " \n", + " return output\n", + "\n", + " # Override BaseWindows method\n", + " def training_step(self, batch, batch_idx):\n", + " \n", + " # Only train one-step ahead\n", + " 
self.h = 1\n", + " self.quantiles = self.loss.quantiles\n", + "\n", + " # Create and normalize windows [Ws, L+H, C]\n", + " y_idx = batch[\"y_idx\"]\n", + " windows = self._create_windows(batch, step=\"train\")\n", + " original_outsample_y = torch.clone(windows[\"temporal\"][:, -self.h :, y_idx])\n", + " windows = self._normalization(windows=windows, y_idx=y_idx)\n", + "\n", + " # Parse windows\n", + " (\n", + " insample_y,\n", + " insample_mask,\n", + " outsample_y,\n", + " outsample_mask,\n", + " _,\n", + " futr_exog,\n", + " stat_exog,\n", + " ) = self._parse_windows(batch, windows)\n", + "\n", + " windows_batch = dict(\n", + " insample_y=insample_y, # [Ws, L]\n", + " insample_mask=insample_mask, # [Ws, L]\n", + " futr_exog=futr_exog, # [Ws, L+H]\n", + " hist_exog=None, \n", + " stat_exog=stat_exog, # [Ws, 1]\n", + " y_idx=y_idx # [Ws, 1]\n", + " ) \n", + "\n", + " # Model Predictions\n", + " output = self.train_forward(windows_batch)\n", + "\n", + " _, y_loc, y_scale = self._inv_normalization(\n", + " y_hat=outsample_y, \n", + " temporal_cols=batch[\"temporal_cols\"], \n", + " y_idx=y_idx\n", + " )\n", + " # outsample_y = original_insample_y\n", + " outsample_y = original_outsample_y\n", + " distr_args = self.loss.scale_decouple(\n", + " output=output, loc=y_loc, scale=y_scale\n", + " )\n", + " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n", + "\n", + " if torch.isnan(loss):\n", + " print(\"Model Parameters\", self.hparams)\n", + " print(\"insample_y\", torch.isnan(insample_y).sum())\n", + " print(\"outsample_y\", torch.isnan(outsample_y).sum())\n", + " print(\"output\", torch.isnan(output).sum())\n", + " raise Exception(\"Loss is NaN, training stopped.\")\n", + "\n", + " self.log(\"train_loss\", loss, prog_bar=True, on_epoch=True)\n", + " self.train_trajectories.append((self.global_step, float(loss)))\n", + "\n", + " self.h = self.h_backup \n", + " \n", + " return loss\n", + "\n", + " # Override BaseWindows method\n", + " def 
validation_step(self, batch, batch_idx):\n", + "\n", + " self.h = self.h_backup\n", + " self.quantiles = self.valid_loss.quantiles\n", + "\n", + " if self.val_size == 0:\n", + " return np.nan\n", + "\n", + " # TODO: Hack to compute number of windows\n", + " windows = self._create_windows(batch, step=\"val\")\n", + " n_windows = len(windows[\"temporal\"])\n", + " y_idx = batch[\"y_idx\"]\n", + "\n", + " # Number of windows in batch\n", + " windows_batch_size = self.inference_windows_batch_size\n", + " if windows_batch_size < 0:\n", + " windows_batch_size = n_windows\n", + " n_batches = int(np.ceil(n_windows / windows_batch_size))\n", + "\n", + " valid_losses = []\n", + " batch_sizes = []\n", + " for i in range(n_batches):\n", + " # Create and normalize windows [Ws, L+H, C]\n", + " w_idxs = np.arange(\n", + " i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)\n", + " )\n", + " windows = self._create_windows(batch, step=\"val\", w_idxs=w_idxs)\n", + " original_outsample_y = torch.clone(windows[\"temporal\"][:, -self.h:, 0])\n", + " windows = self._normalization(windows=windows, y_idx=y_idx)\n", + "\n", + " # Parse windows\n", + " (\n", + " insample_y,\n", + " insample_mask,\n", + " _,\n", + " outsample_mask,\n", + " _,\n", + " futr_exog,\n", + " stat_exog,\n", + " ) = self._parse_windows(batch, windows)\n", + " \n", + " windows_batch = dict(\n", + " insample_y=insample_y, # [Ws, L]\n", + " insample_mask=insample_mask, # [Ws, L]\n", + " futr_exog=futr_exog, # [Ws, L+H]\n", + " hist_exog=None, # [Ws, L]\n", + " stat_exog=stat_exog,\n", + " y_idx=y_idx,\n", + " ) # [Ws, 1]\n", + "\n", + " # Model Predictions\n", + " output_batch = self(windows_batch)\n", + " # Monte Carlo already returns y_hat with mean and quantiles\n", + " output_batch = output_batch[:,:, 1:] # Remove mean\n", + " valid_loss_batch = self.valid_loss(y=original_outsample_y, y_hat=output_batch, mask=outsample_mask)\n", + " valid_losses.append(valid_loss_batch)\n", + " 
batch_sizes.append(len(output_batch))\n", + "\n", + " valid_loss = torch.stack(valid_losses)\n", + " batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)\n", + " valid_loss = torch.sum(valid_loss * batch_sizes) / torch.sum(batch_sizes)\n", + "\n", + " if torch.isnan(valid_loss):\n", + " raise Exception(\"Loss is NaN, training stopped.\")\n", + "\n", + " self.log(\"valid_loss\", valid_loss, prog_bar=True, on_epoch=True)\n", + " self.validation_step_outputs.append(valid_loss)\n", + " return valid_loss\n", + "\n", + " # Override BaseWindows method\n", + " def predict_step(self, batch, batch_idx):\n", + "\n", + " self.h == self.h_backup\n", + " self.quantiles = self.loss.quantiles\n", + "\n", + " # TODO: Hack to compute number of windows\n", + " windows = self._create_windows(batch, step='predict')\n", + " n_windows = len(windows['temporal'])\n", + " y_idx = batch['y_idx']\n", + "\n", + " # Number of windows in batch\n", + " windows_batch_size = self.inference_windows_batch_size\n", + " if windows_batch_size < 0:\n", + " windows_batch_size = n_windows\n", + " n_batches = int(np.ceil(n_windows/windows_batch_size))\n", + "\n", + " y_hats = []\n", + " for i in range(n_batches):\n", + " # Create and normalize windows [Ws, L+H, C]\n", + " w_idxs = np.arange(i*windows_batch_size, \n", + " min((i+1)*windows_batch_size, n_windows))\n", + " windows = self._create_windows(batch, step='predict', w_idxs=w_idxs)\n", + " windows = self._normalization(windows=windows, y_idx=y_idx)\n", + "\n", + " # Parse windows\n", + " insample_y, insample_mask, _, _, _, futr_exog, stat_exog = self._parse_windows(batch, windows)\n", + " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n", + " insample_mask=insample_mask, # [Ws, L]\n", + " futr_exog=futr_exog, # [Ws, L+H]\n", + " stat_exog=stat_exog,\n", + " y_idx=y_idx)\n", + " \n", + " # Model Predictions\n", + " y_hat = self(windows_batch)\n", + " # Monte Carlo already returns y_hat with mean and quantiles\n", + " 
y_hats.append(y_hat)\n", + " y_hat = torch.cat(y_hats, dim=0)\n", + " return y_hat\n", + "\n", + " def train_forward(self, windows_batch):\n", + " # Parse windows_batch\n", + " x_t = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", + " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n", + " stat_exog = windows_batch['stat_exog'] # [B, S]\n", + "\n", + " batch_size, seq_len = x_t.shape[:2] # B = batch_size, L = seq_len\n", + "\n", + " # Concatenate x_t with future exogenous\n", + " if self.futr_exog_size > 0: \n", + " futr_exog_t = futr_exog[:, :seq_len] # [B, L + h, F] -> [B, L, F]\n", + " x_t = torch.cat((x_t, futr_exog_t), dim=2) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] \n", + " \n", + " x_t = x_t.reshape(batch_size, -1) # [B, L, 1 + F] -> [B, L * (1 + F)]\n", + "\n", + " # Concatenate x_t with static exogenous\n", + " if self.stat_exog_size > 0:\n", + " x_t = torch.cat((x_t, stat_exog), dim=1) # [B, L * (1 + F)] + [B, S] -> [B, L * (1 + F) + S]\n", + "\n", + " # Run through DeepNPTSNetwork\n", + " h_t = self.deepnptsnetwork(x_t) # [B, L * (1 + F) + S] -> [B, hidden_size]\n", + " o_t = self.output_layer(h_t) # [B, hidden_size] -> [B, L + 1]\n", + "\n", + " output = self._domain_map(o_t, windows_batch['insample_y']) # [B, L + 1], [B, L] -> [B, 3 * L]\n", + " output = self.loss.domain_map(output) # [B, 3 * L] -> ([B, L], [B, L], [B, L])\n", + "\n", + " return output\n", + "\n", + " def forward(self, windows_batch):\n", + " # Parse windows_batch\n", + " insample_y_t = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", + " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n", + " stat_exog = windows_batch['stat_exog'] # [B, S]\n", + " y_idx = windows_batch['y_idx']\n", + "\n", + " batch_size, seq_len = insample_y_t.shape[:2] # B = batch_size, L = seq_len\n", + " device = insample_y_t.device\n", + " dtype = insample_y_t.dtype\n", + "\n", + " # Repeat insample_y for trajectory samples\n", + " insample_y_t = 
torch.repeat_interleave(input=insample_y_t, \n", + " repeats=self.trajectory_samples, \n", + " dim=0) # [B, L, 1] -> [B * n_samples, L, 1]\n", + " \n", + " # Input x_t is insample_y at time t\n", + " x_t = insample_y_t\n", + "\n", + " # Repeat futr_exog if available for trajectory samples and add to x_t \n", + " if self.futr_exog_size > 0: \n", + " futr_exog = torch.repeat_interleave(input=futr_exog, \n", + " repeats=self.trajectory_samples, \n", + " dim=0) # [B, L + h, F] -> [B * n_samples, L + h, F] \n", + " x_t = torch.cat((x_t, futr_exog[:, :seq_len]), dim=2) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] \n", + " \n", + " x_t = x_t.reshape(batch_size * self.trajectory_samples, -1) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)]\n", + "\n", + " # Repeat stat_exog if available for trajectory samples and add to x_t\n", + " if self.stat_exog_size > 0:\n", + " stat_exog = torch.repeat_interleave(\n", + " input=stat_exog, \n", + " repeats=self.trajectory_samples, \n", + " dim=0) # [B, S] -> [B * n_samples, S] \n", + " x_t = torch.cat((x_t, stat_exog), dim=1) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S]\n", + "\n", + " # Scales for inverse normalization\n", + " y_scale = self.scaler.x_scale[:, :, y_idx]\n", + " y_loc = self.scaler.x_shift[:, :, y_idx]\n", + " y_scale = torch.repeat_interleave(input=y_scale, \n", + " repeats=self.trajectory_samples, \n", + " dim=0)\n", + " y_loc = torch.repeat_interleave(input=y_loc, \n", + " repeats=self.trajectory_samples, \n", + " dim=0)\n", + " # Create forecasts tensor\n", + " forecasts = torch.zeros((batch_size, \n", + " self.h,\n", + " len(self.quantiles) + 1), \n", + " device=device, \n", + " dtype=dtype)\n", + " \n", + " # Recursive predictions\n", + " for t in range(self.h):\n", + " # Run input throught DeepNPTSNetwork\n", + " h_t = self.deepnptsnetwork(x_t) # [B * n_samples, L * (1 + F) + S] -> [B, hidden_size]\n", + " o_t = 
self.output_layer(h_t) # [B * n_samples, hidden_size] -> [B * n_samples, L (+ 1)]\n", + " output = self._domain_map(o_t, insample_y_t.squeeze(-1)) # [B * n_samples, L + 1], [B * n_samples, L] -> [B * n_samples, 3 * L]\n", + " output = self.loss.domain_map(output) # [B * n_samples, 3 * L] -> ([B * n_samples, L], [B * n_samples, L], [B * n_samples, L])\n", + "\n", + " # Inverse normalization\n", + " distr_args = self.loss.scale_decouple(output=output, \n", + " loc=y_loc, \n", + " scale=y_scale)\n", + "\n", + " # Sample and create probabilistic outputs\n", + " samples_t_flat, _, _ = self.loss.sample(distr_args=distr_args, \n", + " num_samples=1)\n", + "\n", + " samples_t_flat = samples_t_flat.squeeze()\n", + " samples_t = samples_t_flat.reshape(batch_size, \n", + " self.trajectory_samples) # [B * n_samples] -> [B, n_samples] \n", + " \n", + " samples_t_mean = torch.mean(samples_t, dim=-1) # [B, n_samples] -> [B] \n", + " quantiles_t = torch.quantile(input=samples_t, \n", + " q=self.quantiles, \n", + " dim=-1) # [B, n_samples] -> [Q, B]\n", + " forecasts[:, t, 0] = samples_t_mean\n", + " forecasts[:, t, 1:] = quantiles_t.permute(1, 0)\n", + "\n", + " insample_y_t_next = self.scaler.scaler(samples_t_flat, \n", + " y_loc.squeeze(), \n", + " y_scale.squeeze()) # [B * n_samples] -> [B * n_samples]\n", + " insample_y_t_next = insample_y_t_next.unsqueeze(-1)\\\n", + " .unsqueeze(-1) # [B * n_samples] -> [B * n_samples, 1, 1]\n", + "\n", + " # Update insample_y_t \n", + " insample_y_t = torch.cat([insample_y_t[:, 1:], \n", + " insample_y_t_next], \n", + " dim=1) # [B * n_samples, L - 1, 1] + [B * n_samples, 1, 1] -> [B * n_samples, L, 1]\n", + " \n", + " # Update input\n", + " x_t = insample_y_t\n", + " # Concatenate x_t with future exogenous\n", + " if self.futr_exog_size > 0: \n", + " x_t = torch.cat((x_t, \n", + " futr_exog[:, t:seq_len + t]), \n", + " dim=2) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] \n", + " \n", + " x_t = 
x_t.reshape(batch_size * self.trajectory_samples\n", + " , -1) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)]\n", + "\n", + " # Concatenate x_t with static exogenous\n", + " if self.stat_exog_size > 0:\n", + " x_t = torch.cat((x_t, stat_exog), dim=1) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S]\n", + " \n", + " return forecasts\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L20){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DeepNPTS\n", + "\n", + "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", + "> dropout:float=0.5, n_layers:int=2, trajectory_samples:int=100,\n", + "> futr_exog_list=None, hist_exog_list=None, stat_exog_list=None,\n", + "> exclude_insample_y=False, loss=GMM(), valid_loss=MQLoss(),\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32,\n", + "> valid_batch_size:Optional[int]=None,\n", + "> windows_batch_size:int=1024,\n", + "> inference_windows_batch_size:int=-1,\n", + "> start_padding_enabled=False, step_size:int=1,\n", + "> scaler_type:str='standard', random_seed:int=1,\n", + "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", + "> optimizer_kwargs=None, **trainer_kwargs)\n", + "\n", + "DeepNPTS\n", + "\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", + "\n", + "**Parameters:**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`hidden_size`: int=32, hidden size of dense layers.
\n", + "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + "`dropout`: float=0.5, dropout rate.
\n", + "`n_layers`: int=2, number of dense layers.
\n", + "`trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + "`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", + "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='standard', type of scaler for temporal inputs normalization, see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + "**References**
\n", + "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L20){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DeepNPTS\n", + "\n", + "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", + "> dropout:float=0.5, n_layers:int=2, trajectory_samples:int=100,\n", + "> futr_exog_list=None, hist_exog_list=None, stat_exog_list=None,\n", + "> exclude_insample_y=False, loss=GMM(), valid_loss=MQLoss(),\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32,\n", + "> valid_batch_size:Optional[int]=None,\n", + "> windows_batch_size:int=1024,\n", + "> inference_windows_batch_size:int=-1,\n", + "> start_padding_enabled=False, step_size:int=1,\n", + "> scaler_type:str='standard', random_seed:int=1,\n", + "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", + "> optimizer_kwargs=None, **trainer_kwargs)\n", + "\n", + "DeepNPTS\n", + "\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", + "\n", + "**Parameters:**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`hidden_size`: int=32, hidden size of dense layers.
\n", + "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + "`dropout`: float=0.5, dropout rate.
\n", + "`n_layers`: int=2, number of dense layers.
\n", + "`trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + "`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", + "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='standard', type of scaler for temporal inputs normalization, see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + "**References**
\n", + "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(DeepNPTS, title_level=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeepNPTS.fit\n", + "\n", + "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ], + "text/plain": [ + "---\n", + "\n", + "### DeepNPTS.fit\n", + "\n", + "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeepNPTS.predict\n", + "\n", + "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", + "> **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ], + "text/plain": [ + "---\n", + "\n", + "### DeepNPTS.predict\n", + "\n", + "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", + "> **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from neuralforecast import NeuralForecast\n", + "from neuralforecast.losses.pytorch import MQLoss, DistributionLoss, GMM\n", + "from neuralforecast.tsdataset import TimeSeriesDataset\n", + "from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 1\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b74158f17d254e4884139ee5c48e5706", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | | 0/? 
[00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#| eval: false\n", + "import pandas as pd\n", + "import pytorch_lightning as pl\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from neuralforecast import NeuralForecast\n", + "#from neuralforecast.models import DeepAR\n", + "from neuralforecast.losses.pytorch import DistributionLoss, HuberMQLoss\n", + "from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic\n", + "\n", + "#AirPassengersPanel['y'] = AirPassengersPanel['y'] + 10\n", + "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n", + "\n", + "nf = NeuralForecast(\n", + " models=[DeepNPTS(h=12,\n", + " input_size=12,\n", + " trajectory_samples=100,\n", + " loss=GMM(),\n", + " # learning_rate=1e-5,\n", + " n_layers = 2,\n", + " dropout=0.0,\n", + " stat_exog_list=['airline1'],\n", + " futr_exog_list=['trend'],\n", + " max_steps=1000,\n", + " val_check_steps=10,\n", + " early_stop_patience_steps=3,\n", + " scaler_type='robust',\n", + " enable_progress_bar=True),\n", + " ],\n", + " freq='M'\n", + ")\n", + "nf.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n", + "Y_hat_df = nf.predict(futr_df=Y_test_df)\n", + "\n", + "# Plot quantile predictions\n", + "Y_hat_df = Y_hat_df.reset_index(drop=False).drop(columns=['unique_id','ds'])\n", + "plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n", + "plot_df = pd.concat([Y_train_df, plot_df])\n", + "\n", + "plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n", + "plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n", + "plt.plot(plot_df['ds'], plot_df['DeepNPTS'], c='red', label='mean')\n", + "plt.plot(plot_df['ds'], plot_df['DeepNPTS-median'], c='blue', label='median')\n", + "plt.fill_between(x=plot_df['ds'][-12:], \n", + " y1=plot_df['DeepNPTS-lo-90'][-12:].values, \n", + " 
y2=plot_df['DeepNPTS-hi-90'][-12:].values,\n", + " alpha=0.4, label='level 90')\n", + "plt.legend()\n", + "plt.grid()\n", + "plt.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/neuralforecast/_modidx.py b/neuralforecast/_modidx.py index 4bcbdabad..275d7598b 100644 --- a/neuralforecast/_modidx.py +++ b/neuralforecast/_modidx.py @@ -512,6 +512,24 @@ 'neuralforecast/models/deepar.py'), 'neuralforecast.models.deepar.DeepAR.validation_step': ( 'models.deepar.html#deepar.validation_step', 'neuralforecast/models/deepar.py')}, + 'neuralforecast.models.deepnpts': { 'neuralforecast.models.deepnpts.DeepNPTS': ( 'models.deepnpts.html#deepnpts', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.__init__': ( 'models.deepnpts.html#deepnpts.__init__', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS._domain_map': ( 'models.deepnpts.html#deepnpts._domain_map', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS._init_weights': ( 'models.deepnpts.html#deepnpts._init_weights', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.forward': ( 'models.deepnpts.html#deepnpts.forward', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.predict_step': ( 'models.deepnpts.html#deepnpts.predict_step', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.train_forward': ( 'models.deepnpts.html#deepnpts.train_forward', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.training_step': ( 
'models.deepnpts.html#deepnpts.training_step', + 'neuralforecast/models/deepnpts.py'), + 'neuralforecast.models.deepnpts.DeepNPTS.validation_step': ( 'models.deepnpts.html#deepnpts.validation_step', + 'neuralforecast/models/deepnpts.py')}, 'neuralforecast.models.dilated_rnn': { 'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer': ( 'models.dilated_rnn.html#attentivelstmlayer', 'neuralforecast/models/dilated_rnn.py'), 'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer.__init__': ( 'models.dilated_rnn.html#attentivelstmlayer.__init__', diff --git a/neuralforecast/common/_scalers.py b/neuralforecast/common/_scalers.py index 15ddb3bd4..bef76f7e9 100644 --- a/neuralforecast/common/_scalers.py +++ b/neuralforecast/common/_scalers.py @@ -313,8 +313,8 @@ def identity_statistics(x, mask, dim=-1, eps=1e-6): shape = list(x.shape) shape[dim] = 1 - x_shift = torch.zeros(shape) - x_scale = torch.ones(shape) + x_shift = torch.zeros(shape, device=x.device) + x_scale = torch.ones(shape, device=x.device) return x_shift, x_scale diff --git a/neuralforecast/core.py b/neuralforecast/core.py index b13338d4c..9919512c0 100644 --- a/neuralforecast/core.py +++ b/neuralforecast/core.py @@ -58,6 +58,7 @@ iTransformer, BiTCN, TiDE, + DeepNPTS, ) # %% ../nbs/core.ipynb 5 @@ -173,6 +174,8 @@ def _insample_times( "autobitcn": BiTCN, "tide": TiDE, "autotide": TiDE, + "deepnpts": DeepNPTS, + "autodeepnpts": DeepNPTS, } # %% ../nbs/core.ipynb 8 diff --git a/neuralforecast/losses/pytorch.py b/neuralforecast/losses/pytorch.py index d7f29c83b..2e5ede2f5 100644 --- a/neuralforecast/losses/pytorch.py +++ b/neuralforecast/losses/pytorch.py @@ -1166,17 +1166,20 @@ def __init__( # If True, predict_step will return Distribution's parameters self.return_params = return_params if self.return_params: - self.param_names = [f"-lambda-{i}" for i in range(1, n_components + 1)] + lambda_names = [f"-lambda-{i}" for i in range(1, n_components + 1)] + weight_names = [f"-weight-{i}" for i in range(1, n_components 
+ 1)] + self.param_names = [i for j in zip(lambda_names, weight_names) for i in j] self.output_names = self.output_names + self.param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = n_components + self.outputsize_multiplier = 2 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - return (output,) # , weights + lambdas, weights = output.chunk(2, dim=-1) + return (lambdas, weights) def scale_decouple( self, @@ -1190,13 +1193,15 @@ def scale_decouple( variance and residual location based on anchoring `loc`, `scale`. Also adds domain protection to the distribution parameters. """ - lambdas = output[0] + lambdas, weights = output + weights = F.softmax(weights, dim=-1) + if (loc is not None) and (scale is not None): loc = loc.view(lambdas.size(dim=0), 1, -1) scale = scale.view(lambdas.size(dim=0), 1, -1) lambdas = (lambdas * scale) + loc lambdas = F.softplus(lambdas) - return (lambdas,) + return (lambdas, weights) def sample(self, distr_args, num_samples=None): """ @@ -1218,15 +1223,10 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - lambdas = distr_args[0] + lambdas, weights = distr_args B, H, K = lambdas.size() Q = len(self.quantiles) - # Sample K ~ Mult(weights) - # shared across B, H - # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) - weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device) - # Avoid loop, vectorize weights = weights.reshape(-1, K) lambdas = lambdas.flatten() @@ -1267,7 +1267,7 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: @@ -1276,11 +1276,9 @@ def neglog_likelihood( mask = mask * ((y > 0) * 1) eps = 1e-10 - lambdas = distr_args[0] + lambdas, weights = 
distr_args B, H, K = lambdas.size() - weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device) - y = y[:, :, None] mask = mask[:, :, None] @@ -1307,7 +1305,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): @@ -1369,18 +1367,22 @@ def __init__( if self.return_params: mu_names = [f"-mu-{i}" for i in range(1, n_components + 1)] std_names = [f"-std-{i}" for i in range(1, n_components + 1)] - mu_std_names = [i for j in zip(mu_names, std_names) for i in j] - self.output_names = self.output_names + mu_std_names + weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] + self.param_names = [ + i for j in zip(mu_names, std_names, weight_names) for i in j + ] + self.output_names = self.output_names + self.param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 2 * n_components + self.outputsize_multiplier = 3 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - means, stds = torch.tensor_split(output, 2, dim=-1) - return (means, stds) + means, stds, weights = output.chunk(3, dim=-1) + + return (means, stds, weights) def scale_decouple( self, @@ -1395,14 +1397,16 @@ def scale_decouple( variance and residual location based on anchoring `loc`, `scale`. Also adds domain protection to the distribution parameters. 
""" - means, stds = output + means, stds, weights = output stds = F.softplus(stds) + weights = F.softmax(weights, dim=-1) if (loc is not None) and (scale is not None): loc = loc.view(means.size(dim=0), 1, -1) scale = scale.view(means.size(dim=0), 1, -1) means = (means * scale) + loc stds = (stds + eps) * scale - return (means, stds) + + return (means, stds, weights) def sample(self, distr_args, num_samples=None): """ @@ -1424,17 +1428,11 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - means, stds = distr_args + means, stds, weights = distr_args B, H, K = means.size() Q = len(self.quantiles) assert means.shape == stds.shape - # Sample K ~ Mult(weights) - # shared across B, H - # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) - - weights = (1 / K) * torch.ones_like(means, device=means.device) - # Avoid loop, vectorize weights = weights.reshape(-1, K) means = means.flatten() @@ -1475,18 +1473,16 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: mask = torch.ones_like(y) - means, stds = distr_args + means, stds, weights = distr_args B, H, K = means.size() - weights = (1 / K) * torch.ones_like(means, device=means.device) - y = y[:, :, None] mask = mask[:, :, None] @@ -1514,7 +1510,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): @@ -1572,25 +1568,29 @@ def __init__( f"-total_count-{i}" for i in range(1, n_components + 1) ] probs_names = [f"-probs-{i}" for i in range(1, n_components + 1)] - param_names = [i for j in zip(total_count_names, probs_names) for i in j] - self.output_names = self.output_names + 
param_names + weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] + self.param_names = [ + i for j in zip(total_count_names, probs_names, weight_names) for i in j + ] + self.output_names = self.output_names + self.param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 2 * n_components + self.outputsize_multiplier = 3 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - mu, alpha = torch.tensor_split(output, 2, dim=-1) - return (mu, alpha) + mu, alpha, weights = output.chunk(3, dim=-1) + + return mu, alpha, weights def scale_decouple( self, output, loc: Optional[torch.Tensor] = None, scale: Optional[torch.Tensor] = None, - eps: float = 0.2, + eps: float = 1e-6, ): """Scale Decouple @@ -1599,9 +1599,10 @@ def scale_decouple( Also adds domain protection to the distribution parameters. """ # Efficient NBinomial parametrization - mu, alpha = output - mu = F.softplus(mu) + 1e-8 - alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts + mu, alpha, weights = output + mu = F.softplus(mu) + eps + alpha = F.softplus(alpha) + eps # alpha = 1/total_counts + weights = F.softmax(weights, dim=-1) if (loc is not None) and (scale is not None): loc = loc.view(mu.size(dim=0), 1, -1) mu *= loc @@ -1611,8 +1612,9 @@ def scale_decouple( # => probs = mu / (total_count + mu) # => probs = mu / [total_count * (1 + mu * (1/total_count))] total_count = 1.0 / alpha - probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 - return (total_count, probs) + probs = mu * alpha / (1.0 + mu * alpha) + probs = torch.clamp(probs, eps, 1 - eps) + return (total_count, probs, weights) def sample(self, distr_args, num_samples=None): """ @@ -1634,17 +1636,11 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - total_count, probs = distr_args + total_count, probs, weights = distr_args B, H, K = total_count.size() Q = len(self.quantiles) 
assert total_count.shape == probs.shape - # Sample K ~ Mult(weights) - # shared across B, H - # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) - - weights = (1 / K) * torch.ones_like(probs, device=probs.device) - # Avoid loop, vectorize weights = weights.reshape(-1, K) total_count = total_count.flatten() @@ -1686,18 +1682,16 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: mask = torch.ones_like(y) - total_count, probs = distr_args + total_count, probs, weights = distr_args B, H, K = total_count.size() - weights = (1 / K) * torch.ones_like(probs, device=probs.device) - y = y[:, :, None] mask = mask[:, :, None] @@ -1728,7 +1722,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): diff --git a/neuralforecast/models/__init__.py b/neuralforecast/models/__init__.py index fbca72d6e..ee07166ab 100644 --- a/neuralforecast/models/__init__.py +++ b/neuralforecast/models/__init__.py @@ -2,7 +2,7 @@ 'MLP', 'NHITS', 'NBEATS', 'NBEATSx', 'DLinear', 'NLinear', 'TFT', 'VanillaTransformer', 'Informer', 'Autoformer', 'PatchTST', 'FEDformer', 'StemGNN', 'HINT', 'TimesNet', 'TimeLLM', 'TSMixer', 'TSMixerx', 'MLPMultivariate', - 'iTransformer', 'BiTCN', 'TiDE', + 'iTransformer', 'BiTCN', 'TiDE', 'DeepNPTS' ] from .rnn import RNN @@ -33,4 +33,4 @@ from .itransformer import iTransformer from .bitcn import BiTCN from .tide import TiDE - +from .deepnpts import DeepNPTS \ No newline at end of file diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py new file mode 100644 index 000000000..d4da85974 --- /dev/null +++ b/neuralforecast/models/deepnpts.py @@ -0,0 
+1,557 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/models.deepnpts.ipynb. + +# %% auto 0 +__all__ = ['DeepNPTS'] + +# %% ../../nbs/models.deepnpts.ipynb 3 +import numpy as np + +import torch +import torch.nn as nn +import neuralforecast.losses.pytorch as losses +from typing import Optional +from functools import partial + + +from ..common._base_windows import BaseWindows +from ..losses.pytorch import MQLoss, GMM, PMM, NBMM + +# %% ../../nbs/models.deepnpts.ipynb 7 +class DeepNPTS(BaseWindows): + """DeepNPTS + + Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. + + **Parameters:**
+ `h`: int, Forecast horizon.
+ `input_size`: int, autoregressive input size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
+ `hidden_size`: int=32, hidden size of dense layers.
+ `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
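+ `dropout`: float=0.1, dropout rate applied after each dense layer and before the output layer; no dropout layers are added when it is 0.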
+ `n_layers`: int=2, number of dense layers.
+ `trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
+ `stat_exog_list`: str list, static exogenous columns.
+ `hist_exog_list`: str list, historic exogenous columns. Not supported by DeepNPTS: must be left as None, otherwise an exception is raised.
+ `futr_exog_list`: str list, future exogenous columns.
+ `exclude_insample_y`: bool=False, must remain False: DeepNPTS always uses the autoregressive features y[t-input_size:t] and raises an exception if True.
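+ `loss`: PyTorch module=GMM(), instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html). Must be an instance of GMM, PMM or NBMM; its `n_components` is overwritten with `input_size`.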
+ `valid_loss`: PyTorch module=MQLoss(level=[80, 90]), instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html). Must be an instance of MQLoss.
+ `max_steps`: int=1000, maximum number of training steps.
+ `learning_rate`: float=1e-5, Learning rate between (0, 1).
+ `num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
+ `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
+ `val_check_steps`: int=100, Number of training steps between every validation loss check.
+ `batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+ `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
+ `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `step_size`: int=1, step size between each window of temporal data.
+ `scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
+ `random_seed`: int, random_seed for pytorch initializer and numpy generators.
+ `num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
+ `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
+ `alias`: str, optional, Custom name of the model.
+ `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
+ `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
+ `**trainer_kwargs`: keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
+ + **References**
+ - [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). "Deep Non-Parametric Time Series Forecaster". arXiv.](https://arxiv.org/abs/2312.14657)
+ + """ + + # Class attributes + SAMPLING_TYPE = "windows" + + def __init__( + self, + h, + input_size: int = -1, + hidden_size: int = 32, + batch_norm: bool = True, + dropout: float = 0.1, + n_layers: int = 2, + trajectory_samples: int = 100, + futr_exog_list=None, + hist_exog_list=None, + stat_exog_list=None, + exclude_insample_y=False, + loss=GMM(), + valid_loss=MQLoss(level=[80, 90]), + max_steps: int = 1000, + learning_rate: float = 1e-5, + num_lr_decays: int = 3, + early_stop_patience_steps: int = -1, + val_check_steps: int = 100, + batch_size: int = 32, + valid_batch_size: Optional[int] = None, + windows_batch_size: int = 1024, + inference_windows_batch_size: int = -1, + start_padding_enabled=False, + step_size: int = 1, + scaler_type: str = "standard", + random_seed: int = 1, + num_workers_loader=0, + drop_last_loader=False, + optimizer=None, + optimizer_kwargs=None, + **trainer_kwargs + ): + + if hist_exog_list is not None: + raise Exception("DeepNPTS does not support historical exogenous variables.") + + if exclude_insample_y: + raise Exception("DeepNPTS has no possibility for excluding y.") + + supported_losses = (losses.GMM, losses.PMM, losses.NBMM) + + if not isinstance(loss, supported_losses): + raise Exception("DeepNPTS only supports GMM, PMM or NBMM as loss function.") + + if not isinstance(valid_loss, losses.MQLoss): + raise Exception("DeepNPTS only supports MQLoss as validation loss.") + + # Overwrite n_components, it has to be the input_size in DeepNPTS + loss.n_components = input_size + + # Inherit BaseWindows class + super(DeepNPTS, self).__init__( + h=h, + input_size=input_size, + futr_exog_list=futr_exog_list, + hist_exog_list=hist_exog_list, + stat_exog_list=stat_exog_list, + exclude_insample_y=exclude_insample_y, + loss=loss, + valid_loss=valid_loss, + max_steps=max_steps, + learning_rate=learning_rate, + num_lr_decays=num_lr_decays, + early_stop_patience_steps=early_stop_patience_steps, + val_check_steps=val_check_steps, + 
batch_size=batch_size, + windows_batch_size=windows_batch_size, + valid_batch_size=valid_batch_size, + inference_windows_batch_size=inference_windows_batch_size, + start_padding_enabled=start_padding_enabled, + step_size=step_size, + scaler_type=scaler_type, + num_workers_loader=num_workers_loader, + drop_last_loader=drop_last_loader, + random_seed=random_seed, + optimizer=optimizer, + optimizer_kwargs=optimizer_kwargs, + **trainer_kwargs + ) + + self.h = h + self.h_backup = self.h # Used because h=1 during training + self.use_softmax = True + self.hidden_size = hidden_size + self.dropout = dropout + self.trajectory_samples = trajectory_samples + + self.futr_exog_size = len(self.futr_exog_list) + self.stat_exog_size = len(self.stat_exog_list) + + input_dim = input_size * (1 + self.futr_exog_size) + self.stat_exog_size + # Create DeepNPTSNetwork + modules = [] + for i in range(n_layers): + modules.append(nn.Linear(input_dim if i == 0 else hidden_size, hidden_size)) + modules.append(nn.ReLU()) + if batch_norm: + modules.append(nn.BatchNorm1d(hidden_size)) + if dropout > 0.0: + modules.append(nn.Dropout(dropout)) + + self.deepnptsnetwork = nn.Sequential(*modules) + self.deepnptsnetwork.apply(partial(self._init_weights, scale=0.07)) + + # Add output layers for Mixture distribution + output_modules = [] + if dropout > 0.0: + output_modules.append(nn.Dropout(self.dropout)) + + if isinstance(loss, GMM): + output_modules.append(nn.Linear(hidden_size, input_size + 1)) + elif isinstance(loss, PMM): + output_modules.append(nn.Linear(hidden_size, input_size)) + elif isinstance(loss, NBMM): + output_modules.append(nn.Linear(hidden_size, input_size)) + + self.output_layer = nn.Sequential(*output_modules) + self.output_layer.apply(self._init_weights) + + @staticmethod + def _init_weights(module, scale=1.0): + if type(module) == nn.Linear: + nn.init.uniform_(module.weight, -scale, scale) + nn.init.zeros_(module.bias) + + def _domain_map(self, o_t, insample_y): + if 
isinstance(self.loss, GMM): + weights = o_t[:, :-1] # [B, L + 1] -> [B, L] + kernel_width = o_t[:, -1:] # [B, L + 1] -> [B, 1] + kernel_width = torch.repeat_interleave( + input=kernel_width, repeats=weights.shape[1], dim=-1 + ) # [B, 1] -> [B, L] + output = torch.cat( + [insample_y, kernel_width, weights], dim=-1 + ) # [B, L] + [B, L] + [B, L] = [B, 3 * L] + output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * L] + elif isinstance(self.loss, PMM): + weights = o_t # [B, L] -> [B, L] + output = torch.cat( + [insample_y, weights], dim=-1 + ) # [B, L] + [B, L] = [B, 2 * L] + output = output.unsqueeze(1) # [B, 2 * L] = [B, 1, 2 * L] + elif isinstance(self.loss, NBMM): + weights = torch.ones_like(o_t) # [B, L] -> [B, L] + output = torch.cat( + [insample_y, o_t, weights], dim=-1 + ) # [B, L] + [B, L] + [B, L] = [B, 3 * L] + output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * + + else: + raise NotImplementedError + + return output + + # Override BaseWindows method + def training_step(self, batch, batch_idx): + + # Only train one-step ahead + self.h = 1 + self.quantiles = self.loss.quantiles + + # Create and normalize windows [Ws, L+H, C] + y_idx = batch["y_idx"] + windows = self._create_windows(batch, step="train") + original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, y_idx]) + windows = self._normalization(windows=windows, y_idx=y_idx) + + # Parse windows + ( + insample_y, + insample_mask, + outsample_y, + outsample_mask, + _, + futr_exog, + stat_exog, + ) = self._parse_windows(batch, windows) + + windows_batch = dict( + insample_y=insample_y, # [Ws, L] + insample_mask=insample_mask, # [Ws, L] + futr_exog=futr_exog, # [Ws, L+H] + hist_exog=None, + stat_exog=stat_exog, # [Ws, 1] + y_idx=y_idx, # [Ws, 1] + ) + + # Model Predictions + output = self.train_forward(windows_batch) + + _, y_loc, y_scale = self._inv_normalization( + y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx + ) + # outsample_y = original_insample_y + outsample_y = 
original_outsample_y + distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale) + loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask) + + if torch.isnan(loss): + print("Model Parameters", self.hparams) + print("insample_y", torch.isnan(insample_y).sum()) + print("outsample_y", torch.isnan(outsample_y).sum()) + print("output", torch.isnan(output).sum()) + raise Exception("Loss is NaN, training stopped.") + + self.log("train_loss", loss, prog_bar=True, on_epoch=True) + self.train_trajectories.append((self.global_step, float(loss))) + + self.h = self.h_backup + + return loss + + # Override BaseWindows method + def validation_step(self, batch, batch_idx): + + self.h = self.h_backup + self.quantiles = self.valid_loss.quantiles + + if self.val_size == 0: + return np.nan + + # TODO: Hack to compute number of windows + windows = self._create_windows(batch, step="val") + n_windows = len(windows["temporal"]) + y_idx = batch["y_idx"] + + # Number of windows in batch + windows_batch_size = self.inference_windows_batch_size + if windows_batch_size < 0: + windows_batch_size = n_windows + n_batches = int(np.ceil(n_windows / windows_batch_size)) + + valid_losses = [] + batch_sizes = [] + for i in range(n_batches): + # Create and normalize windows [Ws, L+H, C] + w_idxs = np.arange( + i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows) + ) + windows = self._create_windows(batch, step="val", w_idxs=w_idxs) + original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, 0]) + windows = self._normalization(windows=windows, y_idx=y_idx) + + # Parse windows + ( + insample_y, + insample_mask, + _, + outsample_mask, + _, + futr_exog, + stat_exog, + ) = self._parse_windows(batch, windows) + + windows_batch = dict( + insample_y=insample_y, # [Ws, L] + insample_mask=insample_mask, # [Ws, L] + futr_exog=futr_exog, # [Ws, L+H] + hist_exog=None, # [Ws, L] + stat_exog=stat_exog, + y_idx=y_idx, + ) # [Ws, 1] + + # Model 
Predictions + output_batch = self(windows_batch) + # Monte Carlo already returns y_hat with mean and quantiles + output_batch = output_batch[:, :, 1:] # Remove mean + valid_loss_batch = self.valid_loss( + y=original_outsample_y, y_hat=output_batch, mask=outsample_mask + ) + valid_losses.append(valid_loss_batch) + batch_sizes.append(len(output_batch)) + + valid_loss = torch.stack(valid_losses) + batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device) + valid_loss = torch.sum(valid_loss * batch_sizes) / torch.sum(batch_sizes) + + if torch.isnan(valid_loss): + raise Exception("Loss is NaN, training stopped.") + + self.log("valid_loss", valid_loss, prog_bar=True, on_epoch=True) + self.validation_step_outputs.append(valid_loss) + return valid_loss + + # Override BaseWindows method + def predict_step(self, batch, batch_idx): + + self.h == self.h_backup + self.quantiles = self.loss.quantiles + + # TODO: Hack to compute number of windows + windows = self._create_windows(batch, step="predict") + n_windows = len(windows["temporal"]) + y_idx = batch["y_idx"] + + # Number of windows in batch + windows_batch_size = self.inference_windows_batch_size + if windows_batch_size < 0: + windows_batch_size = n_windows + n_batches = int(np.ceil(n_windows / windows_batch_size)) + + y_hats = [] + for i in range(n_batches): + # Create and normalize windows [Ws, L+H, C] + w_idxs = np.arange( + i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows) + ) + windows = self._create_windows(batch, step="predict", w_idxs=w_idxs) + windows = self._normalization(windows=windows, y_idx=y_idx) + + # Parse windows + insample_y, insample_mask, _, _, _, futr_exog, stat_exog = ( + self._parse_windows(batch, windows) + ) + windows_batch = dict( + insample_y=insample_y, # [Ws, L] + insample_mask=insample_mask, # [Ws, L] + futr_exog=futr_exog, # [Ws, L+H] + stat_exog=stat_exog, + y_idx=y_idx, + ) + + # Model Predictions + y_hat = self(windows_batch) + # Monte Carlo already returns y_hat 
with mean and quantiles + y_hats.append(y_hat) + y_hat = torch.cat(y_hats, dim=0) + return y_hat + + def train_forward(self, windows_batch): + # Parse windows_batch + x_t = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] + futr_exog = windows_batch["futr_exog"] # [B, L + h, F] + stat_exog = windows_batch["stat_exog"] # [B, S] + + batch_size, seq_len = x_t.shape[:2] # B = batch_size, L = seq_len + + # Concatenate x_t with future exogenous + if self.futr_exog_size > 0: + futr_exog_t = futr_exog[:, :seq_len] # [B, L + h, F] -> [B, L, F] + x_t = torch.cat( + (x_t, futr_exog_t), dim=2 + ) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] + + x_t = x_t.reshape(batch_size, -1) # [B, L, 1 + F] -> [B, L * (1 + F)] + + # Concatenate x_t with static exogenous + if self.stat_exog_size > 0: + x_t = torch.cat( + (x_t, stat_exog), dim=1 + ) # [B, L * (1 + F)] + [B, S] -> [B, L * (1 + F) + S] + + # Run through DeepNPTSNetwork + h_t = self.deepnptsnetwork(x_t) # [B, L * (1 + F) + S] -> [B, hidden_size] + o_t = self.output_layer(h_t) # [B, hidden_size] -> [B, L + 1] + + output = self._domain_map( + o_t, windows_batch["insample_y"] + ) # [B, L + 1], [B, L] -> [B, 3 * L] + output = self.loss.domain_map( + output + ) # [B, 3 * L] -> ([B, L], [B, L], [B, L]) + + return output + + def forward(self, windows_batch): + # Parse windows_batch + insample_y_t = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] + futr_exog = windows_batch["futr_exog"] # [B, L + h, F] + stat_exog = windows_batch["stat_exog"] # [B, S] + y_idx = windows_batch["y_idx"] + + batch_size, seq_len = insample_y_t.shape[:2] # B = batch_size, L = seq_len + device = insample_y_t.device + dtype = insample_y_t.dtype + + # Repeat insample_y for trajectory samples + insample_y_t = torch.repeat_interleave( + input=insample_y_t, repeats=self.trajectory_samples, dim=0 + ) # [B, L, 1] -> [B * n_samples, L, 1] + + # Input x_t is insample_y at time t + x_t = insample_y_t + + # Repeat futr_exog if available for trajectory samples and 
add to x_t + if self.futr_exog_size > 0: + futr_exog = torch.repeat_interleave( + input=futr_exog, repeats=self.trajectory_samples, dim=0 + ) # [B, L + h, F] -> [B * n_samples, L + h, F] + x_t = torch.cat( + (x_t, futr_exog[:, :seq_len]), dim=2 + ) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] + + x_t = x_t.reshape( + batch_size * self.trajectory_samples, -1 + ) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)] + + # Repeat stat_exog if available for trajectory samples and add to x_t + if self.stat_exog_size > 0: + stat_exog = torch.repeat_interleave( + input=stat_exog, repeats=self.trajectory_samples, dim=0 + ) # [B, S] -> [B * n_samples, S] + x_t = torch.cat( + (x_t, stat_exog), dim=1 + ) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S] + + # Scales for inverse normalization + y_scale = self.scaler.x_scale[:, :, y_idx] + y_loc = self.scaler.x_shift[:, :, y_idx] + y_scale = torch.repeat_interleave( + input=y_scale, repeats=self.trajectory_samples, dim=0 + ) + y_loc = torch.repeat_interleave( + input=y_loc, repeats=self.trajectory_samples, dim=0 + ) + # Create forecasts tensor + forecasts = torch.zeros( + (batch_size, self.h, len(self.quantiles) + 1), device=device, dtype=dtype + ) + + # Recursive predictions + for t in range(self.h): + # Run input throught DeepNPTSNetwork + h_t = self.deepnptsnetwork( + x_t + ) # [B * n_samples, L * (1 + F) + S] -> [B, hidden_size] + o_t = self.output_layer( + h_t + ) # [B * n_samples, hidden_size] -> [B * n_samples, L (+ 1)] + output = self._domain_map( + o_t, insample_y_t.squeeze(-1) + ) # [B * n_samples, L + 1], [B * n_samples, L] -> [B * n_samples, 3 * L] + output = self.loss.domain_map( + output + ) # [B * n_samples, 3 * L] -> ([B * n_samples, L], [B * n_samples, L], [B * n_samples, L]) + + # Inverse normalization + distr_args = self.loss.scale_decouple( + output=output, loc=y_loc, scale=y_scale + ) + + # Sample and create probabilistic outputs + 
samples_t_flat, _, _ = self.loss.sample( + distr_args=distr_args, num_samples=1 + ) + + samples_t_flat = samples_t_flat.squeeze() + samples_t = samples_t_flat.reshape( + batch_size, self.trajectory_samples + ) # [B * n_samples] -> [B, n_samples] + + samples_t_mean = torch.mean(samples_t, dim=-1) # [B, n_samples] -> [B] + quantiles_t = torch.quantile( + input=samples_t, q=self.quantiles, dim=-1 + ) # [B, n_samples] -> [Q, B] + forecasts[:, t, 0] = samples_t_mean + forecasts[:, t, 1:] = quantiles_t.permute(1, 0) + + insample_y_t_next = self.scaler.scaler( + samples_t_flat, y_loc.squeeze(), y_scale.squeeze() + ) # [B * n_samples] -> [B * n_samples] + insample_y_t_next = insample_y_t_next.unsqueeze(-1).unsqueeze( + -1 + ) # [B * n_samples] -> [B * n_samples, 1, 1] + + # Update insample_y_t + insample_y_t = torch.cat( + [insample_y_t[:, 1:], insample_y_t_next], dim=1 + ) # [B * n_samples, L - 1, 1] + [B * n_samples, 1, 1] -> [B * n_samples, L, 1] + + # Update input + x_t = insample_y_t + # Concatenate x_t with future exogenous + if self.futr_exog_size > 0: + x_t = torch.cat( + (x_t, futr_exog[:, t : seq_len + t]), dim=2 + ) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] + + x_t = x_t.reshape( + batch_size * self.trajectory_samples, -1 + ) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)] + + # Concatenate x_t with static exogenous + if self.stat_exog_size > 0: + x_t = torch.cat( + (x_t, stat_exog), dim=1 + ) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S] + + return forecasts From 54b2f0ae801f73c72acabbb7f2c3a4702f580a66 Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Mon, 22 Apr 2024 23:01:57 +0200 Subject: [PATCH 05/11] deepnpts_simple --- nbs/losses.pytorch.ipynb | 1714 ++--------------------------- nbs/models.deepnpts.ipynb | 869 +-------------- neuralforecast/_modidx.py | 14 +- neuralforecast/losses/pytorch.py | 118 +- neuralforecast/models/__init__.py | 2 +- 
neuralforecast/models/deepnpts.py | 454 +------- 6 files changed, 292 insertions(+), 2879 deletions(-) diff --git a/nbs/losses.pytorch.ipynb b/nbs/losses.pytorch.ipynb index 36adfaabd..387da910d 100644 --- a/nbs/losses.pytorch.ipynb +++ b/nbs/losses.pytorch.ipynb @@ -67,7 +67,7 @@ " Normal, \n", " StudentT, \n", " Poisson,\n", - " NegativeBinomial\n", + " NegativeBinomial,\n", ")\n", "\n", "from torch.distributions import constraints" @@ -244,61 +244,7 @@ "execution_count": null, "id": "1d004cd0", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAE.__init__\n", - "\n", - "> MAE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Absolute Error\n", - "\n", - "Calculates Mean Absolute Error between\n", - "`y` and `y_hat`. MAE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the\n", - "deviation of the prediction and the true\n", - "value at a given time and averages these devations\n", - "over the length of the series.\n", - "\n", - "$$ \\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} |y_{\\tau} - \\hat{y}_{\\tau}| $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAE.__init__\n", - "\n", - "> MAE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Absolute Error\n", - "\n", - "Calculates Mean Absolute Error between\n", - "`y` and `y_hat`. MAE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the\n", - "deviation of the prediction and the true\n", - "value at a given time and averages these devations\n", - "over the length of the series.\n", - "\n", - "$$ \\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} |y_{\\tau} - \\hat{y}_{\\tau}| $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MAE, name='MAE.__init__', title_level=3)" ] @@ -308,51 +254,7 @@ "execution_count": null, "id": "0a20a273", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L106){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAE.__call__\n", - "\n", - "> MAE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mae`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L106){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAE.__call__\n", - "\n", - "> MAE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mae`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MAE.__call__, name='MAE.__call__', title_level=3)" ] @@ -426,61 +328,7 @@ "execution_count": null, "id": "e8c65b82", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L126){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MSE.__init__\n", - "\n", - "> MSE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Squared Error\n", - "\n", - "Calculates Mean Squared Error between\n", - "`y` and `y_hat`. MSE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the \n", - "squared deviation of the prediction and the true\n", - "value at a given time, and averages these devations\n", - "over the length of the series.\n", - "\n", - "$$ \\mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L126){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MSE.__init__\n", - "\n", - "> MSE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Squared Error\n", - "\n", - "Calculates Mean Squared Error between\n", - "`y` and `y_hat`. MSE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the \n", - "squared deviation of the prediction and the true\n", - "value at a given time, and averages these devations\n", - "over the length of the series.\n", - "\n", - "$$ \\mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MSE, name='MSE.__init__', title_level=3)" ] @@ -490,51 +338,7 @@ "execution_count": null, "id": "b0126a7f", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L147){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MSE.__call__\n", - "\n", - "> MSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mse`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L147){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MSE.__call__\n", - "\n", - "> MSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mse`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MSE.__call__, name='MSE.__call__', title_level=3)" ] @@ -612,67 +416,7 @@ "execution_count": null, "id": "d961d383", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L167){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### RMSE.__init__\n", - "\n", - "> RMSE.__init__ (horizon_weight=None)\n", - "\n", - "Root Mean Squared Error\n", - "\n", - "Calculates Root Mean Squared Error between\n", - "`y` and `y_hat`. RMSE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the squared deviation\n", - "of the prediction and the observed value at a given time and\n", - "averages these devations over the length of the series.\n", - "Finally the RMSE will be in the same scale\n", - "as the original time series so its comparison with other\n", - "series is possible only if they share a common scale. \n", - "RMSE has a direct connection to the L2 norm.\n", - "\n", - "$$ \\mathrm{RMSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\sqrt{\\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2}} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L167){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### RMSE.__init__\n", - "\n", - "> RMSE.__init__ (horizon_weight=None)\n", - "\n", - "Root Mean Squared Error\n", - "\n", - "Calculates Root Mean Squared Error between\n", - "`y` and `y_hat`. RMSE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the squared deviation\n", - "of the prediction and the observed value at a given time and\n", - "averages these devations over the length of the series.\n", - "Finally the RMSE will be in the same scale\n", - "as the original time series so its comparison with other\n", - "series is possible only if they share a common scale. \n", - "RMSE has a direct connection to the L2 norm.\n", - "\n", - "$$ \\mathrm{RMSE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\sqrt{\\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^{2}} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(RMSE, name='RMSE.__init__', title_level=3)" ] @@ -682,51 +426,7 @@ "execution_count": null, "id": "d398d3e3", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L191){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### RMSE.__call__\n", - "\n", - "> RMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`rmse`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L191){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### RMSE.__call__\n", - "\n", - "> RMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`rmse`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(RMSE.__call__, name='RMSE.__call__', title_level=3)" ] @@ -817,69 +517,7 @@ "execution_count": null, "id": "174e8042", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L212){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAPE.__init__\n", - "\n", - "> MAPE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Absolute Percentage Error\n", - "\n", - "Calculates Mean Absolute Percentage Error between\n", - "`y` and `y_hat`. MAPE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the percentual deviation\n", - "of the prediction and the observed value at a given time and\n", - "averages these devations over the length of the series.\n", - "The closer to zero an observed value is, the higher penalty MAPE loss\n", - "assigns to the corresponding error.\n", - "\n", - "$$ \\mathrm{MAPE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L212){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAPE.__init__\n", - "\n", - "> MAPE.__init__ (horizon_weight=None)\n", - "\n", - "Mean Absolute Percentage Error\n", - "\n", - "Calculates Mean Absolute Percentage Error between\n", - "`y` and `y_hat`. MAPE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the percentual deviation\n", - "of the prediction and the observed value at a given time and\n", - "averages these devations over the length of the series.\n", - "The closer to zero an observed value is, the higher penalty MAPE loss\n", - "assigns to the corresponding error.\n", - "\n", - "$$ \\mathrm{MAPE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MAPE, name='MAPE.__init__', title_level=3)" ] @@ -889,51 +527,7 @@ "execution_count": null, "id": "da63f136", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L237){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAPE.__call__\n", - "\n", - "> MAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mape`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L237){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MAPE.__call__\n", - "\n", - "> MAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mape`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MAPE.__call__, name='MAPE.__call__', title_level=3)" ] @@ -1015,73 +609,7 @@ "execution_count": null, "id": "dee99fb8", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L259){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### SMAPE.__init__\n", - "\n", - "> SMAPE.__init__ (horizon_weight=None)\n", - "\n", - "Symmetric Mean Absolute Percentage Error\n", - "\n", - "Calculates Symmetric Mean Absolute Percentage Error between\n", - "`y` and `y_hat`. SMAPE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the relative deviation\n", - "of the prediction and the observed value scaled by the sum of the\n", - "absolute values for the prediction and observed value at a\n", - "given time, then averages these devations over the length\n", - "of the series. This allows the SMAPE to have bounds between\n", - "0% and 200% which is desireble compared to normal MAPE that\n", - "may be undetermined when the target is zero.\n", - "\n", - "$$ \\mathrm{sMAPE}_{2}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|+|\\hat{y}_{\\tau}|} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L259){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### SMAPE.__init__\n", - "\n", - "> SMAPE.__init__ (horizon_weight=None)\n", - "\n", - "Symmetric Mean Absolute Percentage Error\n", - "\n", - "Calculates Symmetric Mean Absolute Percentage Error between\n", - "`y` and `y_hat`. SMAPE measures the relative prediction\n", - "accuracy of a forecasting method by calculating the relative deviation\n", - "of the prediction and the observed value scaled by the sum of the\n", - "absolute values for the prediction and observed value at a\n", - "given time, then averages these devations over the length\n", - "of the series. This allows the SMAPE to have bounds between\n", - "0% and 200% which is desireble compared to normal MAPE that\n", - "may be undetermined when the target is zero.\n", - "\n", - "$$ \\mathrm{sMAPE}_{2}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{|y_{\\tau}|+|\\hat{y}_{\\tau}|} $$\n", - "\n", - "**Parameters:**
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Makridakis S., \"Accuracy measures: theoretical and practical concerns\".](https://www.sciencedirect.com/science/article/pii/0169207093900793)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(SMAPE, name='SMAPE.__init__', title_level=3)" ] @@ -1091,51 +619,7 @@ "execution_count": null, "id": "db62a845", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L286){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### SMAPE.__call__\n", - "\n", - "> SMAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`smape`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L286){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### SMAPE.__call__\n", - "\n", - "> SMAPE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`smape`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(SMAPE.__call__, name='SMAPE.__call__', title_level=3)" ] @@ -1222,71 +706,7 @@ "execution_count": null, "id": "b6a4cf21", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L308){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MASE.__init__\n", - "\n", - "> MASE.__init__ (seasonality:int, horizon_weight=None)\n", - "\n", - "Mean Absolute Scaled Error \n", - "Calculates the Mean Absolute Scaled Error between\n", - "`y` and `y_hat`. MASE measures the relative prediction\n", - "accuracy of a forecasting method by comparinng the mean absolute errors\n", - "of the prediction and the observed value against the mean\n", - "absolute errors of the seasonal naive model.\n", - "The MASE partially composed the Overall Weighted Average (OWA), \n", - "used in the M4 Competition.\n", - "\n", - "$$ \\mathrm{MASE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{\\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau})} $$\n", - "\n", - "**Parameters:**
\n", - "`seasonality`: int. Main frequency of the time series; Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Rob J. Hyndman, & Koehler, A. B. \"Another look at measures of forecast accuracy\".](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", - "[Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, \"The M4 Competition: 100,000 time series and 61 forecasting methods\".](https://www.sciencedirect.com/science/article/pii/S0169207019301128)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L308){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MASE.__init__\n", - "\n", - "> MASE.__init__ (seasonality:int, horizon_weight=None)\n", - "\n", - "Mean Absolute Scaled Error \n", - "Calculates the Mean Absolute Scaled Error between\n", - "`y` and `y_hat`. MASE measures the relative prediction\n", - "accuracy of a forecasting method by comparinng the mean absolute errors\n", - "of the prediction and the observed value against the mean\n", - "absolute errors of the seasonal naive model.\n", - "The MASE partially composed the Overall Weighted Average (OWA), \n", - "used in the M4 Competition.\n", - "\n", - "$$ \\mathrm{MASE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\frac{|y_{\\tau}-\\hat{y}_{\\tau}|}{\\mathrm{MAE}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{season}_{\\tau})} $$\n", - "\n", - "**Parameters:**
\n", - "`seasonality`: int. Main frequency of the time series; Hourly 24, Daily 7, Weekly 52, Monthly 12, Quarterly 4, Yearly 1.\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Rob J. Hyndman, & Koehler, A. B. \"Another look at measures of forecast accuracy\".](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", - "[Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, \"The M4 Competition: 100,000 time series and 61 forecasting methods\".](https://www.sciencedirect.com/science/article/pii/S0169207019301128)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MASE, name='MASE.__init__', title_level=3)" ] @@ -1296,53 +716,7 @@ "execution_count": null, "id": "32a2c11b", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L335){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MASE.__call__\n", - "\n", - "> MASE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> y_insample:torch.Tensor, mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor (batch_size, output_size), Actual values.
\n", - "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", - "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mase`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L335){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MASE.__call__\n", - "\n", - "> MASE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> y_insample:torch.Tensor, mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor (batch_size, output_size), Actual values.
\n", - "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", - "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mase`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MASE.__call__, name='MASE.__call__', title_level=3)" ] @@ -1429,69 +803,7 @@ "execution_count": null, "id": "edeb6f9a", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L364){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### relMSE.__init__\n", - "\n", - "> relMSE.__init__ (y_train, horizon_weight=None)\n", - "\n", - "Relative Mean Squared Error\n", - "Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n", - "as an alternative to percentage errors, to avoid measure unstability.\n", - "$$ \\mathrm{relMSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{\\hat{y}}^{naive1}) =\n", - "\\frac{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}}^{naive1})} $$\n", - "\n", - "**Parameters:**
\n", - "`y_train`: numpy array, Training values.
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "- [Hyndman, R. J and Koehler, A. B. (2006).\n", - " \"Another look at measures of forecast accuracy\",\n", - " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", - "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", - " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", - " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L364){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### relMSE.__init__\n", - "\n", - "> relMSE.__init__ (y_train, horizon_weight=None)\n", - "\n", - "Relative Mean Squared Error\n", - "Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n", - "as an alternative to percentage errors, to avoid measure unstability.\n", - "$$ \\mathrm{relMSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{\\hat{y}}^{naive1}) =\n", - "\\frac{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\mathbf{y}, \\mathbf{\\hat{y}}^{naive1})} $$\n", - "\n", - "**Parameters:**
\n", - "`y_train`: numpy array, Training values.
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "- [Hyndman, R. J and Koehler, A. B. (2006).\n", - " \"Another look at measures of forecast accuracy\",\n", - " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
\n", - "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", - " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", - " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(relMSE, name='relMSE.__init__', title_level=3)" ] @@ -1501,53 +813,7 @@ "execution_count": null, "id": "a317b5c5", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L391){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### relMSE.__call__\n", - "\n", - "> relMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor (batch_size, output_size), Actual values.
\n", - "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", - "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`relMSE`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L391){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### relMSE.__call__\n", - "\n", - "> relMSE.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor (batch_size, output_size), Actual values.
\n", - "`y_hat`: tensor (batch_size, output_size)), Predicted values.
\n", - "`y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`relMSE`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(relMSE.__call__, name='relMSE.__call__', title_level=3)" ] @@ -1632,67 +898,7 @@ "execution_count": null, "id": "70bd46d9", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L418){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### QuantileLoss.__init__\n", - "\n", - "> QuantileLoss.__init__ (q, horizon_weight=None)\n", - "\n", - "Quantile Loss\n", - "\n", - "Computes the quantile loss between `y` and `y_hat`.\n", - "QL measures the deviation of a quantile forecast.\n", - "By weighting the absolute deviation in a non symmetric way, the\n", - "loss pays more attention to under or over estimation.\n", - "A common value for q is 0.5 for the deviation from the median (Pinball loss).\n", - "\n", - "$$ \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q)}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\Big( (1-q)\\,( \\hat{y}^{(q)}_{\\tau} - y_{\\tau} )_{+} + q\\,( y_{\\tau} - \\hat{y}^{(q)}_{\\tau} )_{+} \\Big) $$\n", - "\n", - "**Parameters:**
\n", - "`q`: float, between 0 and 1. The slope of the quantile loss, in the context of quantile regression, the q determines the conditional quantile level.
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L418){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### QuantileLoss.__init__\n", - "\n", - "> QuantileLoss.__init__ (q, horizon_weight=None)\n", - "\n", - "Quantile Loss\n", - "\n", - "Computes the quantile loss between `y` and `y_hat`.\n", - "QL measures the deviation of a quantile forecast.\n", - "By weighting the absolute deviation in a non symmetric way, the\n", - "loss pays more attention to under or over estimation.\n", - "A common value for q is 0.5 for the deviation from the median (Pinball loss).\n", - "\n", - "$$ \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q)}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} \\Big( (1-q)\\,( \\hat{y}^{(q)}_{\\tau} - y_{\\tau} )_{+} + q\\,( y_{\\tau} - \\hat{y}^{(q)}_{\\tau} )_{+} \\Big) $$\n", - "\n", - "**Parameters:**
\n", - "`q`: float, between 0 and 1. The slope of the quantile loss, in the context of quantile regression, the q determines the conditional quantile level.
\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(QuantileLoss, name='QuantileLoss.__init__', title_level=3)" ] @@ -1702,51 +908,7 @@ "execution_count": null, "id": "0b1588e9", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L445){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### QuantileLoss.__call__\n", - "\n", - "> QuantileLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`quantile_loss`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L445){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### QuantileLoss.__call__\n", - "\n", - "> QuantileLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies datapoints to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`quantile_loss`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(QuantileLoss.__call__, name='QuantileLoss.__call__', title_level=3)" ] @@ -1918,87 +1080,7 @@ "execution_count": null, "id": "8f42ec82", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L494){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MQLoss.__init__\n", - "\n", - "> MQLoss.__init__ (level=[80, 90], quantiles=None, horizon_weight=None)\n", - "\n", - "Multi-Quantile loss\n", - "\n", - "Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", - "MQL calculates the average multi-quantile Loss for\n", - "a given set of quantiles, based on the absolute \n", - "difference between predicted quantiles and observed values.\n", - "\n", - "$$ \\mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\\hat{y}}^{(q_{1})}_{\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q_{i})}_{\\tau}) $$\n", - "\n", - "The limit behavior of MQL allows to measure the accuracy \n", - "of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with \n", - "the continuous ranked probability score (CRPS). This can be achieved \n", - "through a numerical integration technique, that discretizes the quantiles \n", - "and treats the CRPS integral with a left Riemann approximation, averaging over \n", - "uniformly distanced quantiles. \n", - "\n", - "$$ \\mathrm{CRPS}(y_{\\tau}, \\mathbf{\\hat{F}}_{\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\tau}, \\hat{y}^{(q)}_{\\tau}) dq $$\n", - "\n", - "**Parameters:**
\n", - "`level`: int list [0,100]. Probability levels for prediction intervals (Defaults median).\n", - "`quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", - "[James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L494){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MQLoss.__init__\n", - "\n", - "> MQLoss.__init__ (level=[80, 90], quantiles=None, horizon_weight=None)\n", - "\n", - "Multi-Quantile loss\n", - "\n", - "Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", - "MQL calculates the average multi-quantile Loss for\n", - "a given set of quantiles, based on the absolute \n", - "difference between predicted quantiles and observed values.\n", - "\n", - "$$ \\mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\\hat{y}}^{(q_{1})}_{\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \\mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\\hat{y}}^{(q_{i})}_{\\tau}) $$\n", - "\n", - "The limit behavior of MQL allows to measure the accuracy \n", - "of a full predictive distribution $\\mathbf{\\hat{F}}_{\\tau}$ with \n", - "the continuous ranked probability score (CRPS). This can be achieved \n", - "through a numerical integration technique, that discretizes the quantiles \n", - "and treats the CRPS integral with a left Riemann approximation, averaging over \n", - "uniformly distanced quantiles. \n", - "\n", - "$$ \\mathrm{CRPS}(y_{\\tau}, \\mathbf{\\hat{F}}_{\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\tau}, \\hat{y}^{(q)}_{\\tau}) dq $$\n", - "\n", - "**Parameters:**
\n", - "`level`: int list [0,100]. Probability levels for prediction intervals (Defaults median).\n", - "`quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.\n", - "`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n", - "\n", - "**References:**
\n", - "[Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", - "[James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MQLoss, name='MQLoss.__init__', title_level=3)" ] @@ -2008,51 +1090,7 @@ "execution_count": null, "id": "bac2237a", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L568){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MQLoss.__call__\n", - "\n", - "> MQLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mqloss`: tensor (single value)." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L568){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### MQLoss.__call__\n", - "\n", - "> MQLoss.__call__ (y:torch.Tensor, y_hat:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "**Parameters:**
\n", - "`y`: tensor, Actual values.
\n", - "`y_hat`: tensor, Predicted values.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns:**
\n", - "`mqloss`: tensor (single value)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(MQLoss.__call__, name='MQLoss.__call__', title_level=3)" ] @@ -2071,17 +1109,7 @@ "execution_count": null, "id": "da37f2ef", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", - "Parameter containing:\n", - "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" - ] - } - ], + "outputs": [], "source": [ "# | hide\n", "# Unit tests to check MQLoss' stored quantiles\n", @@ -2626,99 +1654,7 @@ "execution_count": null, "id": "a462101b", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L913){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.__init__\n", - "\n", - "> DistributionLoss.__init__ (distribution, level=[80, 90], quantiles=None,\n", - "> num_samples=1000, return_params=False,\n", - "> **distribution_kwargs)\n", - "\n", - "DistributionLoss\n", - "\n", - "This PyTorch module wraps the `torch.distribution` classes allowing it to \n", - "interact with NeuralForecast models modularly. It shares the negative \n", - "log-likelihood as the optimization objective and a sample method to \n", - "generate empirically the quantiles defined by the `level` list.\n", - "\n", - "Additionally, it implements a distribution transformation that factorizes the\n", - "scale-dependent likelihood parameters into a base scale and a multiplier \n", - "efficiently learnable within the network's non-linearities operating ranges.\n", - "\n", - "Available distributions:
\n", - "- Poisson
\n", - "- Normal
\n", - "- StudentT
\n", - "- NegativeBinomial
\n", - "- Tweedie
\n", - "- Bernoulli (Temporal Classifiers)\n", - "\n", - "**Parameters:**
\n", - "`distribution`: str, identifier of a torch.distributions.Distribution class.
\n", - "`level`: float list [0,100], confidence levels for prediction intervals.
\n", - "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", - "`num_samples`: int=500, number of samples for the empirical quantiles.
\n", - "`return_params`: bool=False, wether or not return the Distribution parameters.

\n", - "\n", - "**References:**
\n", - "- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
\n", - "- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020).\n", - " \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L913){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.__init__\n", - "\n", - "> DistributionLoss.__init__ (distribution, level=[80, 90], quantiles=None,\n", - "> num_samples=1000, return_params=False,\n", - "> **distribution_kwargs)\n", - "\n", - "DistributionLoss\n", - "\n", - "This PyTorch module wraps the `torch.distribution` classes allowing it to \n", - "interact with NeuralForecast models modularly. It shares the negative \n", - "log-likelihood as the optimization objective and a sample method to \n", - "generate empirically the quantiles defined by the `level` list.\n", - "\n", - "Additionally, it implements a distribution transformation that factorizes the\n", - "scale-dependent likelihood parameters into a base scale and a multiplier \n", - "efficiently learnable within the network's non-linearities operating ranges.\n", - "\n", - "Available distributions:
\n", - "- Poisson
\n", - "- Normal
\n", - "- StudentT
\n", - "- NegativeBinomial
\n", - "- Tweedie
\n", - "- Bernoulli (Temporal Classifiers)\n", - "\n", - "**Parameters:**
\n", - "`distribution`: str, identifier of a torch.distributions.Distribution class.
\n", - "`level`: float list [0,100], confidence levels for prediction intervals.
\n", - "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", - "`num_samples`: int=500, number of samples for the empirical quantiles.
\n", - "`return_params`: bool=False, wether or not return the Distribution parameters.

\n", - "\n", - "**References:**
\n", - "- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
\n", - "- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020).\n", - " \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DistributionLoss, name='DistributionLoss.__init__', title_level=3)" ] @@ -2728,65 +1664,7 @@ "execution_count": null, "id": "d8c367f8", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1040){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.sample\n", - "\n", - "> DistributionLoss.sample (distr_args:torch.Tensor,\n", - "> num_samples:Optional[int]=None)\n", - "\n", - "Construct the empirical quantiles from the estimated Distribution,\n", - "sampling from it `num_samples` independently.\n", - "\n", - "**Parameters**
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`num_samples`: int=500, overwrite number of samples for the empirical quantiles.
\n", - "\n", - "**Returns**
\n", - "`samples`: tensor, shape [B,H,`num_samples`].
\n", - "`quantiles`: tensor, empirical quantiles defined by `levels`.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1040){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.sample\n", - "\n", - "> DistributionLoss.sample (distr_args:torch.Tensor,\n", - "> num_samples:Optional[int]=None)\n", - "\n", - "Construct the empirical quantiles from the estimated Distribution,\n", - "sampling from it `num_samples` independently.\n", - "\n", - "**Parameters**
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`num_samples`: int=500, overwrite number of samples for the empirical quantiles.
\n", - "\n", - "**Returns**
\n", - "`samples`: tensor, shape [B,H,`num_samples`].
\n", - "`quantiles`: tensor, empirical quantiles defined by `levels`.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DistributionLoss.sample, name='DistributionLoss.sample', title_level=3)" ] @@ -2796,75 +1674,7 @@ "execution_count": null, "id": "04e32679", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1083){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.__call__\n", - "\n", - "> DistributionLoss.__call__ (y:torch.Tensor, distr_args:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "Computes the negative log-likelihood objective function. \n", - "To estimate the following predictive distribution:\n", - "\n", - "$$\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta) \\quad \\mathrm{and} \\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta))$$\n", - "\n", - "where $\\theta$ represents the distributions parameters. It aditionally \n", - "summarizes the objective signal using a weighted average using the `mask` tensor. \n", - "\n", - "**Parameters**
\n", - "`y`: tensor, Actual values.
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns**
\n", - "`loss`: scalar, weighted loss function against which backpropagation will be performed.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1083){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DistributionLoss.__call__\n", - "\n", - "> DistributionLoss.__call__ (y:torch.Tensor, distr_args:torch.Tensor,\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "Computes the negative log-likelihood objective function. \n", - "To estimate the following predictive distribution:\n", - "\n", - "$$\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta) \\quad \\mathrm{and} \\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\tau}\\,|\\,\\theta))$$\n", - "\n", - "where $\\theta$ represents the distributions parameters. It aditionally \n", - "summarizes the objective signal using a weighted average using the `mask` tensor. \n", - "\n", - "**Parameters**
\n", - "`y`: tensor, Actual values.
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`mask`: tensor, Specifies date stamps per serie to consider in loss.
\n", - "\n", - "**Returns**
\n", - "`loss`: scalar, weighted loss function against which backpropagation will be performed.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DistributionLoss.__call__, name='DistributionLoss.__call__', title_level=3)" ] @@ -2874,17 +1684,7 @@ "execution_count": null, "id": "14a7e381", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", - "Parameter containing:\n", - "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" - ] - } - ], + "outputs": [], "source": [ "# | hide\n", "# Unit tests to check DistributionLoss' stored quantiles\n", @@ -2964,42 +1764,35 @@ " # If True, predict_step will return Distribution's parameters\n", " self.return_params = return_params\n", " if self.return_params:\n", - " lambda_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n", - " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", - " self.param_names = [i for j in zip(lambda_names, weight_names) for i in j]\n", + " self.param_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n", " self.output_names = self.output_names + self.param_names\n", "\n", " # Add first output entry for the sample_mean\n", " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = 2 * n_components\n", + " self.outputsize_multiplier = n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " lambdas, weights = output.chunk(2, dim=-1)\n", - " return (lambdas, weights)\n", - "\n", - " def scale_decouple(\n", - " self,\n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None,\n", - " ):\n", - " \"\"\"Scale Decouple\n", + " return (output,)#, weights\n", + " \n", + " def scale_decouple(self, \n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None):\n", + " \"\"\" Scale Decouple\n", 
"\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", - " lambdas, weights = output\n", - " weights = F.softmax(weights, dim=-1)\n", - "\n", + " lambdas = output[0]\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(lambdas.size(dim=0), 1, -1)\n", " scale = scale.view(lambdas.size(dim=0), 1, -1)\n", " lambdas = (lambdas * scale) + loc\n", " lambdas = F.softplus(lambdas)\n", - " return (lambdas, weights)\n", + " return (lambdas,)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -3021,10 +1814,15 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", "\n", - " lambdas, weights = distr_args\n", + " lambdas = distr_args[0]\n", " B, H, K = lambdas.size()\n", " Q = len(self.quantiles)\n", "\n", + " # Sample K ~ Mult(weights)\n", + " # shared across B, H\n", + " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", + " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n", + "\n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", " lambdas = lambdas.flatten() \n", @@ -3062,7 +1860,7 @@ " \n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", " if mask is None: \n", " mask = (y > 0) * 1\n", @@ -3070,9 +1868,11 @@ " mask = mask * ((y > 0) * 1)\n", "\n", " eps = 1e-10\n", - " lambdas, weights = distr_args\n", + " lambdas = distr_args[0]\n", " B, H, K = lambdas.size()\n", "\n", + " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n", + "\n", " y = y[:,:,None]\n", " mask = mask[:,:,None]\n", "\n", @@ -3097,7 +1897,7 @@ " return loss\n", "\n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, 
torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)\n" @@ -3108,83 +1908,7 @@ "execution_count": null, "id": "62d7daba", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.__init__\n", - "\n", - "> PMM.__init__ (n_components=10, level=[80, 90], quantiles=None,\n", - "> num_samples=1000, return_params=False,\n", - "> batch_correlation=False, horizon_correlation=False)\n", - "\n", - "Poisson Mixture Mesh\n", - "\n", - "This Poisson Mixture statistical model assumes independence across groups of \n", - "data $\\mathcal{G}=\\{[g_{i}]\\}$, and estimates relationships within the group.\n", - "\n", - "$$ \\mathrm{P}\\left(\\mathbf{y}_{[b][t+1:t+H]}\\right) = \n", - "\\prod_{ [g_{i}] \\in \\mathcal{G}} \\mathrm{P} \\left(\\mathbf{y}_{[g_{i}][\\tau]} \\right) =\n", - "\\prod_{\\beta\\in[g_{i}]} \n", - "\\left(\\sum_{k=1}^{K} w_k \\prod_{(\\beta,\\tau) \\in [g_i][t+1:t+H]} \\mathrm{Poisson}(y_{\\beta,\\tau}, \\hat{\\lambda}_{\\beta,\\tau,k}) \\right)$$\n", - "\n", - "**Parameters:**
\n", - "`n_components`: int=10, the number of mixture components.
\n", - "`level`: float list [0,100], confidence levels for prediction intervals.
\n", - "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", - "`return_params`: bool=False, wether or not return the Distribution parameters.
\n", - "`batch_correlation`: bool=False, wether or not model batch correlations.
\n", - "`horizon_correlation`: bool=False, wether or not model horizon correlations.
\n", - "\n", - "**References:**
\n", - "[Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", - "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. Submitted to the International \n", - "Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.__init__\n", - "\n", - "> PMM.__init__ (n_components=10, level=[80, 90], quantiles=None,\n", - "> num_samples=1000, return_params=False,\n", - "> batch_correlation=False, horizon_correlation=False)\n", - "\n", - "Poisson Mixture Mesh\n", - "\n", - "This Poisson Mixture statistical model assumes independence across groups of \n", - "data $\\mathcal{G}=\\{[g_{i}]\\}$, and estimates relationships within the group.\n", - "\n", - "$$ \\mathrm{P}\\left(\\mathbf{y}_{[b][t+1:t+H]}\\right) = \n", - "\\prod_{ [g_{i}] \\in \\mathcal{G}} \\mathrm{P} \\left(\\mathbf{y}_{[g_{i}][\\tau]} \\right) =\n", - "\\prod_{\\beta\\in[g_{i}]} \n", - "\\left(\\sum_{k=1}^{K} w_k \\prod_{(\\beta,\\tau) \\in [g_i][t+1:t+H]} \\mathrm{Poisson}(y_{\\beta,\\tau}, \\hat{\\lambda}_{\\beta,\\tau,k}) \\right)$$\n", - "\n", - "**Parameters:**
\n", - "`n_components`: int=10, the number of mixture components.
\n", - "`level`: float list [0,100], confidence levels for prediction intervals.
\n", - "`quantiles`: float list [0,1], alternative to level list, target quantiles.
\n", - "`return_params`: bool=False, wether or not return the Distribution parameters.
\n", - "`batch_correlation`: bool=False, wether or not model batch correlations.
\n", - "`horizon_correlation`: bool=False, wether or not model horizon correlations.
\n", - "\n", - "**References:**
\n", - "[Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", - "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. Submitted to the International \n", - "Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(PMM, name='PMM.__init__', title_level=3)" ] @@ -3194,63 +1918,7 @@ "execution_count": null, "id": "fa8da65c", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1206){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.sample\n", - "\n", - "> PMM.sample (distr_args, num_samples=None)\n", - "\n", - "Construct the empirical quantiles from the estimated Distribution,\n", - "sampling from it `num_samples` independently.\n", - "\n", - "**Parameters**
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`num_samples`: int=500, overwrites number of samples for the empirical quantiles.
\n", - "\n", - "**Returns**
\n", - "`samples`: tensor, shape [B,H,`num_samples`].
\n", - "`quantiles`: tensor, empirical quantiles defined by `levels`.
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1206){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.sample\n", - "\n", - "> PMM.sample (distr_args, num_samples=None)\n", - "\n", - "Construct the empirical quantiles from the estimated Distribution,\n", - "sampling from it `num_samples` independently.\n", - "\n", - "**Parameters**
\n", - "`distr_args`: Constructor arguments for the underlying Distribution type.
\n", - "`loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n", - " of the resulting distribution.
\n", - "`scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n", - " of the resulting distribution.
\n", - "`num_samples`: int=500, overwrites number of samples for the empirical quantiles.
\n", - "\n", - "**Returns**
\n", - "`samples`: tensor, shape [B,H,`num_samples`].
\n", - "`quantiles`: tensor, empirical quantiles defined by `levels`.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(PMM.sample, name='PMM.sample', title_level=3)" ] @@ -3260,39 +1928,7 @@ "execution_count": null, "id": "ba75717c", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1305){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.__call__\n", - "\n", - "> PMM.__call__ (y:torch.Tensor, distr_args:Tuple[torch.Tensor],\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "Call self as a function." - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/losses/pytorch.py#L1305){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### PMM.__call__\n", - "\n", - "> PMM.__call__ (y:torch.Tensor, distr_args:Tuple[torch.Tensor],\n", - "> mask:Optional[torch.Tensor]=None)\n", - "\n", - "Call self as a function." 
- ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(PMM.__call__, name='PMM.__call__', title_level=3)" ] @@ -3311,17 +1947,7 @@ "execution_count": null, "id": "e4a20e21", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", - "Parameter containing:\n", - "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" - ] - } - ], + "outputs": [], "source": [ "# | hide\n", "# Unit tests to check PMM's stored quantiles\n", @@ -3345,43 +1971,11 @@ "execution_count": null, "id": "a56a2fbe", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "lambdas.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "samples.shape (N,H,num_samples) torch.Size([2, 2, 1000])\n", - "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", - "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| hide\n", - "# Create single mixture and broadcast to N, H, K\n", - "weights = torch.ones((2,3))[None, :, :]\n", + "# Create single mixture and broadcast to N,H,K\n", + "weights = torch.ones((1,3))[None, :, :]\n", "lambdas = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :]\n", "\n", "# Create repetitions for the batch dimension N.\n", @@ -3393,7 +1987,7 @@ "print('lambdas.shape (N,H,K) \\t', lambdas.shape)\n", "\n", "distr = PMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (lambdas, weights)\n", + "distr_args = (lambdas,)\n", "samples, sample_mean, quants = distr.sample(distr_args)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", @@ -3498,44 +2092,38 @@ " if self.return_params:\n", " mu_names = [f\"-mu-{i}\" for i in range(1, n_components + 1)]\n", " std_names = [f\"-std-{i}\" for i in range(1, n_components + 1)]\n", - " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", - " self.param_names = [i for j in zip(mu_names, std_names, weight_names) for i in j]\n", - " self.output_names = self.output_names + self.param_names\n", + " mu_std_names = [i for j in zip(mu_names, std_names) for i in j]\n", + " self.output_names = self.output_names + mu_std_names\n", "\n", " # Add first output entry for the sample_mean\n", " self.output_names.insert(0, \"\")\n", "\n", - " self.outputsize_multiplier = 3 * n_components\n", + " self.outputsize_multiplier = 2 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " means, stds, weights = output.chunk(3, dim=-1)\n", - "\n", - " return (means, stds, weights)\n", + " means, stds = torch.tensor_split(output, 2, dim=-1)\n", + " return (means, stds)\n", "\n", - " def scale_decouple(\n", - " self,\n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = None,\n", - " eps: 
float = 0.2,\n", - " ):\n", - " \"\"\"Scale Decouple\n", + " def scale_decouple(self, \n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " eps: float=0.2):\n", + " \"\"\" Scale Decouple\n", "\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", - " means, stds, weights = output\n", + " means, stds = output\n", " stds = F.softplus(stds)\n", - " weights = F.softmax(weights, dim=-1)\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(means.size(dim=0), 1, -1)\n", - " scale = scale.view(means.size(dim=0), 1, -1)\n", + " scale = scale.view(means.size(dim=0), 1, -1) \n", " means = (means * scale) + loc\n", " stds = (stds + eps) * scale\n", - "\n", - " return (means, stds, weights)\n", + " return (means, stds)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -3557,11 +2145,17 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", " \n", - " means, stds, weights = distr_args\n", + " means, stds = distr_args\n", " B, H, K = means.size()\n", " Q = len(self.quantiles)\n", " assert means.shape == stds.shape\n", "\n", + " # Sample K ~ Mult(weights)\n", + " # shared across B, H\n", + " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", + " \n", + " weights = (1/K) * torch.ones_like(means, device=means.device)\n", + " \n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", " means = means.flatten()\n", @@ -3601,15 +2195,17 @@ "\n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " if mask is None: \n", " mask = torch.ones_like(y)\n", " \n", - " means, stds, weights 
= distr_args\n", + " means, stds = distr_args\n", " B, H, K = means.size()\n", - " \n", + " \n", + " weights = (1/K) * torch.ones_like(means, device=means.device)\n", + " \n", " y = y[:,:, None]\n", " mask = mask[:,:,None]\n", " \n", @@ -3632,7 +2228,7 @@ " return loss\n", " \n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)" @@ -3682,17 +2278,7 @@ "execution_count": null, "id": "8ebe4250", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['', '-lo-98.0', '-lo-80.0', '-median', '-hi-80.0', '-hi-98.0']\n", - "Parameter containing:\n", - "tensor([0.0100, 0.1000, 0.5000, 0.9000, 0.9900])\n" - ] - } - ], + "outputs": [], "source": [ "# | hide\n", "# Unit tests to check PMM's stored quantiles\n", @@ -3716,40 +2302,7 @@ "execution_count": null, "id": "684d2382", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "means.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "stds.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "samples.shape (N,H,num_samples) torch.Size([2, 2, 1000])\n", - "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", - "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfgAAAEyCAYAAAAWW8KtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABcFElEQVR4nO3dd1iTV/sH8G8CCSQs2UO2RK2guCp1FRyoqDi6bG2rtNYurVqrba1txVF81WqHVrsc1Mmvtb6v1klVUOuoUqy4QVBR9pA9EvL8/njMA4EASQgkhPtzXbk0z8o5EHLnnOec+/AYhmFACCGEEKPC13cBCCGEEKJ7FOAJIYQQI0QBnhBCCDFCFOAJIYQQI0QBnhBCCDFCFOAJIYQQI0QBnhBCCDFCFOAJIYQQI0QBnhBCCDFCFOB14MqVK5gxYwa6dOkCkUgEkUgEiUSCt956C5cuXVI6NjIyEjweD3w+H6mpqQ2uVVZWBmtra/B4PERERHDb7969Cx6PBx6Ph8jISJXleP3117ljmqMoh+IhFArh4+ODuXPn4tGjR5pUv0mKcn/55Zc6u2ZcXBx4PB5+++23Zo9V1LOukJAQhISEKG1r6ufamEOHDjV6jre3t9LvryVCQkKUflcikQiBgYH4+uuvIZfLdfIaTVH8vOPi4rhtERER8Pb21vhaGzduxLZt2xpsV7xPVO0jhGiHAnwL/fDDD+jXrx8uXLiAuXPn4o8//sDBgwcxb948XLt2DU8++STu3LnT4DxLS0ts3bq1wfZff/0VUqkUAoFA5etZWVlh27ZtDT7YS0tL8euvv8La2lqj8h85cgTnzp3DwYMHMWnSJKxfvx5hYWEwlgzGb7zxBs6dO9fscefOncMbb7yh0bUPHTqEpUuXqty3b98+fPbZZxpdrym+vr44d+4czp07h5iYGHTu3Bnvv/8+Fi1apLPX0MRnn32Gffv2aXxeYwHe1dUV586dw7hx43RQOkIIAJjquwDt2V9//YV3330X48aNw2+//QahUMjtGz58OGbNmoVff/0VIpGowblTpkxBdHQ0li5dCj6/9nvW5s2bMXnyZOzfv1/la06ZMgU///wzjh8/jtDQUG57TEwMampqMGnSJOzYsUPtOvTr1w8ODg4AgNDQUOTn52P79u04e/YsBg8erPKc8vJyiMVitV9Dn9zd3eHu7t7scU899ZROX7dPnz46vZ5IJFIqY1hYGLp3744NGzZgxYoVKr8QMgyDyspKle+/lurSpYtOr2dmZqbz3wEhHR214FsgKioKJiYm+OGHH5SCe13PP/883NzcGmx//fXXkZ6ejtjYWG7b7du3cebMGbz++uuNvma3bt0waNAgbNmyRWn7li1b8Mwzz8DGxkbL2rAUH7L37t0DwHYPBwQE4NSpUxg0aBDEYjFXvvv37+OVV16Bk5MTzMzM8MQTT2Dt2rUqu43lcjm++OILeHp6wtzcHP3798fx48eVjklJScFrr70GiUQCsViMzp07Izw8HElJSSrLWllZifnz58PFxQUikQjBwcFITExUOkZVF70q9bvoy8vLsWDBAvj4+MDc3Bx2dnbo378/du/eDYDtov7uu++4cxWPu3fvAlDdRf/o0SN88MEH8PX1hZmZGZycnDB27FjcvHmz2fLVJxAI0K9fP5SXlyM3N5crx+zZs/H999/jiSeegJmZGaKjowEAycnJmDp1qtLvSlH+um7evIkxY8ZALBbDwcEBb7/9NkpKShocp6qLXi6XY/369ejduzdEIhE6deqEp556ivuy6u3tjWvXriE+Pp77eSmu0VgX/ZkzZzBixAhYWVlBLBZj0KBBOHjwoNIx27ZtA4/Hw8mTJ/HOO+/AwcEB9vb2eOaZZ5CRkaF07IkTJxASEgJ7e3uIRCJ4en
ri2WefRXl5udo/e0LaC2rBa6mmpgYnT55E//794erqqvH5EokEQ4cOxZYtWzB69GgAbJD29vbGiBEjmjx3xowZmDVrFgoLC2Fra4tbt27h7NmzWLFiBfbu3atVfRRSUlIAAI6Ojty2zMxMvPLKK/jwww8RFRUFPp+P3NxcDBo0CNXV1Vi+fDm8vb3xxx9/YMGCBbhz5w42btyodN0NGzbAy8uLu2+8evVqhIWFIT4+HgMHDgQAZGRkwN7eHv/5z3/g6OiIgoICREdHIygoCImJiejWrZvSNT/55BP07dsXP//8M4qKihAZGYmQkBAkJibC19e3RT+H+fPnY/v27VixYgX69OmDsrIyXL16Ffn5+QDYLuqysjL89ttvSrcAGnsvlJSUYMiQIbh79y4++ugjBAUFobS0FKdOnUJmZia6d++ucRnv3LkDU1NT2Nractv++9//4vTp0/j888/h4uICJycnXL9+HYMGDYKnpyfWrl0LFxcXHD16FHPmzEFeXh6WLFkCAMjOzkZwcDAEAgE2btwIZ2dn7Ny5E7Nnz1arPBEREdixYwdmzJiBZcuWQSgU4p9//uG+9Ozbtw/PPfccbGxsuPeHmZlZo9eLj49HaGgoevXqhc2bN8PMzAwbN25EeHg4du/ejSlTpigd/8Ybb2DcuHHYtWsX0tPTsXDhQrzyyis4ceIEAPZLxLhx47i/u06dOuHhw4c4cuQIqqur202vFCFqY4hWsrKyGADMiy++2GCfTCZjpFIp95DL5dy+JUuWMACY3NxcZuvWrYyZmRmTn5/PyGQyxtXVlYmMjGQYhmEsLCyY6dOnc+elpaUxAJg1a9YwJSUljKWlJbNhwwaGYRhm4cKFjI+PDyOXy5lZs2Yx6vxaFeXIyspipFIpU1hYyOzYsYMRiUSMh4cHU1FRwTAMwwQHBzMAmOPHjyud//HHHzMAmAsXLihtf+eddxgej8fcunVLqdxubm7cNRmGYYqLixk7Oztm5MiRjZZRJpMx1dXVjEQiYd5//31u+8mTJxkATN++fZV+tnfv3mUEAgHzxhtvNKhnXcHBwUxwcLDSNgDMkiVLuOcBAQHMpEmTGi0bwzBN/qy9vLyUfn/Lli1jADCxsbFNXlOV4OBgxt/fn3s/ZWRkcD//559/XqkONjY2TEFBgdL5o0ePZtzd3ZmioiKl7bNnz2bMzc254z/66COGx+Mxly9fVjouNDSUAcCcPHmS2zZ9+nTGy8uLe37q1CkGALN48eIm6+Lv79/gZ88wte+TrVu3ctueeuopxsnJiSkpKeG2yWQyJiAggHF3d+d+91u3bmUAMO+++67SNVevXs0AYDIzMxmGYZjffvuNAdCgfoQYK+qibwX9+vWDQCDgHmvXrlV53PPPPw+hUIidO3fi0KFDyMrKUmvktaWlJZ5//nls2bIFMpkMv/zyC1577TW1uqLrc3FxgUAggK2tLV555RX07dsXR44cgbm5OXeMra0thg8frnTeiRMn0KNHDwwYMEBpe0REBBiG4VpNCs8884zSNa2srBAeHo5Tp06hpqYGACCTyRAVFYUePXpAKBTC1NQUQqEQycnJuHHjRoOyT506VanOXl5eGDRoEE6ePKnxz6G+AQMG4PDhw/j4448RFxeHioqKFl3v8OHD6Nq1K0aOHKnV+deuXePeT25ubli7di1efvll/PTTT0rHDR8+XKlFX1lZiePHj2Py5MkQi8WQyWTcY+zYsaisrMT58+cBACdPnoS/vz8CAwOVrjl16lS16gcAs2bN0qp+9ZWVleHChQt47rnnYGlpyW03MTHBq6++igcPHuDWrVtK50yYMEHpea9evQDU3m7q3bs3hEIh3nzzTURHR6ucxUKIMaEuei05ODhAJBJxHx517dq1C+Xl5cjMzGzwoVOXhYUFpkyZgi1btsDLywsjR46El5eXWq8/Y8YMDBkyBF988QVyc3O1npL1559/wsbGBgKBAO7u7rC3t29wjKpu5/z8fJ
XTpBTjDRRd2QouLi4NjnVxcUF1dTVKS0thY2OD+fPn47vvvsNHH32E4OBg2Nrags/n44033lAZYBu75r///ttofdX17bffwt3dHTExMVi1ahXMzc0xevRorFmzBhKJROPr5ebmwtPTU+vydOnSBXv27AGPx4O5uTl8fHxUdinX/13l5+dDJpNh/fr1WL9+vcpr5+Xlccf6+Pg02K/q51xfbm4uTExM1DpWHYWFhWAYRuV7r7H3WP33rqL7X/He6dKlC/7880+sXr0as2bNQllZGXx9fTFnzhzMnTtXJ+UmxJBQgNeSiYkJhg8fjmPHjiEzM1Ppg6hHjx4AwN17bMrrr7+On3/+GVeuXMHOnTvVfv3BgwejW7duWLZsGUJDQ+Hh4aFxHQAgMDCQG0XfGFU9A/b29sjMzGywXTGoqf41s7KyGhyblZUFoVDItdB27NiBadOmISoqSum4vLw8dOrUSeX5qrap+pKiKQsLCyxduhRLly5FdnY215oPDw/XalCco6MjHjx4oHV5FAMTm1P/d2Vra8u1ehtrXSuCur29faM/0+Y4OjqipqYGWVlZWo1JqU/x5U6T95g6hg4diqFDh6KmpgaXLl3C+vXrMW/ePDg7O+PFF19scbkJMSTURd8CixYtQk1NDd5++21IpVKtrjFw4EC8/vrrmDx5MiZPnqzRuZ9++inCw8PxwQcfaPXaLTFixAhcv34d//zzj9L2X375BTweD8OGDVPa/vvvv6OyspJ7XlJSggMHDmDo0KEwMTEBwAan+oOuDh48iIcPH6osw+7du5Xm69+7dw9nz55tkMSmpZydnREREYGXXnoJt27d4kZc128hNiUsLAy3b99ucOuitYnFYgwbNgyJiYno1asX+vfv3+Ch+EI0bNgwXLt2rUEPyK5du5p9nbCwMADApk2bmjzOzMxMrZ+XhYUFgoKC8PvvvysdL5fLsWPHDri7u6Nr167NXqcxJiYmCAoK4mYS1H8fE2IMqAXfAoMHD8Z3332H9957D3379sWbb74Jf39/ruWhGNHeXPKZzZs3a/X6r7zyCl555RWtzm2p999/H7/88gvGjRuHZcuWwcvLCwcPHsTGjRvxzjvvNPjwNTExQWhoKObPnw+5XI5Vq1ahuLhYKVHM+PHjsW3bNnTv3h29evVCQkIC1qxZ0+g89pycHEyePBkzZ85EUVERlixZAnNzc50kfwkKCsL48ePRq1cv2Nra4saNG9i+fTsGDhzIdY337NkTALBq1SqEhYXBxMQEvXr1Ujllct68eYiJicHEiRPx8ccfY8CAAaioqEB8fDzGjx/f4AuRLn3zzTcYMmQIhg4dinfeeQfe3t4oKSlBSkoKDhw4wH3pmDdvHrZs2YJx48ZhxYoV3Ch6dXoshg4dildffRUrVqxAdnY2xo8fDzMzMyQmJkIsFuO9994DwP7M9uzZg5iYGPj6+sLc3Jz7Oda3cuVKhIaGYtiwYViwYAGEQiE2btyIq1evYvfu3RqPOfn+++9x4sQJjBs3Dp6enqisrOSmm2o7NoIQQ0YBvoXefvttDBw4EN988w2++uorZGRkgMfjwd3dHYMGDcLx48cbDFAzBo6Ojjh79iwWLVqERYsWobi4GL6+vli9ejXmz5/f4PjZs2ejsrISc+bMQU5ODvz9/XHw4EGlZDrffPMNBAIBVq5cidLSUvTt2xe///47Pv30U5VliIqKwsWLF/Haa6+huLgYAwYMwJ49e3SShGX48OHYv38/vvrqK5SXl6Nz586YNm0aFi9ezB0zdepU/PXXX9i4cSOWLVsGhmGQlpamcmyClZUVzpw5g8jISPz4449YunQpbG1t8eSTT+LNN99scXmb0qNHD/zzzz9Yvnw5Pv30U+Tk5KBTp06QSCQYO3Ysd5yLiwvi4+Mxd+5cvPPOOxCLxZg8eTI2bNiAiRMnNvs627ZtQ9++fbF582Zs27YNIpEIPXr0wCeffMIds3TpUmRmZmLmzJkoKSmBl5
dXo7eygoODceLECSxZsgQRERGQy+UIDAzE/v37MX78eI1/Dr1798axY8ewZMkSZGVlwdLSEgEBAdi/fz9GjRql8fUIMXQ8hjGSnKSEEEII4dA9eEIIIcQIUYAnhBBCjBAFeEIIIcQIUYAnhBBCjBAFeEIIIcQIGf00OblcjoyMDFhZWWmVq50QQrTBMAxKSkrg5uYGPp/aUqTtGX2Az8jI0DqNKyGEtFR6enqjyZoIaU1GH+CtrKwAsH9kzWWUU5BKpTh27BhGjRoFgUDQmsVrE1Qfw2Zs9QGMr07a1Ke4uBgeHh7cZxAhbc3oA7yiW97a2lqjAC8Wi2FtbW00H05UH8NlbPUBjK9OLakP3Rok+kI3hgghhBAjRAGeEEIIMUIU4AkhhBAjZPT34AkhxJDV1NRAKpXquxiknRAIBDAxMVHrWArwhBCiBwzDICsrC48ePdJ3UUg706lTJ7i4uDQ7gJMCPCGE6IEiuDs5OUEsFtNoe9IshmFQXl6OnJwcAICrq2uTx1OAJ4SQNlZTU8MFd3t7e30Xh7QjIpEIAJCTkwMnJ6cmu+tpkB0hhDSjokK311PccxeLxbq9MOkQFO+b5sZuUAueEEJUkMuBvDwgJwdQc0yTxqhbnmhD3feNXlvwmzZtQq9evbgscwMHDsThw4e5/QzDIDIyEm5ubhCJRAgJCcG1a9f0WGJCiLGTSoGHD4ErV4D0dKCqSt8lIkQ7eg3w7u7u+M9//oNLly7h0qVLGD58OCZOnMgF8dWrV2PdunXYsGEDLl68CBcXF4SGhqKkpESfxSaEGKHyciAtDUhKArKygJoafZeIkJbRaxd9eHi40vMvvvgCmzZtwvnz59GjRw98/fXXWLx4MZ555hkAQHR0NJydnbFr1y689dZbKq9ZVVWFqjpfuYuLiwGw9yrUnWuqOM5Y5qZSfQybsdUHaF91Ki5mu+FLSxs/pqZG8/q0h7obspCQEPTu3Rtff/01AMDb2xvz5s3DvHnzWu014+LiMGzYMADAxIkT8d///rfVXqsxiu53GxubFk+hNJh78DU1Nfj1119RVlaGgQMHIi0tDVlZWRg1ahR3jJmZGYKDg3H27NlGA/zKlSuxdOnSBtuPHTum8YCW2NhYzSph4Kg+hs3Y6gMYX500qU95eXkrlqTjuXjxIiwsLNrktW7dugUnJyeNzomIiEB0dLTStqCgIJw/f557XlVVhQULFmD37t2oqKjAiBEjsHHjRqXlhDMzMxETE4MlS5a0rBIwgACflJSEgQMHorKyEpaWlti3bx969OiBs2fPAgCcnZ2Vjnd2dsa9e/cavd6iRYswf/587rliycZRo0ZptJpcbGwsQkNDjWYlLKqP4TK2+gCGWyeZjB04l5fH/l9dIpEUd+5oVh9F7yHRDUdHxzZ7LScnJ3Tq1Enj88aMGYOtW7dyz4VCodL+efPm4cCBA9izZw/s7e3xwQcfYPz48UhISOCmu7m4uMDGxqZF5VfQe4Dv1q0bLl++jEePHmHv3r2YPn064uPjuf31RwsyDNPkCEIzMzOYmZk12C4QCDT+oNHmHENG9TFsxlYfwHDqVFkJZGcDBQXs6HgA4GswAkkxil6T+mhSb0UCE33QJMlOSEgIevbsCRMTE0RHR0MoFGL58uV4+eWXMXv2bPz2229wcnLChg0bEBYWxp13/fp1LFiwAKdOnYKFhQVGjRqFr776Cg4ODgCAsrIyvPPOO/j9999hZWWFBQsWNHjt+l3069atw9atW5Gamgo7OzuEh4dj9erVsLS0BABs27YN8+bNQ0xMDObNm4f09HQMGTIEW7dubTZBTH2qynfgwAGlWwgAG39cXFxUXqOoqAibN2/G9u3bMXLkSADAjh074OHhgT///BOjR4/WqEzq0Ps8eKFQCD8/P/Tv3x8rV65EYGAgvvnmG+6HlJWVpXR8Tk5Og1Y9IYSoUlICpKQA166xrXZFcDc05eXlsLS01MtD0y8W0dHRcHBwwN9//4
333nsP77zzDp5//nkMGjQI//zzD0aPHo1XX32Vu25mZiaCg4PRu3dvXLp0CUeOHEF2djZeeOEF7poLFy7EyZMnsW/fPhw7dgxxcXFISEhoshx8Ph/ffvstrl69iujoaJw4cQIffvhhg5/rl19+ie3bt+PUqVO4f/++yi8PzVG3fHFxcXByckLXrl0xc+ZMLuMcACQkJEAqlSrddnZzc0NAQADXY61reg/w9TEMg6qqKvj4+MDFxUXpnld1dTXi4+MxaNAgPZaQEGLIGAbIzwdu3ABu3waKivRdIuMSGBiITz/9FBKJBIsWLYJIJIKDgwNmzpwJiUSCzz//HPn5+bhy5QoAdjp03759ERUVhe7du6NPnz7YsmULTp48idu3b6O0tBSbN2/Gl19+idDQUPTs2RPR0dGoaWYaw7x58zBs2DD4+Phg+PDhWL58Of7v//5P6RipVIrvv/8e/fv3R9++fTF79mwcP35co/qqW76wsDDs3LkTJ06cwNq1a3Hx4kUMHz6cG/SdlZUFoVAIW1tbpfOcnZ0bNGR1Ra9d9J988gnCwsLg4eGBkpIS7NmzB3FxcThy5Ah4PB7mzZuHqKgoSCQSSCQSREVFQSwWY+rUqfosNiHEANXUALm57Ij49jaAXSwWo7SpYfyt/Nqa6NWrF/d/ExMT2Nvbo2fPntw2RQ+rovWakJCAkydPcl3ndd25cwcVFRWorq7GwIEDue12dnbo1q1bk+U4efIkoqKicP36dRQXF0Mmk6GyshJlZWXcYDyxWIwuXbpw57i6uiq1qtVx584dtco3ZcoU7v8BAQHo378/vLy8cPDgQW4mmCrN3XZuCb0G+OzsbLz66qvIzMyEjY0NevXqhSNHjiA0NBQA8OGHH6KiogLvvvsuCgsLERQUhGPHjsHKykqfxSaEGJCqKjaoG3IXfHN4PF6bjRBvqfpjC3g8ntI2RbCSP/5lyOVyhIeHY9WqVQ2u5erqiuTkZI3LcO/ePYwdOxZvv/02li9fDjs7O5w5cwYzZsxQmp6oqqwMw2j0Wpoer+Dq6govLy+ufi4uLqiurkZhYaFSKz4nJ6fVeqX1GuA3b97c5H4ej4fIyEhERka2TYEIIe1GaSk7cI5WWzVsffv2xd69e+Ht7Q1T04Yhx8/PDwKBAOfPn4enpycAoLCwELdv30ZwcLDKa166dAkymQxr164F//Foyfrd87qiTfkAID8/H+np6dyAvn79+kEgECA2NpYbf5CZmYmrV69i9erVrVJ2g7sHTwghjWEYoLAQuHkTuHWLgnt7MGvWLBQUFOCll17C33//jdTUVBw7dgyvv/46ampqYGlpiRkzZmDhwoU4fvw4rl69ioiICC5wq9KlSxfIZDKsX78eqamp2L59O77//vtWKb865SstLcWCBQtw7tw53L17F3FxcQgPD4eDgwMmT54MgE1cM2PGDHzwwQc4fvw4EhMT8corr6Bnz57cqHpd0/s0OUIIaU7dhV8oN3z74ubmhr/++gsfffQRRo8ejaqqKnh5eWHMmDFckFyzZg1KS0sxYcIEWFlZ4YMPPkBRE6Mje/fujXXr1mHVqlVYtGgRnn76aaxcuRLTpk1rlTo0Vz4TExMkJSXhl19+waNHj+Dq6ophw4YhJiZG6ZbyV199BVNTU7zwwgtcoptt27Y1ueRrS1CAJ4QYrOrq2vvrlBte/+Li4hpsu3v3boNt9e9bSyQS/P77741e19LSEtu3b8f27du5bQsXLmzydd5//328//77StteffVV7v8RERGIiIhQ2j9p0iSt7qmrKt/Bgwe5/4tEIhw9erTZ65ibm2P9+vVYv369xmXQBgV4QojBKS9n768XFrLd8oS0JXd3d4SHh2P37t1t/tqWlpaQyWQwNzdv8bUowBNCDEZRERvYacFIog9BQUHcqHdV0/rawuXLlwFAJ932FOAJIXoll7OJaXJy2JSyhOiLSCSCn5+fWsequl2hC+q+vjoowBNC9EIqZRPT5O
ZqtvALIUQ9FOAJIW2qooJtrefn0/11QloTBXhCSJsoLmbvr9MqqoS0DQrwhJBWo1j4JTubbbkTQtoOBXhCiM4p7qlfv07z1wnRFwrwhBCdqapiW+u5uexzqRRoIuMoIaQVUYAnhLRY/YVf2uuqbvr2449t+3pvvqnZ8SEhIYiPjwcAJCYmonfv3rovlIFSrJJnY2ODR+1kEQT6bk0I0Qot/NIxzZw5E5mZmQgICFDr+Li4OEycOBGurq6wsLBA7969sXPnzgbH8Hi8Bo+bN2+2uLyqrsvj8bBmzRrumJCQkAb7X3zxRaXrZGZm4uuvv25xedoSteAJIRqpqald+KW6Wt+lIW1NLBbDxcVF7ePPnj2LXr164aOPPoKzszMOHjyIadOmwdraGuHh4UrH3rp1C9bW1txzR0fHFpc3MzNT6fnhw4cxY8YMPPvss0rbZ86ciWXLlnHPRSKR0n4XFxfY2Ni0uDxtiQI8IUQttPALqS8uLg7Dhg3DH3/8gU8++QS3bt1CYGAgfv75Z/Ts2RMA8MknnyidM2fOHBw9ehT79u1rEOCdnJzQqVMntV8/JCSE60nYsWMHTExM8M4772D58uVcl3r9LyP/+9//MGzYMPj6+ipt1/SLS3tAXfSEkCaVlwNpacDVq+x9dgrupL6FCxfiyy+/xMWLF+Hk5IQJEyZAKpU2enxRURHs7OwabO/Tpw9cXV0xYsQInDx5Uq3Xjo6OhqmpKS5cuIBvv/0WX331FX7++WeVx2ZnZ+PgwYOYMWNGg307d+6Eg4MD/P39sWDBApQYwYII1IInhKj06BEb0EtL9V0SYuiWLFmC0NBQAGzAdXd3x759+/DCCy80OPa3337DxYsX8cMPP3DbXF1d8eOPP6Jfv36oqqrC9u3bMWLECMTFxeHpp59u8rU9PDzw1VdfgcfjoVu3bkhKSsJXX32FmTNnNjg2OjoaVlZWeOaZZ5S2v/zyy/Dx8YGLiwuuXr2KRYsW4d9//0VsbKw2Pw6DQQGeEMJRLPySnc1OeSNEHQMHDuT+b2dnh27duuHGjRsNjouLi0NERAR++ukn+Pv7c9u7deuGbt26KV0vPT0dX375JZ5++mmcPn0aYWFh3P4ffvgBL7/8MgDgqaee4rrjFeeuXbsWNTU1DVZk27JlC15++eUGS7HW/TIQEBAAiUSC/v37459//kHfvn01/XEYDArwhBBIpbX312nhF6ILdYMuAMTHxyM8PBzr1q3DtGnTmj3/qaeewo4dOwAA/fv355ZRBQBnZ2eNy3P69GncunULMTExzR7bt29fCAQCJCcnU4AnhLRPFRVsa72ggBZ+Ido7f/48PD09AQCFhYW4ffs2unfvzu2Pi4vD+PHjsWrVKryp5uT7xMREuLq6Amh6Gdfz5883eC6RSBq03jdv3ox+/fohMDCw2de+du0apFIp9/rtFQV4QjogWviF6NKyZctgb28PZ2dnLF68GA4ODpg0aRIANriPGzcOc+fOxbPPPousrCwAgFAo5Abaff311/D29oa/vz+qq6uxY8cO7N27F3v37m32tdPT0zF//ny89dZb+Oeff7B+/XqsXbtW6Zji4mL8+uuvDbYDwJ07d7Bz506MHTsWDg4OuH79Oj744AP06dMHgwcPbuFPRr8owBPSQSgWfsnJoYVfDJWmmeUMxX/+8x/MnTsXycnJCAwMxP79+yEUCgEA27ZtQ3l5OVauXImVK1dy5wQHByMuLg4AUF1djQULFuDhw4cQiUTw9/fHwYMHMXbs2GZfe9q0aaioqMCAAQNgYmKC9957r0EvwZ49e8AwDF566aUG5wuFQhw/fhzffPMNSktL4eHhgXHjxmHJkiUNegHaGwrwhBg5mYzNDZ+by95rJ0TXhgwZgqtXr6rct23bNmzbtq3J8z/88EN8+OGHWr22QCDA119/jU2bNjV6zJtvvtnorQEPDw8u/a6xoQBPiJGqrGRb6/n5lBue6M7GjRvx888/49y5c/ouSp
uytLSETCZrMALfkFGAJ8TIlJSw99eLivRdEmJsdu7ciYrH93c8PT1x9uxZPZeo7ShG8benbnsK8IQYAcXCL9nZbOY5QlpD586dlZ6HhISA0eP0C8U9/LbQ2Ch+Q0YBnpB2jBZ+IYQ0Rq+56FeuXIknn3wSVlZWcHJywqRJk3Dr1i2lYyIiIhos4/fUU0/pqcSEGIbqaiA9HUhKAh48oOBOCGlIrwE+Pj4es2bNwvnz5xEbGwuZTIZRo0ahrKxM6bgxY8YgMzOTexw6dEhPJSZEv8rKgNRUduGXnBxa+IUQ0ji9dtEfOXJE6fnWrVvh5OSEhIQEpQUGzMzM1F7Gr6qqClV1kmgXP87kIZVKm1zdqC7Fceoeb+ioPoZNnfoUFbEBvd53X4Mll0uV/m3vamo0f88Zy/uTtF8GdQ++6PGw3/rLCMbFxXHrBAcHB+OLL76Ak5OTymusXLkSS5cubbD92LFjEIvFGpWnva8kVB/Vx7AZW30AICvLuOqkye+onEY7Ej3jMfocAlkHwzCYOHEiCgsLcfr0aW57TEwMLC0t4eXlhbS0NHz22WeQyWRISEiAmZlZg+uoasF7eHggLy8P1tbWapVFKpUiNjYWoaGhEAgELa+cnlF9DFv9+kil7MC5vLz22wUvl0uRlRULF5dQ8Pnt/3ckEklx545m77ni4mI4ODigqKiowWdPZWUl0tLS4OPj067mVRPDoO77x2Ba8LNnz8aVK1dw5swZpe1Tpkzh/h8QEID+/fvDy8sLBw8ebLCmL8B256sK/AKBQONgoM05hozqY9hqagTIyREoLfzC1+somZbj8wVGEeAVU581ec9p895MSND4lBbp10+z40NCQrisb4mJiejdu7fuC6UH3t7euHfvHgB2sZxOnTrpt0A6YhAfH++99x7279+PkydPwt3dvcljXV1d4eXlheTk5DYqHSGtS7Hgy82bbNY5w+hTI0S1mTNnIjMzEwEBAWodX1lZiYiICPTs2ROmpqbcIjS6UFJSgnnz5sHLywsikQiDBg3CxYsXlY7Jzs5GREQE3NzcIBaLMWbMmAbx4+LFi2otbNPe6DXAMwyD2bNn4/fff8eJEyfg4+PT7Dn5+flIT09v98v4kY6NYdgu+GvX2FHxhLQXYrEYLi4uMDVVrwO4pqYGIpEIc+bMwciRI3ValjfeeAOxsbHYvn07kpKSMGrUKIwcORIPHz4EwMaYSZMmITU1Ff/73/+QmJgILy8vjBw5Umm2lqOjY4OxX8ZArwF+1qxZ2LFjB3bt2gUrKytkZWUhKyuLS4VYWlqKBQsW4Ny5c7h79y7i4uIQHh4OBwcHTJ48WZ9FJ0QrMhmQmQlcuQLcu8fmiyekvYqLiwOPx8PBgwcRGBgIc3NzBAUFISkpiTvGwsICmzZtwsyZM9WeDQWwOVAmTZqEpUuXwsnJCdbW1njrrbdQ/TjpQ0VFBfbu3YvVq1fj6aefhp+fHyIjI+Hj48MtPJOcnIzz589j06ZNePLJJ9GtWzds3LgRpaWl2L17t25/GAZIrwF+06ZNKCoqQkhICFxdXblHTEwMADbnb1JSEiZOnIiuXbti+vTp6Nq1K86dOwcrKyt9Fp0QjVRWsgE9KQnIyGADPSHGYuHChfjyyy9x8eJFODk5YcKECTqZJnj8+HHcuHEDJ0+exO7du7Fv3z5ulpRMJkNNTU2DQWYikYgby6UYcF33GBMTEwiFwgbjvYyRXgfZNTeAXyQS4ejRo21UGkJ0jxZ+IR3BkiVLEBoaCgCIjo6Gu7s79u3bhxdeeKFF1xUKhdiyZQvEYjH8/f2xbNkyLFy4EMuXL4eVlRUGDhyI5cuX44knnoCzszN2796NCxcuQCKRAAC6d+8OLy8vLFq0CD/88AMsLCywbt06ZGVlITMzs8X1NnQGMciOEGPCMEBBAXDjBnD7NgV3YvwGDhzI/d/Ozg7dunXDjRs31Dr3/v37sLS05B5RUVHcvsDAQKX8JQMHDkRpaSnS09
MBANu3bwfDMOjcuTPMzMzw7bffYurUqdyKbwKBAHv37sXt27dhZ2cHsViMuLg4hIWFtatV4bRlMNPkCGnvamqA3Fw24xwlMSMdHY/HU+s4Nzc3bilWoGGis6au3aVLF8THx6OsrAzFxcVwdXXFlClTlAZs9+vXD5cvX0ZRURGqq6vh6OiIoKAg9O/fX7MKtUMU4AlpoaoqNqjn5QFyub5LQ0jbO3/+PDw9PQGw88hv376N7t27q3Wuqalpo0ux/vvvv6ioqIBIJOJex9LSssF0agsLC1hYWKCwsBBHjx7F6tWrG1zLxsYGADvw7tKlS1i+fLna9WuvKMAToqWyMvb++qNHNHeddGzLli2Dvb09nJ2dsXjxYjg4OCjNd79+/Tqqq6tRUFCAkpISrsXeXKKc6upqzJgxA59++inu3buHJUuWYPbs2eA/zgB19OhRMAyDbt26ISUlBQsXLkS3bt3w2muvcdf49ddf4ejoCE9PTyQlJWHu3LmYNGkSRo0apesfg8GhAE+IBhiGDejZ2e1n4RfSfmiaWc5Q/Oc//8HcuXORnJyMwMBA7N+/H0KhkNs/duxYLlMcAPTp0wdA8wOtR4wYAYlEgqeffhpVVVV48cUXERkZye0vKirCokWL8ODBA9jZ2eHZZ5/FF198oZRFMDMzE/Pnz0d2djZcXV0xbdo0fPbZZzqquWGjAE+IGuRytgs+J4ftkieE1BoyZAiuXr3a6P67d+9qfe2lS5eqXEAMAF544YVmR+rPmTMHc+bM0fr12zMaRU9IE6RS4OFDNjFNejoFd0I2btwIS0tLpWQ27Z2/vz/CwsL0XQydoxY8ISqUl7Pd8IWFdH+dEIWdO3dymUY9PT1x9uxZPZdINw4dOsQl5lF31dH2gAI8IXUUFbGBvaRE3yUhxPB07txZ6XlISEiz99G1tW3btla5ripeXl5t9lptiQI86fDkcnYVt5wcyg1PCDEeFOBJhyWTsUE9N5dywxP9kFPiBKIFdd83FOBJh1NZyXbDFxRQYhqiH0KhEHw+HxkZGXB0dIRQKFQ78xvpuBiGQXV1NXJzc8Hn85WmIqpCAZ50GMXFbIudcsMTfePz+fDx8UFmZiYyMjL0XRzSzojFYnh6enIJfxpDAZ4YNcXCL9nZwOPBv4QYBKFQCE9PT27ZU0LUYWJiAlNTU7V6fCjAE6NEC7+Q9oDH40EgEChlXiNEVyjAE6NSVcW21vPz6f46IaRjowBPjEJpae3CL4QQQijAk3aMYdhMc7TwCyGENEQBnrQ7ivFIN27Q/HVCCGkMBXjSblRXs4PmcnJqnzczS4QQQjosCvDE4NVf+IUGzxFCSPMowBOD9egRG9hLS/VdEkIIaX8owBODQgu/EEKIbrQ4wNfU1CApKQleXl6wtbXVRZlIBySVsolpaOEXQgjRDY2HKM2bNw+bN28GwAb34OBg9O3bFx4eHoiLi9N1+YiRq6gA7t4FkpKAzEwK7oQQoisaB/jffvsNgYGBAIADBw4gLS0NN2/exLx587B48WKdF5AYp+JiIDkZuH6d7ZJnGH2XiJCG5HK2VyktTd8lIURzGnfR5+XlwcXFBQBw6NAhPP/88+jatStmzJiBb7/9VucFJMaDFn4h7UF1Nfsezcxk/5VKAS8vwNlZ3yUjRDMat+CdnZ1x/fp11NTU4MiRIxg5ciQAoLy8HCYmJhpda+XKlXjyySdhZWUFJycnTJo0Cbdu3VI6hmEYREZGws3NDSKRCCEhIbh27ZqmxSZ6JJOxH5ZJSWx3PAV3YmhKS9kepVOngEOHgIsXgQcPaKEi0r5pHOBfe+01vPDCCwgICACPx0NoaCgA4MKFC+jevbtG14qPj8esWbNw/vx5xMbGQiaTYdSoUSirk3d09erVWLduHTZs2ICLFy/CxcUFoaGhKCkp0bTopI1VVQH377OBPSODPiyJ4WAYIC+PfW8eO8Y+kpLYbZRngRgLjbvoIyMjERAQgPT0dDz//PMwMzMDwK
5R+/HHH2t0rSNHjig937p1K5ycnJCQkICnn34aDMPg66+/xuLFi/HMM88AAKKjo+Hs7Ixdu3bhrbfe0rT4pA3Qwi/EEEmlyl3v1dX6LhEhrUuraXLPPfccAKCyzkTl6dOnt7gwRUVFAAA7OzsAQFpaGrKysjBq1CjuGDMzMwQHB+Ps2bMqA3xVVRWqqqq458XFxQAAqVQKqZpNSMVx6h5v6NqiPgzDBvScnNbvgpfLpUr/tnfGVh/AcOpUVsYG86wsdvxH3da5ummO5fIapKaeRX7+da7HUh3G8vlB2i+NA3xNTQ2ioqLw/fffIzs7G7dv34avry8+++wzeHt7Y8aMGVoVhGEYzJ8/H0OGDEFAQAAAICsrCwB7378uZ2dn3Lt3T+V1Vq5ciaVLlzbYfuzYMYjFYo3KFBsbq9Hxhs7Y6pOVRfUxdIZQJ3NzwNubfairvLwcly9fxsWLF5GQkIDi4mL07NkTPXr00OgahOiTxgH+iy++QHR0NFavXo2ZM2dy23v27ImvvvpK6wA/e/ZsXLlyBWfOnGmwj8fjKT1nGKbBNoVFixZh/vz53PPi4mJ4eHhg1KhRsLa2VqssUqkUsbGxCA0NhUAg0KAWhqk16lNdzd6vzM+vXd2trcjlUmRlxcLFJRR8fvv//RhbfYC2rZNMxvYcZWe3rOu9sDANyckHkZx8CPfuxSv1PohENrC3t8fIkSMhFArVup6i95AQfdE4wP/yyy/48ccfMWLECLz99tvc9l69euHmzZtaFeK9997D/v37cerUKbi7u3PbFdPxsrKy4Orqym3Pyclp0KpXMDMz48YF1CUQCDQObtqcY8h0UR9Fl+ejR7Vz1/W1ohufLzCagAgYX32A1qtTeTl7Lz0zU/uBcXJ5DR4+PIeUlD+QnHwAeXnXlfbb2Ung5xcOiSQcgwcPgJtbLIRCodp/Q8b02UHaJ40D/MOHD+Hn59dgu1wu1/ieE8MweO+997Bv3z7ExcXBx8dHab+Pjw9cXFwQGxuLPn36AACqq6sRHx+PVatWaVp00gK08AvRt4KC2qCubeO4srIIqalHkZLyB+7cOYSKinxuH49nAg+PoZBIxsPPLxz29l25fSYmdD+dtD8aB3h/f3+cPn0aXl5eStt//fVXLgira9asWdi1axf+97//wcrKirvnbmNjA5FIBB6Ph3nz5iEqKgoSiQQSiQRRUVEQi8WYOnWqpkUnGlIs/JKdzU55I6QtKbreMzPZQXLavgcLC+8gOfkAUlL+wP378ZDLa/Mhm5vbokuXMPj5jYev7xiIRLSeBjEeGgf4JUuW4NVXX8XDhw8hl8vx+++/49atW/jll1/wxx9/aHStTZs2AQBCQkKUtm/duhUREREAgA8//BAVFRV49913UVhYiKCgIBw7dgxWVlaaFp2oSSplP1jz8ig3PGlbFRW1AT0nR9uudxkePDiHlJQDSE7+A/n5N5T229l1g0QSDj+/8fDwGAw+nxbVJMZJ43d2eHg4YmJiEBUVBR6Ph88//xx9+/bFgQMHNJpCArBd9M3h8XiIjIxEZGSkpkUlGqqoYFvrBQWUG560ncLC2qCube4Etuv9CJKTDyA19TAqKgq4fXy+6eOudzao29lJNLo2jwdYWmpXLkL0SauvrqNHj8bo0aN1XRaiJ0VFbGuJBv2StiCTsb1DivvpddJpaKSgIOVxK/0A0tNPq+h6HwuJJBy+vqNhbt5Jo2vz+YCTE+DmBri4AHZ2wO3b2pWTEH2hvqkOimFq769r+wFLiLoqK9kWemYm+2VSm6mVcrkM6el/caPeCwqU162wt+/+uJUeDnf3gRp3vQsE7IIyiqBuSp+OpJ3T+C3M5/MbnYMOsIlwiOGSydjlL3NzKTc8aX23b7NBvbBQu/MrKgqRmnrk8aj3w6isrL0Qn28KT89g+PmNf9z13nB2T3PMzQFXV/bh5KS/KZ+EtAaNA/y+ffuUnkulUiQmJiI6OlplBjliOB48YO9x0mIapD
XU1LBfHBUD5Hr0AG7e1Pz9lp9/mxsgl55+GgxT22gQiezRpcvYx6PeR8Pc3Ebjclpasq10V1fA3l7j0wlpNzQO8BMnTmyw7bnnnoO/vz9iYmK0zmRHWkdJCbuSG8De96QWCtElRde7IqgrZl1o8j6rqZHiwYO/uKlsBQXKN7sdHHpwXe+dOz8FPl+zZakBwNaWDehuboCaCS0Jafd0dpcpKChIKXUt0R+GYbtEs7PZjF/UYie6VFRUez+9sFC7GRcVFQV1Rr0fQWXlI24fny+Ap2cwN+rd1tZX4+vz+WzrXNFS13AZCkKMgk4CfEVFBdavX6+UZpa0PUUXaW4uLYVJdEcur+16z8xkvzRqimEY5Off4gbIPXjwV72udwf4+Y2Fn184fH1HwcxM82a2iQk7SE5xT13NlPGEGC2NA7ytra3SIDuGYVBSUgKxWIwdO3botHBEPdXVbGtd25zchNRXVVW7dnpOjnYDMmUyGdLSTiI5mW2pFxamKO13dAyAn994SCThcHML0qrrXShkR7y7ubGD5GjkOyG1NP5z+Oqrr5QCPJ/Ph6OjI4KCgmBrS2ke25KqhV8I0VZJSe3cdG2THZWX5yM19TBSUvZj3bqDSkum8vkCeHkNexzUx6NTJ58mrtQ4sbj2frqDA5uIhhDSkMYBXpFCluiP4v56WZm+S0LaM8VaA4qgrs37ie16v4nkZDbhzMOHZ8Ewtd1IYrEj/PzGwc8vHD4+oTAz0y7FtLU1G9Dd3IBOnbS6BCEdjloB/sqVK2pfsFevXloXhjROLme74HNyaOEXoj2ptPZeena2dl3vNTXVuH//NJdF7tGjVKX9jo490bXrWEyYYIeiojkAzDV+DR6PzR6nGCRHqWIJ0ZxaAb53797g8XjN5o7n8XiU6EbHFAu/5OZql/2LkNLS2lzv+fnajdMoL8/DnTuHkZx8AGlpR1FVVZvX2MRE+LjrPRwSyXjY2HiBz5eiW7dDuHTJRO3X4/MBR0egc2f2vrq55t8LCCF1qBXg09LSWrscpJ7ycraFpe00JNJxKdIQK1rqJSXaXINBXt51btT7w4fn6nW9O8HPbxwkErbrXSjUrolN6WEJaT1q/TnVX/udtJ6iIjawa/OhTDouqZR93yiSzmgzTZLteo9HcvIfSEk5gEePlL/YOzkF1hn1/iR4PO2yJlF6WELahtbfl69fv4779++jut4nyYQJE1pcqI5GLmdHLdPCL0QTZWW1Xe/aTpEsK8vFnTuHkJLyB1JTj6K6uvabpYmJGby9h8PPLxx+fuNgY+OpdVktLNiud1dX9t46jXwnpPVpHOBTU1MxefJkJCUlKd2XV0ydo3vw6pPJau+vy2TNH086NoZhvwgqgro2y/syDIPc3GvcALmHD88DqL0HZGHhzLXSvb1HQii0aFGZu3en9LCE6IvGAX7u3Lnw8fHBn3/+CV9fX/z999/Iz8/HBx98gC+//LI1ymh0KivZ1npBASWmIU2TyWoTzmjb9S6TVeH+/bjHXe9/oKjortJ+Z+c+XFB3de2ndde7Ij2sovu9sBDo2pW64AnRF40D/Llz53DixAk4OjqCz+eDz+djyJAhWLlyJebMmYPExMTWKKdRKClhP6yLivRdEmLIystrB8jl5mrb9Z6DlJSDSEn5A2lpx1BdXcrtMzU1h7f3CG6ZVWtr7VNMN5YeVi7XfolYQohuaBzga2pqYPl4UqqDgwMyMjLQrVs3eHl54datWzovYHun6FbNydEuhzfpOG7eZIO6Nl8A2a73JG5FtocPL6Bu17ulpSsX0L29R7So653SwxLSPmj8pxkQEIArV67A19cXQUFBWL16NYRCIX788Uf4+mq+6pOxUiz8om0eb2LcZDL2/aHI9R4QANy+rVlrXSarxL17cVxQLy6+r7TfxaUvNzfdxaWv1l3vgHJ6WHt76nYnpD3QOMB/+umnKHuc03LFihUYP348hg4dCnt7e8TExOi8gO1NVRX7gU0Lv5D6Ki
tr08LWTVykSbAsLc3GnTsHHyeciYVUWptflu16H/l4mdVxsLLq3KLyUnpYQto3jQP86NGjuf/7+vri+vXrKCgoaLDKXEdTWlq78AshCo8e1QZ1bd4bDMMgJ+dfbm56RsbfSvstLd3qjHofDoGgZQufK9LDurlRelhC2juNA3x0dDSee+45WFjU3sOzs7PTaaHaC4ZhP7Rp4ReioLg1oxj1XlGh+TVkskrcvXsCKSl/PO56T1fa7+ranwvqzs59WvTFWpEeVpHzndLDEmI8NA7wCxYswLvvvovw8HC88sorGDNmDEw72Cibmho2FWh2tnbTlohxqayszSCXna3dmgEFBQVITNyC5OTDj7vea0dkmpqK4OMT+niQ3DhYWbm1qLymprWD5Jyd2XSxhBDjo3FkzszMxJEjR7B79268+OKLEIlEeP755/HKK69g0KBBrVFGg1FdXXt/nfL5dGxFRbWt9IICzc9nGAbZ2ZeRnHwAd+4cQEbGJaX9VladuQFyXl7DIRCIWlReM7PaQXKUHpaQjkHjAG9qaorx48dj/PjxKC8vx759+7Br1y4MGzYM7u7uuHPnTmuUU69o4Rcilyt3vWsz5VEqrcC9eye4Ue8lJQ+V9ru59Yef3wT4+Y2Hs3PvFo9psbCo7Xq3t6f0sIR0NC3qWxeLxRg9ejQKCwtx79493LhxQ1flMgjFxWxXPC380jFVVdUmnMnJ0S6dcElJxuOEMweQlvYnZLLam/ICgRg+PqGQSMbiueeEuHPnZcjlLesv79SpNqjb2LToUoSQdk6rAK9oue/cuRN//vknPDw88NJLL+HXX3/Vdfn0KjWVujI7muLi2lHv2vTYMAyDrKx/uGVWs7ISlPZbW3twA+S8vIbB1NQcfL4UtraHtCovjwc4ONR2v4tbNoieEGJENA7wL730Eg4cOACxWIznn38ecXFxWt97P3XqFNasWYOEhARkZmZi3759mDRpErc/IiIC0dHRSucEBQXh/PnzWr0eIfXJ5eyYCkXXuzazIaTScty9e/xx1/tBlJZm1NnLg5vbgMdz08fDyalXi7ve66aHdXFh768TQkh9Ggd4Ho+HmJgYjB49usWj58vKyhAYGIjXXnsNzz77rMpjxowZg61bt3LPhYpk14RoqbqaHVORkaF9psGSkofc3PS7d49DJqtd51cgsICPzyhIJOHo0mUsLC2dW1xmSg9LCNGUxh8Tu3bt0tmLh4WFISwsrMljzMzM4OLiorPXJB1TSUnt/fT8fG263uXIyvoHycnsMqvZ2cqLKllbe0IiCYdEEg5Pz2CYmrZ8QrlYzAb1zp0pPSwhRHMG3w6Ii4uDk5MTOnXqhODgYHzxxRdwcnJq9PiqqipUVVVxz4sfL5otlUohVbOppjhOLjeOJPKKenSk+ihWM1PMTS+tXUwNPJ56I8qrq8tw9+4J3L59ECkph1FamllnLw+dOw+ARDIOXbuOg6NjQL2ud/V/1ny+lPvX2rq2+71+etj2lPrY2N5zNTVsPdT9DNH0WEJaA49hDGPiF4/Ha3APPiYmBpaWlvDy8kJaWho+++wzyGQyJCQkwKyRG4+RkZFYunRpg+27du2CmEYgkWbk5uYiISEBFy9eRFJSEqrrZDIyNzdH79698eSTT6Jfv37oRAnaSRPKy8sxdepUFBUVwdraWt/FIR2Q2gH+wYMHcHfXft3oZguiIsDXl5mZCS8vL+zZswfPPPOMymNUteA9PDyQl5en9h+ZVCpFbGwsXFxCwee3/zRfcrkUWVnGWZ+KCgGys2sTzmjaymUYOTIyEpCcfBDJyQeRnf2v0n4bGy9IJOMgkYyDl9fTMDVt+Yg2Pp/tcldkkhMKjev3Axjfe04kkuLOnViEhoZCoGbqv+LiYjg4OFCAJ3qjdhd9QEAA1q9fj1dffbU1y9MkV1dXeHl5ITk5udFjzMzMVLbuBQKB2n+YCny+wCg+nBSMoT4Mw3a9A0B8vABFRZrXp7q6DGlpsUhJYU
e9l5Vl19nLg7v7QPj5saPeHR39lbrete0mNzVlg7mbG3tfve5bUXFNY/j91GcsdTIxYf/V5HNE088bQnRN7QAfFRWFWbNm4b///S9+/PFH2Nvbt2a5VMrPz0d6ejpcXV3b/LWJ/shktbnes7LY5/37a5aAqKjoPjc3/d69k6ipqe3lEQqt4Os7GhJJOHx9w2Bh4aiTctdND+voWBskCCGkLagd4N99912EhYVhxowZ8Pf3x48//ogJEya06MVLS0uRkpLCPU9LS8Ply5dhZ2cHOzs7REZG4tlnn4Wrqyvu3r2LTz75BA4ODpg8eXKLXpcYvvJydhpbVhY7T71uy1md0eRs1/vFx3PTDyAn54rS/k6dfB7neg+Hp+fTMDHRzfRLSg9LCDEUGo2i9/HxwYkTJ7BhwwY8++yzeOKJJxrMhf/nn3/Uvt6lS5cwbNgw7vn8+fMBANOnT8emTZuQlJSEX375BY8ePYKrqyuGDRuGmJgYWFlZaVJs0g4out4VWeQeT37QSHV1KVJTjz1eZvUgystzuH08Hh+dOw+CRDIefn7hcHB4osUJZxQoPSwhxBBpPE3u3r172Lt3L+zs7DBx4sQWJbsJCQlBU2P8jh49qvW1ieGTydhEM4oscnXGRqqtqOget3gL2/VeO+rdzMwavr5j4Oc3Hl26hEEsdtBJuSk9LCGkPdAoOv/000/44IMPMHLkSFy9ehWOjrq5V0k6joqK2lZ6bq7mg9bk8hpkZJzHjRvbcfr0p8jJuaq039a2CzdAztNzqM663k1M2AxyikFylB6WEGLo1A7wY8aMwd9//40NGzZg2rRprVkmYmTqdr0XFWl+flVVCdLSjj1eO/0QystzuX08Hh/u7oO5XO/29t111vWuSA/r6sqOgKf0sISQ9kTtj6yamhpcuXKlVefCE+MgkymvnV5Z2fw59T16lMbler93L04pI5qZmQ2efLIn7O3fgK9vOEQiO52VXSSq7Xp3cKD0sISQ9kvtAB8bG9ua5SDtXGWl8trpNTWanS+X1+Dhw/PcVLa8vGtK+21t/R630sPh5RWEoKBYXLo0tsXrpwOAlRUb0N3cAFvbFl+OEEIMAnU6Eq09elQb1BXJZzRRVVWM1NSjj0e9H0JFRR63j8czgYfHEG7tdHv7btw+Re72lrCzY1vqnTsDlpYtvhwhhBgcCvBEbTU1bNe7IuFMebnm1ygsTOVGvd+/H6/U9W5u3gm+vmGQSMbD13eMTrve+Xy2y13RUjdv+WJvhBBi0CjAkyYput4Vq7Jp1/V+jgvqeXnXlfbb2XXlBsi5uw+GiYnu0ns2lR6WEEKMHQV40kBRUe0AucJCzddOr6wsetz1zo56r6go4PaxXe9D64x676rTslN6WEIIYVGAJ5DLa7veMzO163ovKEjhBsilp5+CXC7j9pmb26JLlzD4+YWjS5cxMDfvpLvCg9LDEkKIKhTgO6iqKuWud5ms+XPqkstlePDgLBfU8/NvKu23t+9ep+t9EPh83b/Vunen9LCEENIYCvAdSGlpbSu9oECbrvdHuHPnyOOu98OorKwdOs/nm8LD42kuqNvZ+em07Dxe7RrqLi7sCP6uXWmeOiGENIYCvBGTy4H8fDag29kBJ05onhq2oCAZyckHHne9nwbD1I6yE4ns0KXLWPj5hcPXd5TOu975fHaQnKsr+1Ckh5XL2QBPCCGkcRTgjUx1NdvlnpnJ/iuVsoHSTs0ZZ3K5DOnpfyElhQ3qBQW3lfbb2z8BiYRdZrVz56d03vUuENQGdEoPSwgh2qOPTyNQWlqb672gQPNWekVFIVJTjyA5+QBSUw+jsvIRt4/PN4WnZ/DjtdPHw9a2i24LD0oPSwghrYECfDvEMLVd71lZQEmJ5tfIz7/FzU1PTz9Tr+vdHl26jIVEEg4fn1EwN9f9KDYrq9qgrm7vAiGEEPVRgG8npFLlrvfq6ubPqUsmk+Hu3Tjcvn0YKSl/oKAgWWm/g4M/N0CO7XrX/Q
RyRXpYNzc2wBNCCGk9FOANmGLUe1YWkJenTdd7Ae7cOYyUlP9h3bqDKK8zwZ3PF8DLK+Tx2unjYGvrq+PSU3pYQgjRJwrwBoRh2Hvoiq734mJNz2eQn3+LGyD34MFfYJjabwVisQO6dBn3uOs9FGZm1jquAaWHJYQQQ0EBXs9kstqu96wszbvea2qkSE8//fh++gEUFt5R2u/o2BMSSRgmTLBDcfFcALpvRguFta10Sg9LCCGGgQK8HpSX145616brvbw8H6mphx+Pej+Cqqrapr6JiRBeXsPg5zcefn7j0amTN/h8Kbp3P4RLl0w0fq3GiMW1QZ3SwxJCiOGhAN9GFF3vmZnadb3n5d143PX+Bx4+PFuv690Jfn7j4Oc3/nHXe+uMYLOxqc353qlTq7wEIYQQHaEA30pkMiAnp3aQXGWlZufX1FTj/v1T3FS2R49SlfY7OfXi5qa7uQ0Aj6f7yeN108O6urKLuhBCCGkfKMDrUEVFba73nBzNu97LynLrdL0fRXV17QR3tut9ODeVzcbGU8elZ/H5gJNTbVBXpIclhBDSvlCAb6FHj2q73jXNj84wDHJzryEl5Q+kpBzAgwfnANSuAGNh4fy46z0cPj4jIRRa6rLoHIGAHfHu6sr+S+lhCSGk/aOPcg3V1LBrpytGvVdUaHa+TFaF+/fjuWVWi4ruKu13du4NP7/xkEjC4erav1W63gF2Trqile7oSOlhCSHE2FCAV0NlpXLXe01N8+fUVVaWgzt3DiE5+Q+kpR1FdXUpt8/ExAze3iMeB/XxsLb20HHpa1lZ1c5Rp/SwhBBi3CjAN+H2bTawFxRodh7b9X6VGyD38OF5KHe9u0AiYaexeXuPhFDYeqPXFOlhAWDYMGqpE0JIR0EBvgk3b6o/UI7teo/jgnpR0T2l/c7OfbgBcq6u/Vqt671uelhXV3alNrkcyMholZcjhBBioPQa4E+dOoU1a9YgISEBmZmZ2LdvHyZNmsTtZxgGS5cuxY8//ojCwkIEBQXhu+++g7+/v/4KXUdpafbjrvcDSEs7Bqm0jNtnamr+uOudzfVube3eauUwMWG73jt3pvSwhBBCWHoN8GVlZQgMDMRrr72GZ599tsH+1atXY926ddi2bRu6du2KFStWIDQ0FLdu3YKVHpYjYxgGOTlXuAFyGRl/o27Xu6WlKzdAztt7BAQCcauVRSisXZnNyYnSwxJCCFGm1wAfFhaGsLAwlfsYhsHXX3+NxYsX45lnngEAREdHw9nZGbt27cJbb72l8ryqqipUVVVxz4sfp42TSqWQSqVqlUtxHJ8vhUxWibt345CcfAjJyYdQXHxf6VhX176QSMZBIhkHF5c+4CnlbFXv9dQlFtdOZ7OzU04P29StBLlcqvRve0f1MXzGVqeaGrYe6n6GaHosIa3BYO/Bp6WlISsrC6NGjeK2mZmZITg4GGfPnm00wK9cuRJLly5tsP3YsWMQi9VrURcWFuLSpUu4eDEK//77r9IXBqFQiMDAQDz55JPo378/7Ljh6FkADqtdv5aoqmJH9GsqKytW94XRI6qP4TO2OsXGql+fusszE6IPBhvgs7KyAADOzs5K252dnXHv3j1VpwAAFi1ahPnz53PPi4uL4eHhgVGjRsHauvnlUTMyMuDt7a20zcqqMySSsZBIxsHbexgEAhEAIDWVfegSjwfY2tYut6rmd5ImyeVSZGXFwsUlFHx++79BT/UxfMZWJ5FIijt3YhEaGgqBmoNcijVddIIQHTPYAK/Aq7dMGcMwDbbVZWZmBjMV+VUFAoFaf5heXl7w9/dHVVUV3Nymws9vEpydeyu9pq5WZFNoq/SwfL7AKD5sFag+hs9Y6qQY46Lu54jiWEL0yWADvIuLCwC2Je+qmMgNICcnp0GrXtfOnj2LkydP4tKlsZDLW+ePlNLDEmL4TE3ZrI+00BJpjww2rPj4+MDFxQWxsbHo06cPAKC6uhrx8fFYtWpVq762SCRqletSel
hCDA+Px/aamZs3fCha7jRejrRHeg3wpaWlSElJ4Z6npaXh8uXLsLOzg6enJ+bNm4eoqChIJBJIJBJERUVBLBZj6tSpeiy1ZqysaqezUXpYQvRHIFAdyIVC5RkphBgLvQb4S5cuYdiwYdxzxeC46dOnY9u2bfjwww9RUVGBd999l0t0c+zYMb3MgdeEYpCcmxsb4AkhbYPHqw3c9YM55YogHY1eA3xISAgYhml0P4/HQ2RkJCIjI9uuUFpQpIdVtNRbqYefEPKYQKA6kLfWAFVC2iODvQdv6BTpYRX31GnALCG6xec3bIUrnlNrnJDmUYDXAKWHJUT36rbG6wZyao0T0jIU4JshFtfeT7e3p8E4hGhDVWtcEcjpizIhrYMCfBOCg9kBc4QQ9Sha4wIBu0Rxly6ApSXb+0UIaVsU4JtgY6PvEhBiePh81aPUzc1rcztIpcCVK+wsEhqfQoh+UIAnhKgkFKoO5NQaJ6R9oABPSAemaI2rCuSUaZGQ9o0CPCEdgKI1Xj+YU2ucEONFAZ4QI1G3NV4/kFNrnJCOhwI8Ie1M/da44kGD2QghdVGAJ8QAmZg0Pm+cWuOEEHVQgCdEj8zM2DXHAcDdnZ0zTq1xQoguUIAnpJWZmKgepa5ojUulwK1b7IJFFNgJIbpCAZ4QHVEE8PqBnII2IUQfKMATogFFa1zV3HFap4AQYkgowBNSD4/X+Lxxao0TQtoLCvCkw6rfGq8bzKk1Tghp7yjAE6OmqjWueJjSu58QYsToI44YBVPTxueNU2ucENIRUYAn7QaPVztvPCMD8PConTdOrXFCCFFGH4vE4JiaNj5vnMdj543fvAnY29OgN0IIaQwFeKIXita4qulm1BonhJCWo49S0qoUrfH6gZzujRNCSOuiAE9arH5rvG4wp9Y4IYToB338ErXVb40rHkIhtcYJIcTQUIAnShprjZubs4lhCCGEtA8U4DsogUB1IKfWOCGEGAcK8EaMx6vNn56RAXh61s4bp9Y4IYQYN76+C9CUyMhI8Hg8pYeLi4u+i2VwBALAygpwdATc3QE/PyAgAOjbF+jRA/D2Zo+zswMsLCi4E0JIR2DwLXh/f3/8+eef3HOTDhqd+PzG54130B8JIYSQJhh8gDc1NdWo1V5VVYWqqirueXFxMQBAKpVCKpWqdQ3FcXK5esfrUt1743X/FQpVHy+Xs4+mKOqjbv0NHdXH8BlbnbSpj7HUnbRfPIZhGH0XojGRkZFYs2YNbGxsYGZmhqCgIERFRcHX17fJc5YuXdpg+65duyAWi1uzuIQQwikvL8fUqVNRVFQEa2trfReHdEAGHeAPHz6M8vJydO3aFdnZ2VixYgVu3ryJa9euwd7eXuU5qlrwHh4eyMvLU/uPTCqVIjY2Fi4uoeDztU92rmlrvLUo6hMaGgqBESRvp/oYPmOrkzb1KS4uhoODAwV4ojcG3UUfFhbG/b9nz54YOHAgunTpgujoaMyfP1/lOWZmZjAzM2uwXSAQaPxBw+cLmg3w9e+N133wDWwIozY/A0NG9TF8xlYnTepjTPUm7ZNBB/j6LCws0LNnTyQnJ7f5awuFqlc4a+vWOCGEEKKOdhXgq6qqcOPGDQwdOrRNXs/Lq3beuKG1xgkhhJCmGHTYWrBgAeLj45GWloYLFy7gueeeQ3FxMaZPn94mr29rC4jFFNwJIYS0Pwbdgn/w4AFeeukl5OXlwdHREU899RTOnz8PLy8vfReNEEIIMWgGHeD37Nmj7yIQQggh7RJ1PhNCCCFGiAI8IYQQYoQowBNCCCFGiAI8IYQQYoQowBNCCCFGiAI8IYQQYoQMepqcLijW0lEsG6sOqVSK8vJyFBcXG0U+aaqPYTO2+gDGVydt6qP4zDHg9byIkTP6AF9SUgIA8PDw0HNJCCEdUUlJCWxsbPRdDNIBGfRysbogl8uRkZEBKysr8Hg8tc5RLDGbnp5uFMs8Un0Mm7HVBzC+OmlTH4ZhUFJSAjc3N/Ap3z
XRA6NvwfP5fLi7u2t1rrW1tVF8OClQfQybsdUHML46aVofarkTfaKvlYQQQogRogBPCCGEGCEK8CqYmZlhyZIlMDMz03dRdILqY9iMrT6A8dXJ2OpDOgajH2RHCCGEdETUgieEEEKMEAV4QgghxAhRgCeEEEKMEAV4QgghxAh12AC/ceNG+Pj4wNzcHP369cPp06ebPD4+Ph79+vWDubk5fH198f3337dRSdWjSX1+//13hIaGwtHREdbW1hg4cCCOHj3ahqVtnqa/H4W//voLpqam6N27d+sWUEOa1qeqqgqLFy+Gl5cXzMzM0KVLF2zZsqWNSts8Teuzc+dOBAYGQiwWw9XVFa+99hry8/PbqLRNO3XqFMLDw+Hm5gYej4f//ve/zZ5j6J8HhAAAmA5oz549jEAgYH766Sfm+vXrzNy5cxkLCwvm3r17Ko9PTU1lxGIxM3fuXOb69evMTz/9xAgEAua3335r45Krpml95s6dy6xatYr5+++/mdu3bzOLFi1iBAIB888//7RxyVXTtD4Kjx49Ynx9fZlRo0YxgYGBbVNYNWhTnwkTJjBBQUFMbGwsk5aWxly4cIH566+/2rDUjdO0PqdPn2b4fD7zzTffMKmpqczp06cZf39/ZtKkSW1cctUOHTrELF68mNm7dy8DgNm3b1+Txxv65wEhCh0ywA8YMIB5++23lbZ1796d+fjjj1Ue/+GHHzLdu3dX2vbWW28xTz31VKuVUROa1keVHj16MEuXLtV10bSibX2mTJnCfPrpp8ySJUsMKsBrWp/Dhw8zNjY2TH5+flsUT2Oa1mfNmjWMr6+v0rZvv/2WcXd3b7UyakudAG/onweEKHS4Lvrq6mokJCRg1KhRSttHjRqFs2fPqjzn3LlzDY4fPXo0Ll26BKlU2mplVYc29alPLpejpKQEdnZ2rVFEjWhbn61bt+LOnTtYsmRJaxdRI9rUZ//+/ejfvz9Wr16Nzp07o2vXrliwYAEqKiraoshN0qY+gwYNwoMHD3Do0CEwDIPs7Gz89ttvGDduXFsUWecM+fOAkLqMfrGZ+vLy8lBTUwNnZ2el7c7OzsjKylJ5TlZWlsrjZTIZ8vLy4Orq2mrlbY429alv7dq1KCsrwwsvvNAaRdSINvVJTk7Gxx9/jNOnT8PU1LDe0trUJzU1FWfOnIG5uTn27duHvLw8vPvuuygoKND7fXht6jNo0CDs3LkTU6ZMQWVlJWQyGSZMmID169e3RZF1zpA/Dwipq8O14BXqLx3LMEyTy8mqOl7Vdn3RtD4Ku3fvRmRkJGJiYuDk5NRaxdOYuvWpqanB1KlTsXTpUnTt2rWtiqcxTX4/crkcPB4PO3fuxIABAzB27FisW7cO27ZtM4hWPKBZfa5fv445c+bg888/R0JCAo4cOYK0tDS8/fbbbVHUVmHonweEAB2wBe/g4AATE5MGrY2cnJwG38oVXFxcVB5vamoKe3v7ViurOrSpj0JMTAxmzJiBX3/9FSNHjmzNYqpN0/qUlJTg0qVLSExMxOzZswGwAZJhGJiamuLYsWMYPnx4m5RdFW1+P66urujcubPSUqNPPPEEGIbBgwcPIJFIWrXMTdGmPitXrsTgwYOxcOFCAECvXr1gYWGBoUOHYsWKFe2uxWvInweE1NXhWvBCoRD9+vVDbGys0vbY2FgMGjRI5TkDBw5scPyxY8fQv39/CASCViurOrSpD8C23CMiIrBr1y6DuheqaX2sra2RlJSEy5cvc4+3334b3bp1w+XLlxEUFNRWRVdJm9/P4MGDkZGRgdLSUm7b7du3wefz4e7u3qrlbY429SkvLwefr/xRY2JiAqC25dueGPLnASFK9DS4T68U03w2b97MXL9+nZk3bx5jYWHB3L17l2EYhvn444+ZV199lTteMS3m/fffZ65fv85s3rzZoKbFaFqfXbt2Maampsx3333HZGZmco9Hjx7pqwpKNK1PfYY2il7T+pSUlDDu7u7Mc889x1
y7do2Jj49nJBIJ88Ybb+irCko0rc/WrVsZU1NTZuPGjcydO3eYM2fOMP3792cGDBigryooKSkpYRITE5nExEQGALNu3TomMTGRm/bX3j4PCFHokAGeYRjmu+++Y7y8vBihUMj07duXiY+P5/ZNnz6dCQ4OVjo+Li6O6dOnDyMUChlvb29m06ZNbVzipmlSn+DgYAZAg8f06dPbvuCN0PT3U5ehBXiG0bw+N27cYEaOHMmIRCLG3d2dmT9/PlNeXt7GpW6cpvX59ttvmR49ejAikYhxdXVlXn75ZebBgwdtXGrVTp482eTfQ3v8PCCEYRiGloslhBBCjFCHuwdPCCGEdAQU4AkhhBAjRAGeEEIIMUIU4AkhhBAjRAGeEEIIMUIU4AkhhBAjRAGeEEIIMUIU4AkhhBAjRAGeEBXu3r0LHo+Hy5cv67sohBCiFQrwpN2KiIjApEmTGmyPi4sDj8fDo0ePtL62h4cHMjMzERAQoH0BCSFEjzrccrGENKe6uhpCoRAuLi76LgohhGiNWvDE6O3duxf+/v4wMzODt7c31q5dq7Tf29sbK1asQEREBGxsbDBz5swGXfQRERHg8XgNHnFxcQCAwsJCTJs2Dba2thCLxQgLC0NycjL3Gtu2bUOnTp1w9OhRPPHEE7C0tMSYMWOQmZnZVj8GQkgHQwGeGLWEhAS88MILePHFF5GUlITIyEh89tln2LZtm9Jxa9asQUBAABISEvDZZ581uM4333yDzMxM7jF37lw4OTmhe/fuANgvAJcuXcL+/ftx7tw5MAyDsWPHQiqVctcoLy/Hl19+ie3bt+PUqVO4f/8+FixY0Kr1J4R0YHpezY4QrU2fPp0xMTFhLCwslB7m5uYMAKawsJCZOnUqExoaqnTewoULmR49enDPvby8mEmTJikdk5aWxgBgEhMTG7zu3r17GTMzM+b06dMMwzDM7du3GQDMX3/9xR2Tl5fHiEQi5v/+7/8YhmHXRAfApKSkcMd89913jLOzc4t/DoQQogq14Em7NmzYMFy+fFnp8fPPP3P7b9y4gcGDByudM3jwYCQnJ6Ompobb1r9/f7VeLzExEdOmTcN3332HIUOGcK9hamqKoKAg7jh7e3t069YNN27c4LaJxWJ06dKFe+7q6oqcnBzNKkwIIWqiQXakXbOwsICfn5/StgcPHnD/ZxgGPB5PaT/DMCqv05ysrCxMmDABM2bMwIwZM5q8nqrXFggESvt5PF6j5xJCSEtRC54YtR49euDMmTNK286ePYuuXbvCxMRE7etUVlZi4sSJ6N69O9atW9fgNWQyGS5cuMBty8/Px+3bt/HEE0+0rAKEEKIlasETo/bBBx/gySefxPLlyzFlyhScO3cOGzZswMaNGzW6zltvvYX09HQcP34cubm53HY7OztIJBJMnDgRM2fOxA8//AArKyt8/PHH6Ny5MyZOnKjrKhFCiFqoBU+MWt++ffF///d/2LNnDwICAvD5559j2bJliIiI0Og68fHxyMzMRI8ePeDq6so9zp49CwDYunUr+vXrh/Hjx2PgwIFgGAaHDh1q0C1PCCFthcfQTUBCCCHE6FALnhBCCDFCFOAJIYQQI0QBnhBCCDFCFOAJIYQQI0QBnhBCCDFCFOAJIYQQI0QBnhBCCDFCFOAJIYQQI0QBnhBCCDFCFOAJIYQQI0QBnhBCCDFC/w9tmPyLlQOVXwAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| hide\n", "# Create single mixture and broadcast to N,H,K\n", @@ -3766,7 +2319,7 @@ "print('stds.shape (N,H,K) \\t', stds.shape)\n", "\n", "distr = GMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (means, stds, weights)\n", + "distr_args = (means, stds)\n", "samples, sample_mean, quants = distr.sample(distr_args)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", @@ -3866,55 +2419,47 @@ " # If True, predict_step will return Distribution's parameters\n", " self.return_params = return_params\n", " if self.return_params:\n", - " total_count_names = [\n", - " f\"-total_count-{i}\" for i in range(1, n_components + 1)\n", - " ]\n", + " total_count_names = [f\"-total_count-{i}\" for i in range(1, n_components + 1)]\n", " probs_names = [f\"-probs-{i}\" for i in range(1, n_components + 1)]\n", - " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n", - " self.param_names = [i for j in zip(total_count_names, probs_names, weight_names) for i in j]\n", - " self.output_names = self.output_names + self.param_names\n", + " param_names = [i for j in zip(total_count_names, probs_names) for i in j]\n", + " self.output_names = self.output_names + param_names\n", "\n", " # Add first output entry for the sample_mean\n", - " self.output_names.insert(0, \"\")\n", + " self.output_names.insert(0, \"\") \n", "\n", - " self.outputsize_multiplier = 3 * n_components\n", + " self.outputsize_multiplier = 2 * n_components\n", " self.is_distribution_output = True\n", "\n", " def domain_map(self, output: torch.Tensor):\n", - " mu, alpha, weights = output.chunk(3, dim=-1)\n", + " mu, alpha = torch.tensor_split(output, 2, dim=-1)\n", + " return (mu, alpha)\n", "\n", - " return mu, alpha, weights\n", - "\n", - " def scale_decouple(\n", - " self,\n", - " output,\n", - " loc: Optional[torch.Tensor] = None,\n", - " scale: Optional[torch.Tensor] = 
None,\n", - " eps: float = 1e-6,\n", - " ):\n", - " \"\"\"Scale Decouple\n", + " def scale_decouple(self, \n", + " output,\n", + " loc: Optional[torch.Tensor] = None,\n", + " scale: Optional[torch.Tensor] = None,\n", + " eps: float=0.2):\n", + " \"\"\" Scale Decouple\n", "\n", " Stabilizes model's output optimization, by learning residual\n", " variance and residual location based on anchoring `loc`, `scale`.\n", " Also adds domain protection to the distribution parameters.\n", " \"\"\"\n", " # Efficient NBinomial parametrization\n", - " mu, alpha, weights = output\n", - " mu = F.softplus(mu) + eps\n", - " alpha = F.softplus(alpha) + eps # alpha = 1/total_counts\n", - " weights = F.softmax(weights, dim=-1)\n", + " mu, alpha = output\n", + " mu = F.softplus(mu) + 1e-8\n", + " alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts\n", " if (loc is not None) and (scale is not None):\n", " loc = loc.view(mu.size(dim=0), 1, -1)\n", " mu *= loc\n", - " alpha /= loc + 1.0\n", + " alpha /= (loc + 1.)\n", "\n", " # mu = total_count * (probs/(1-probs))\n", " # => probs = mu / (total_count + mu)\n", " # => probs = mu / [total_count * (1 + mu * (1/total_count))]\n", " total_count = 1.0 / alpha\n", - " probs = (mu * alpha / (1.0 + mu * alpha))\n", - " probs = torch.clamp(probs, eps, 1 - eps)\n", - " return (total_count, probs, weights)\n", + " probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 \n", + " return (total_count, probs)\n", "\n", " def sample(self, distr_args, num_samples=None):\n", " \"\"\"\n", @@ -3936,10 +2481,16 @@ " if num_samples is None:\n", " num_samples = self.num_samples\n", " \n", - " total_count, probs, weights = distr_args\n", + " total_count, probs = distr_args\n", " B, H, K = total_count.size()\n", " Q = len(self.quantiles)\n", " assert total_count.shape == probs.shape\n", + "\n", + " # Sample K ~ Mult(weights)\n", + " # shared across B, H\n", + " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n", + " \n", + " weights = (1/K) * 
torch.ones_like(probs, device=probs.device)\n", " \n", " # Avoid loop, vectorize\n", " weights = weights.reshape(-1, K)\n", @@ -3982,15 +2533,17 @@ "\n", " def neglog_likelihood(self,\n", " y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None):\n", "\n", " if mask is None: \n", " mask = torch.ones_like(y)\n", " \n", - " total_count, probs, weights = distr_args\n", + " total_count, probs = distr_args\n", " B, H, K = total_count.size()\n", " \n", + " weights = (1/K) * torch.ones_like(probs, device=probs.device)\n", + " \n", " y = y[:,:, None]\n", " mask = mask[:,:,None]\n", "\n", @@ -4014,7 +2567,7 @@ " return loss\n", " \n", " def __call__(self, y: torch.Tensor,\n", - " distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],\n", + " distr_args: Tuple[torch.Tensor, torch.Tensor],\n", " mask: Union[torch.Tensor, None] = None,):\n", "\n", " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)" @@ -4055,40 +2608,7 @@ "execution_count": null, "id": "b67e2931", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "weights.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "counts.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "probs.shape (N,H,K) \t torch.Size([2, 2, 3])\n", - "samples.shape (N,H,num_samples) torch.Size([2, 2, 2000])\n", - "sample_mean.shape (N,H) torch.Size([2, 2, 1])\n", - "quants.shape (N,H,Q) \t\t torch.Size([2, 2, 5])\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| hide\n", "# Create single mixture and broadcast to N,H,K\n", @@ -4105,7 +2625,7 @@ "print('probs.shape (N,H,K) \\t', probs.shape)\n", "\n", "model = NBMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n", - "distr_args = (counts, probs, weights)\n", + "distr_args = (counts, probs)\n", "samples, sample_mean, quants = model.sample(distr_args, num_samples=2000)\n", "\n", "print('samples.shape (N,H,num_samples) ', samples.shape)\n", diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb index 6bafac332..c1852c18a 100644 --- a/nbs/models.deepnpts.ipynb +++ b/nbs/models.deepnpts.ipynb @@ -22,7 +22,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a non-parametric baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a tunable strategy. This strategy is learned by exploiting the information across multiple related time series. This model provides a strong, simple baseline for time series forecasting.\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a non-parametric baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a tunable strategy. This strategy is learned by exploiting the information across multiple related time series. This model provides a strong, simple baseline for time series forecasting. \n", "\n", "\n", "**References**
\n", @@ -30,13 +30,10 @@ "\n", "\n", ":::{.callout-warning collapse=\"false\"}\n", - "#### Exogenous Variables, Losses, and Parameters Availability\n", + "#### Losses\n", "\n", - "Given the sampling procedure during inference, DeepNPTS only supports `DistributionLoss` as training loss.\n", + "This implementation differs from the original work in that a weighted sum of the empirical distribution is returned as forecast, rather than a sampled distributional output. Consequently, DeepNPTS only supports point losses as training loss.\n", "\n", - "Note that DeepNPTS generates a non-parametric forecast distribution using Monte Carlo. We use this sampling procedure also during validation to make it closer to the inference procedure. Therefore, only the `MQLoss` is available for validation.\n", - "\n", - "Aditionally, Monte Carlo implies that historic exogenous variables are not available for the model.\n", ":::" ] }, @@ -47,17 +44,15 @@ "outputs": [], "source": [ "#| export\n", - "import numpy as np\n", - "\n", "import torch\n", "import torch.nn as nn\n", + "import torch.nn.functional as F\n", "import neuralforecast.losses.pytorch as losses\n", "from typing import Optional\n", - "from functools import partial\n", "\n", "\n", "from neuralforecast.common._base_windows import BaseWindows\n", - "from neuralforecast.losses.pytorch import MQLoss, GMM, PMM, NBMM\n" + "from neuralforecast.losses.pytorch import MAE\n" ] }, { @@ -102,7 +97,7 @@ "class DeepNPTS(BaseWindows):\n", " \"\"\" DeepNPTS\n", "\n", - " Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", + " Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. 
This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", "\n", " **Parameters:**
\n", " `h`: int, Forecast horizon.
\n", @@ -111,7 +106,6 @@ " `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", " `dropout`: float=0.1, dropout.
\n", " `n_layers`: int=2, number of dense layers.
\n", - " `trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", " `stat_exog_list`: str list, static exogenous columns.
\n", " `hist_exog_list`: str list, historic exogenous columns.
\n", " `futr_exog_list`: str list, future exogenous columns.
\n", @@ -152,15 +146,14 @@ " batch_norm: bool = True,\n", " dropout: float = 0.1,\n", " n_layers: int = 2,\n", - " trajectory_samples: int = 100,\n", " futr_exog_list = None,\n", " hist_exog_list = None,\n", " stat_exog_list = None,\n", " exclude_insample_y = False,\n", - " loss = GMM(),\n", - " valid_loss = MQLoss(level=[80, 90]),\n", + " loss = MAE(),\n", + " valid_loss = MAE(),\n", " max_steps: int = 1000,\n", - " learning_rate: float = 1e-5,\n", + " learning_rate: float = 1e-3,\n", " num_lr_decays: int = 3,\n", " early_stop_patience_steps: int =-1,\n", " val_check_steps: int = 100,\n", @@ -178,25 +171,12 @@ " optimizer_kwargs = None,\n", " **trainer_kwargs):\n", "\n", - " if hist_exog_list is not None:\n", - " raise Exception('DeepNPTS does not support historical exogenous variables.')\n", - "\n", " if exclude_insample_y:\n", " raise Exception('DeepNPTS has no possibility for excluding y.')\n", - " \n", - " supported_losses = (losses.GMM,\n", - " losses.PMM,\n", - " losses.NBMM)\n", "\n", - " if not isinstance(loss, supported_losses):\n", - " raise Exception('DeepNPTS only supports GMM, PMM or NBMM as loss function.') \n", - " \n", - " if not isinstance(valid_loss, losses.MQLoss):\n", - " raise Exception('DeepNPTS only supports MQLoss as validation loss.')\n", + " if not isinstance(loss, losses.BasePointLoss):\n", + " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function.') \n", " \n", - " # Overwrite n_components, it has to be the input_size in DeepNPTS\n", - " loss.n_components = input_size\n", - " \n", " # Inherit BaseWindows class\n", " super(DeepNPTS, self).__init__(h=h,\n", " input_size=input_size,\n", @@ -226,16 +206,15 @@ " **trainer_kwargs)\n", "\n", " self.h = h\n", - " self.h_backup = self.h # Used because h=1 during training\n", - " self.use_softmax = True\n", " self.hidden_size = hidden_size\n", " self.dropout = dropout\n", - " self.trajectory_samples = trajectory_samples\n", "\n", " self.futr_exog_size = 
len(self.futr_exog_list)\n", " self.stat_exog_size = len(self.stat_exog_list)\n", + " self.hist_exog_size = len(self.hist_exog_list)\n", "\n", - " input_dim = input_size * (1 + self.futr_exog_size) + self.stat_exog_size\n", + " input_dim = input_size * (1 + self.futr_exog_size + self.hist_exog_size) + self.stat_exog_size + self.h * self.futr_exog_size\n", + " \n", " # Create DeepNPTSNetwork\n", " modules = [] \n", " for i in range(n_layers):\n", @@ -246,503 +225,57 @@ " if dropout > 0.0:\n", " modules.append(nn.Dropout(dropout))\n", "\n", + " modules.append(nn.Linear(hidden_size, input_size * self.h))\n", " self.deepnptsnetwork = nn.Sequential(*modules)\n", - " self.deepnptsnetwork.apply(partial(self._init_weights, scale=0.07))\n", - "\n", - " # Add output layers for Mixture distribution \n", - " output_modules = []\n", - " if dropout > 0.0:\n", - " output_modules.append(nn.Dropout(self.dropout))\n", - " \n", - " if isinstance(loss, GMM):\n", - " output_modules.append(nn.Linear(hidden_size, input_size + 1))\n", - " elif isinstance(loss, PMM):\n", - " output_modules.append(nn.Linear(hidden_size, input_size))\n", - " elif isinstance(loss, NBMM):\n", - " output_modules.append(nn.Linear(hidden_size, input_size))\n", - "\n", - " self.output_layer = nn.Sequential(*output_modules)\n", - " self.output_layer.apply(self._init_weights)\n", - "\n", - "\n", - " @staticmethod\n", - " def _init_weights(module, scale=1.0):\n", - " if type(module) == nn.Linear:\n", - " nn.init.uniform_(module.weight, -scale, scale)\n", - " nn.init.zeros_(module.bias)\n", - "\n", - " def _domain_map(self, o_t, insample_y):\n", - " if isinstance(self.loss, GMM):\n", - " weights = o_t[:, :-1] # [B, L + 1] -> [B, L]\n", - " kernel_width = o_t[:, -1:] # [B, L + 1] -> [B, 1]\n", - " kernel_width = torch.repeat_interleave(input=kernel_width,\n", - " repeats=weights.shape[1],\n", - " dim=-1) # [B, 1] -> [B, L]\n", - " output = torch.cat([insample_y, kernel_width, weights], dim=-1) # [B, L] + [B, L] + [B, 
L] = [B, 3 * L]\n", - " output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * L]\n", - " elif isinstance(self.loss, PMM):\n", - " weights = o_t # [B, L] -> [B, L]\n", - " output = torch.cat([insample_y, weights], dim=-1) # [B, L] + [B, L] = [B, 2 * L]\n", - " output = output.unsqueeze(1) # [B, 2 * L] = [B, 1, 2 * L] \n", - " elif isinstance(self.loss, NBMM):\n", - " weights = torch.ones_like(o_t) # [B, L] -> [B, L]\n", - " output = torch.cat([insample_y, o_t, weights], dim=-1) # [B, L] + [B, L] + [B, L] = [B, 3 * L]\n", - " output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * \n", - "\n", - " else:\n", - " raise NotImplementedError\n", - " \n", - " return output\n", - "\n", - " # Override BaseWindows method\n", - " def training_step(self, batch, batch_idx):\n", - " \n", - " # Only train one-step ahead\n", - " self.h = 1\n", - " self.quantiles = self.loss.quantiles\n", - "\n", - " # Create and normalize windows [Ws, L+H, C]\n", - " y_idx = batch[\"y_idx\"]\n", - " windows = self._create_windows(batch, step=\"train\")\n", - " original_outsample_y = torch.clone(windows[\"temporal\"][:, -self.h :, y_idx])\n", - " windows = self._normalization(windows=windows, y_idx=y_idx)\n", - "\n", - " # Parse windows\n", - " (\n", - " insample_y,\n", - " insample_mask,\n", - " outsample_y,\n", - " outsample_mask,\n", - " _,\n", - " futr_exog,\n", - " stat_exog,\n", - " ) = self._parse_windows(batch, windows)\n", - "\n", - " windows_batch = dict(\n", - " insample_y=insample_y, # [Ws, L]\n", - " insample_mask=insample_mask, # [Ws, L]\n", - " futr_exog=futr_exog, # [Ws, L+H]\n", - " hist_exog=None, \n", - " stat_exog=stat_exog, # [Ws, 1]\n", - " y_idx=y_idx # [Ws, 1]\n", - " ) \n", - "\n", - " # Model Predictions\n", - " output = self.train_forward(windows_batch)\n", - "\n", - " _, y_loc, y_scale = self._inv_normalization(\n", - " y_hat=outsample_y, \n", - " temporal_cols=batch[\"temporal_cols\"], \n", - " y_idx=y_idx\n", - " )\n", - " # outsample_y = original_insample_y\n", - " 
outsample_y = original_outsample_y\n", - " distr_args = self.loss.scale_decouple(\n", - " output=output, loc=y_loc, scale=y_scale\n", - " )\n", - " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n", - "\n", - " if torch.isnan(loss):\n", - " print(\"Model Parameters\", self.hparams)\n", - " print(\"insample_y\", torch.isnan(insample_y).sum())\n", - " print(\"outsample_y\", torch.isnan(outsample_y).sum())\n", - " print(\"output\", torch.isnan(output).sum())\n", - " raise Exception(\"Loss is NaN, training stopped.\")\n", - "\n", - " self.log(\"train_loss\", loss, prog_bar=True, on_epoch=True)\n", - " self.train_trajectories.append((self.global_step, float(loss)))\n", - "\n", - " self.h = self.h_backup \n", - " \n", - " return loss\n", - "\n", - " # Override BaseWindows method\n", - " def validation_step(self, batch, batch_idx):\n", - "\n", - " self.h = self.h_backup\n", - " self.quantiles = self.valid_loss.quantiles\n", - "\n", - " if self.val_size == 0:\n", - " return np.nan\n", - "\n", - " # TODO: Hack to compute number of windows\n", - " windows = self._create_windows(batch, step=\"val\")\n", - " n_windows = len(windows[\"temporal\"])\n", - " y_idx = batch[\"y_idx\"]\n", - "\n", - " # Number of windows in batch\n", - " windows_batch_size = self.inference_windows_batch_size\n", - " if windows_batch_size < 0:\n", - " windows_batch_size = n_windows\n", - " n_batches = int(np.ceil(n_windows / windows_batch_size))\n", - "\n", - " valid_losses = []\n", - " batch_sizes = []\n", - " for i in range(n_batches):\n", - " # Create and normalize windows [Ws, L+H, C]\n", - " w_idxs = np.arange(\n", - " i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)\n", - " )\n", - " windows = self._create_windows(batch, step=\"val\", w_idxs=w_idxs)\n", - " original_outsample_y = torch.clone(windows[\"temporal\"][:, -self.h:, 0])\n", - " windows = self._normalization(windows=windows, y_idx=y_idx)\n", - "\n", - " # Parse windows\n", - " (\n", - " 
insample_y,\n", - " insample_mask,\n", - " _,\n", - " outsample_mask,\n", - " _,\n", - " futr_exog,\n", - " stat_exog,\n", - " ) = self._parse_windows(batch, windows)\n", - " \n", - " windows_batch = dict(\n", - " insample_y=insample_y, # [Ws, L]\n", - " insample_mask=insample_mask, # [Ws, L]\n", - " futr_exog=futr_exog, # [Ws, L+H]\n", - " hist_exog=None, # [Ws, L]\n", - " stat_exog=stat_exog,\n", - " y_idx=y_idx,\n", - " ) # [Ws, 1]\n", - "\n", - " # Model Predictions\n", - " output_batch = self(windows_batch)\n", - " # Monte Carlo already returns y_hat with mean and quantiles\n", - " output_batch = output_batch[:,:, 1:] # Remove mean\n", - " valid_loss_batch = self.valid_loss(y=original_outsample_y, y_hat=output_batch, mask=outsample_mask)\n", - " valid_losses.append(valid_loss_batch)\n", - " batch_sizes.append(len(output_batch))\n", - "\n", - " valid_loss = torch.stack(valid_losses)\n", - " batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)\n", - " valid_loss = torch.sum(valid_loss * batch_sizes) / torch.sum(batch_sizes)\n", - "\n", - " if torch.isnan(valid_loss):\n", - " raise Exception(\"Loss is NaN, training stopped.\")\n", - "\n", - " self.log(\"valid_loss\", valid_loss, prog_bar=True, on_epoch=True)\n", - " self.validation_step_outputs.append(valid_loss)\n", - " return valid_loss\n", - "\n", - " # Override BaseWindows method\n", - " def predict_step(self, batch, batch_idx):\n", - "\n", - " self.h == self.h_backup\n", - " self.quantiles = self.loss.quantiles\n", - "\n", - " # TODO: Hack to compute number of windows\n", - " windows = self._create_windows(batch, step='predict')\n", - " n_windows = len(windows['temporal'])\n", - " y_idx = batch['y_idx']\n", - "\n", - " # Number of windows in batch\n", - " windows_batch_size = self.inference_windows_batch_size\n", - " if windows_batch_size < 0:\n", - " windows_batch_size = n_windows\n", - " n_batches = int(np.ceil(n_windows/windows_batch_size))\n", - "\n", - " y_hats = []\n", - " for i in 
range(n_batches):\n", - " # Create and normalize windows [Ws, L+H, C]\n", - " w_idxs = np.arange(i*windows_batch_size, \n", - " min((i+1)*windows_batch_size, n_windows))\n", - " windows = self._create_windows(batch, step='predict', w_idxs=w_idxs)\n", - " windows = self._normalization(windows=windows, y_idx=y_idx)\n", - "\n", - " # Parse windows\n", - " insample_y, insample_mask, _, _, _, futr_exog, stat_exog = self._parse_windows(batch, windows)\n", - " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n", - " insample_mask=insample_mask, # [Ws, L]\n", - " futr_exog=futr_exog, # [Ws, L+H]\n", - " stat_exog=stat_exog,\n", - " y_idx=y_idx)\n", - " \n", - " # Model Predictions\n", - " y_hat = self(windows_batch)\n", - " # Monte Carlo already returns y_hat with mean and quantiles\n", - " y_hats.append(y_hat)\n", - " y_hat = torch.cat(y_hats, dim=0)\n", - " return y_hat\n", - "\n", - " def train_forward(self, windows_batch):\n", - " # Parse windows_batch\n", - " x_t = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", - " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n", - " stat_exog = windows_batch['stat_exog'] # [B, S]\n", - "\n", - " batch_size, seq_len = x_t.shape[:2] # B = batch_size, L = seq_len\n", - "\n", - " # Concatenate x_t with future exogenous\n", - " if self.futr_exog_size > 0: \n", - " futr_exog_t = futr_exog[:, :seq_len] # [B, L + h, F] -> [B, L, F]\n", - " x_t = torch.cat((x_t, futr_exog_t), dim=2) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] \n", - " \n", - " x_t = x_t.reshape(batch_size, -1) # [B, L, 1 + F] -> [B, L * (1 + F)]\n", - "\n", - " # Concatenate x_t with static exogenous\n", - " if self.stat_exog_size > 0:\n", - " x_t = torch.cat((x_t, stat_exog), dim=1) # [B, L * (1 + F)] + [B, S] -> [B, L * (1 + F) + S]\n", - "\n", - " # Run through DeepNPTSNetwork\n", - " h_t = self.deepnptsnetwork(x_t) # [B, L * (1 + F) + S] -> [B, hidden_size]\n", - " o_t = self.output_layer(h_t) # [B, hidden_size] -> [B, L + 1]\n", - "\n", - " output 
= self._domain_map(o_t, windows_batch['insample_y']) # [B, L + 1], [B, L] -> [B, 3 * L]\n", - " output = self.loss.domain_map(output) # [B, 3 * L] -> ([B, L], [B, L], [B, L])\n", - "\n", - " return output\n", "\n", " def forward(self, windows_batch):\n", " # Parse windows_batch\n", - " insample_y_t = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", + " x = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", + " hist_exog = windows_batch['hist_exog'] # [B, L, X]\n", " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n", " stat_exog = windows_batch['stat_exog'] # [B, S]\n", - " y_idx = windows_batch['y_idx']\n", - "\n", - " batch_size, seq_len = insample_y_t.shape[:2] # B = batch_size, L = seq_len\n", - " device = insample_y_t.device\n", - " dtype = insample_y_t.dtype\n", - "\n", - " # Repeat insample_y for trajectory samples\n", - " insample_y_t = torch.repeat_interleave(input=insample_y_t, \n", - " repeats=self.trajectory_samples, \n", - " dim=0) # [B, L, 1] -> [B * n_samples, L, 1]\n", - " \n", - " # Input x_t is insample_y at time t\n", - " x_t = insample_y_t\n", "\n", - " # Repeat futr_exog if available for trajectory samples and add to x_t \n", + " batch_size, seq_len = x.shape[:2] # B = batch_size, L = seq_len\n", + " insample_y = windows_batch['insample_y'].unsqueeze(-1) \n", + " \n", + " # Concatenate x_t with future exogenous of input\n", " if self.futr_exog_size > 0: \n", - " futr_exog = torch.repeat_interleave(input=futr_exog, \n", - " repeats=self.trajectory_samples, \n", - " dim=0) # [B, L + h, F] -> [B * n_samples, L + h, F] \n", - " x_t = torch.cat((x_t, futr_exog[:, :seq_len]), dim=2) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] \n", + " x = torch.cat((x, futr_exog[:, :seq_len]), dim=2) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] \n", " \n", - " x_t = x_t.reshape(batch_size * self.trajectory_samples, -1) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)]\n", + " # Concatenate x_t with 
historic exogenous\n", + " if self.hist_exog_size > 0: \n", + " x = torch.cat((x, hist_exog), dim=2) # [B, L, 1 + F] + [B, L, X] -> [B, L, 1 + F + X] \n", "\n", - " # Repeat stat_exog if available for trajectory samples and add to x_t\n", - " if self.stat_exog_size > 0:\n", - " stat_exog = torch.repeat_interleave(\n", - " input=stat_exog, \n", - " repeats=self.trajectory_samples, \n", - " dim=0) # [B, S] -> [B * n_samples, S] \n", - " x_t = torch.cat((x_t, stat_exog), dim=1) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S]\n", + " x = x.reshape(batch_size, -1) # [B, L, 1 + F + X] -> [B, L * (1 + F + X)]\n", "\n", - " # Scales for inverse normalization\n", - " y_scale = self.scaler.x_scale[:, :, y_idx]\n", - " y_loc = self.scaler.x_shift[:, :, y_idx]\n", - " y_scale = torch.repeat_interleave(input=y_scale, \n", - " repeats=self.trajectory_samples, \n", - " dim=0)\n", - " y_loc = torch.repeat_interleave(input=y_loc, \n", - " repeats=self.trajectory_samples, \n", - " dim=0)\n", - " # Create forecasts tensor\n", - " forecasts = torch.zeros((batch_size, \n", - " self.h,\n", - " len(self.quantiles) + 1), \n", - " device=device, \n", - " dtype=dtype)\n", - " \n", - " # Recursive predictions\n", - " for t in range(self.h):\n", - " # Run input throught DeepNPTSNetwork\n", - " h_t = self.deepnptsnetwork(x_t) # [B * n_samples, L * (1 + F) + S] -> [B, hidden_size]\n", - " o_t = self.output_layer(h_t) # [B * n_samples, hidden_size] -> [B * n_samples, L (+ 1)]\n", - " output = self._domain_map(o_t, insample_y_t.squeeze(-1)) # [B * n_samples, L + 1], [B * n_samples, L] -> [B * n_samples, 3 * L]\n", - " output = self.loss.domain_map(output) # [B * n_samples, 3 * L] -> ([B * n_samples, L], [B * n_samples, L], [B * n_samples, L])\n", - "\n", - " # Inverse normalization\n", - " distr_args = self.loss.scale_decouple(output=output, \n", - " loc=y_loc, \n", - " scale=y_scale)\n", + " # Concatenate x with static exogenous\n", + " if 
self.stat_exog_size > 0:\n", + " x = torch.cat((x, stat_exog), dim=1) # [B, L * (1 + F + X)] + [B, S] -> [B, L * (1 + F + X) + S]\n", "\n", - " # Sample and create probabilistic outputs\n", - " samples_t_flat, _, _ = self.loss.sample(distr_args=distr_args, \n", - " num_samples=1)\n", + " # Concatenate x_t with future exogenous of horizon\n", + " if self.futr_exog_size > 0:\n", + " futr_exog = futr_exog[:, seq_len:] # [B, L + h, F] -> [B, h, F]\n", + " futr_exog = futr_exog.reshape(batch_size, -1) # [B, L + h, F] -> [B, h * F]\n", + " x = torch.cat((x, futr_exog), dim=1) # [B, L * (1 + F + X) + S] + [B, h * F] -> [B, L * (1 + F + X) + S + h * F] \n", "\n", - " samples_t_flat = samples_t_flat.squeeze()\n", - " samples_t = samples_t_flat.reshape(batch_size, \n", - " self.trajectory_samples) # [B * n_samples] -> [B, n_samples] \n", - " \n", - " samples_t_mean = torch.mean(samples_t, dim=-1) # [B, n_samples] -> [B] \n", - " quantiles_t = torch.quantile(input=samples_t, \n", - " q=self.quantiles, \n", - " dim=-1) # [B, n_samples] -> [Q, B]\n", - " forecasts[:, t, 0] = samples_t_mean\n", - " forecasts[:, t, 1:] = quantiles_t.permute(1, 0)\n", + " # Run through DeepNPTSNetwork\n", + " weights = self.deepnptsnetwork(x) # [B, L * (1 + F + X) + S + h * F] -> [B, L * h]\n", "\n", - " insample_y_t_next = self.scaler.scaler(samples_t_flat, \n", - " y_loc.squeeze(), \n", - " y_scale.squeeze()) # [B * n_samples] -> [B * n_samples]\n", - " insample_y_t_next = insample_y_t_next.unsqueeze(-1)\\\n", - " .unsqueeze(-1) # [B * n_samples] -> [B * n_samples, 1, 1]\n", + " # Apply softmax for weighted input predictions\n", + " weights = weights.reshape(batch_size, seq_len, -1) # [B, L * h] -> [B, L, h]\n", + " x = F.softmax(weights, dim=1) * insample_y # [B, L, h] * [B, L, 1] = [B, L, h]\n", + " output = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1]\n", "\n", - " # Update insample_y_t \n", - " insample_y_t = torch.cat([insample_y_t[:, 1:], \n", - " insample_y_t_next], \n", - " 
dim=1) # [B * n_samples, L - 1, 1] + [B * n_samples, 1, 1] -> [B * n_samples, L, 1]\n", - " \n", - " # Update input\n", - " x_t = insample_y_t\n", - " # Concatenate x_t with future exogenous\n", - " if self.futr_exog_size > 0: \n", - " x_t = torch.cat((x_t, \n", - " futr_exog[:, t:seq_len + t]), \n", - " dim=2) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] \n", - " \n", - " x_t = x_t.reshape(batch_size * self.trajectory_samples\n", - " , -1) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)]\n", + " forecast = self.loss.domain_map(output) # [B, h, 1] -> [B, h, 1]\n", "\n", - " # Concatenate x_t with static exogenous\n", - " if self.stat_exog_size > 0:\n", - " x_t = torch.cat((x_t, stat_exog), dim=1) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S]\n", - " \n", - " return forecasts\n", - "\n" + " return forecast" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L20){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DeepNPTS\n", - "\n", - "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", - "> dropout:float=0.5, n_layers:int=2, trajectory_samples:int=100,\n", - "> futr_exog_list=None, hist_exog_list=None, stat_exog_list=None,\n", - "> exclude_insample_y=False, loss=GMM(), valid_loss=MQLoss(),\n", - "> max_steps:int=1000, learning_rate:float=0.001,\n", - "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", - "> val_check_steps:int=100, batch_size:int=32,\n", - "> valid_batch_size:Optional[int]=None,\n", - "> windows_batch_size:int=1024,\n", - "> inference_windows_batch_size:int=-1,\n", - "> start_padding_enabled=False, step_size:int=1,\n", - "> scaler_type:str='standard', random_seed:int=1,\n", - "> num_workers_loader=0, 
drop_last_loader=False, optimizer=None,\n", - "> optimizer_kwargs=None, **trainer_kwargs)\n", - "\n", - "DeepNPTS\n", - "\n", - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", - "\n", - "**Parameters:**
\n", - "`h`: int, Forecast horizon.
\n", - "`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", - "`hidden_size`: int=32, hidden size of dense layers.
\n", - "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", - "`dropout`: float=0.1, dropout.
\n", - "`n_layers`: int=2, number of dense layers.
\n", - "`trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", - "`stat_exog_list`: str list, static exogenous columns.
\n", - "`hist_exog_list`: str list, historic exogenous columns.
\n", - "`futr_exog_list`: str list, future exogenous columns.
\n", - "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", - "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`max_steps`: int=1000, maximum number of training steps.
\n", - "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", - "`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n", - "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", - "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", - "`batch_size`: int=32, number of different series in each batch.
\n", - "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", - "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", - "`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", - "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", - "`step_size`: int=1, step size between each window of temporal data.
\n", - "`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", - "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", - "`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", - "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", - "`alias`: str, optional, Custom name of the model.
\n", - "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", - "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", - "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", - "\n", - "**References**
\n", - "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L20){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DeepNPTS\n", - "\n", - "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", - "> dropout:float=0.5, n_layers:int=2, trajectory_samples:int=100,\n", - "> futr_exog_list=None, hist_exog_list=None, stat_exog_list=None,\n", - "> exclude_insample_y=False, loss=GMM(), valid_loss=MQLoss(),\n", - "> max_steps:int=1000, learning_rate:float=0.001,\n", - "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", - "> val_check_steps:int=100, batch_size:int=32,\n", - "> valid_batch_size:Optional[int]=None,\n", - "> windows_batch_size:int=1024,\n", - "> inference_windows_batch_size:int=-1,\n", - "> start_padding_enabled=False, step_size:int=1,\n", - "> scaler_type:str='standard', random_seed:int=1,\n", - "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", - "> optimizer_kwargs=None, **trainer_kwargs)\n", - "\n", - "DeepNPTS\n", - "\n", - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. \n", - "\n", - "**Parameters:**
\n", - "`h`: int, Forecast horizon.
\n", - "`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", - "`hidden_size`: int=32, hidden size of dense layers.
\n", - "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", - "`dropout`: float=0.1, dropout.
\n", - "`n_layers`: int=2, number of dense layers.
\n", - "`trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
\n", - "`stat_exog_list`: str list, static exogenous columns.
\n", - "`hist_exog_list`: str list, historic exogenous columns.
\n", - "`futr_exog_list`: str list, future exogenous columns.
\n", - "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", - "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`max_steps`: int=1000, maximum number of training steps.
\n", - "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", - "`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n", - "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", - "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", - "`batch_size`: int=32, number of different series in each batch.
\n", - "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", - "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", - "`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", - "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", - "`step_size`: int=1, step size between each window of temporal data.
\n", - "`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", - "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", - "`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", - "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", - "`alias`: str, optional, Custom name of the model.
\n", - "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", - "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", - "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", - "\n", - "**References**
\n", - "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS, title_level=3)" ] @@ -751,73 +284,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "### DeepNPTS.fit\n", - "\n", - "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", - "> distributed_config=None)\n", - "\n", - "Fit.\n", - "\n", - "The `fit` method, optimizes the neural network's weights using the\n", - "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", - "and the `loss` function as defined during the initialization.\n", - "Within `fit` we use a PyTorch Lightning `Trainer` that\n", - "inherits the initialization's `self.trainer_kwargs`, to customize\n", - "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", - "\n", - "The method is designed to be compatible with SKLearn-like classes\n", - "and in particular to be compatible with the StatsForecast library.\n", - "\n", - "By default the `model` is not saving training checkpoints to protect\n", - "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`val_size`: int, validation size for temporal cross-validation.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`test_size`: int, test size for temporal cross-validation.
" - ], - "text/plain": [ - "---\n", - "\n", - "### DeepNPTS.fit\n", - "\n", - "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", - "> distributed_config=None)\n", - "\n", - "Fit.\n", - "\n", - "The `fit` method, optimizes the neural network's weights using the\n", - "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", - "and the `loss` function as defined during the initialization.\n", - "Within `fit` we use a PyTorch Lightning `Trainer` that\n", - "inherits the initialization's `self.trainer_kwargs`, to customize\n", - "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", - "\n", - "The method is designed to be compatible with SKLearn-like classes\n", - "and in particular to be compatible with the StatsForecast library.\n", - "\n", - "By default the `model` is not saving training checkpoints to protect\n", - "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`val_size`: int, validation size for temporal cross-validation.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`test_size`: int, test size for temporal cross-validation.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" ] @@ -826,53 +293,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "### DeepNPTS.predict\n", - "\n", - "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", - "> **data_module_kwargs)\n", - "\n", - "Predict.\n", - "\n", - "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`test_size`: int=None, test size for temporal cross-validation.
\n", - "`step_size`: int=1, Step size between each window.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." - ], - "text/plain": [ - "---\n", - "\n", - "### DeepNPTS.predict\n", - "\n", - "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", - "> **data_module_kwargs)\n", - "\n", - "Predict.\n", - "\n", - "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`test_size`: int=None, test size for temporal cross-validation.
\n", - "`step_size`: int=1, Step size between each window.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" ] @@ -891,194 +312,26 @@ "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", "from neuralforecast import NeuralForecast\n", - "from neuralforecast.losses.pytorch import MQLoss, DistributionLoss, GMM\n", - "from neuralforecast.tsdataset import TimeSeriesDataset\n", - "from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic" + "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Seed set to 1\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b74158f17d254e4884139ee5c48e5706", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Sanity Checking: | | 0/? 
[00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| eval: false\n", - "import pandas as pd\n", - "import pytorch_lightning as pl\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from neuralforecast import NeuralForecast\n", - "#from neuralforecast.models import DeepAR\n", - "from neuralforecast.losses.pytorch import DistributionLoss, HuberMQLoss\n", - "from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic\n", - "\n", - "#AirPassengersPanel['y'] = AirPassengersPanel['y'] + 10\n", "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n", "\n", "nf = NeuralForecast(\n", " models=[DeepNPTS(h=12,\n", - " input_size=12,\n", - " trajectory_samples=100,\n", - " loss=GMM(),\n", - " # learning_rate=1e-5,\n", - " n_layers = 2,\n", - " dropout=0.0,\n", + " input_size=24,\n", " stat_exog_list=['airline1'],\n", " futr_exog_list=['trend'],\n", " max_steps=1000,\n", @@ -1100,29 +353,9 @@ "plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n", "plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n", "plt.plot(plot_df['ds'], plot_df['DeepNPTS'], c='red', label='mean')\n", - "plt.plot(plot_df['ds'], plot_df['DeepNPTS-median'], c='blue', label='median')\n", - "plt.fill_between(x=plot_df['ds'][-12:], \n", - " y1=plot_df['DeepNPTS-lo-90'][-12:].values, \n", - " y2=plot_df['DeepNPTS-hi-90'][-12:].values,\n", - " alpha=0.4, label='level 90')\n", - "plt.legend()\n", "plt.grid()\n", "plt.plot()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/neuralforecast/_modidx.py b/neuralforecast/_modidx.py index 275d7598b..3406a3604 100644 --- a/neuralforecast/_modidx.py +++ b/neuralforecast/_modidx.py 
@@ -516,20 +516,8 @@ 'neuralforecast/models/deepnpts.py'), 'neuralforecast.models.deepnpts.DeepNPTS.__init__': ( 'models.deepnpts.html#deepnpts.__init__', 'neuralforecast/models/deepnpts.py'), - 'neuralforecast.models.deepnpts.DeepNPTS._domain_map': ( 'models.deepnpts.html#deepnpts._domain_map', - 'neuralforecast/models/deepnpts.py'), - 'neuralforecast.models.deepnpts.DeepNPTS._init_weights': ( 'models.deepnpts.html#deepnpts._init_weights', - 'neuralforecast/models/deepnpts.py'), 'neuralforecast.models.deepnpts.DeepNPTS.forward': ( 'models.deepnpts.html#deepnpts.forward', - 'neuralforecast/models/deepnpts.py'), - 'neuralforecast.models.deepnpts.DeepNPTS.predict_step': ( 'models.deepnpts.html#deepnpts.predict_step', - 'neuralforecast/models/deepnpts.py'), - 'neuralforecast.models.deepnpts.DeepNPTS.train_forward': ( 'models.deepnpts.html#deepnpts.train_forward', - 'neuralforecast/models/deepnpts.py'), - 'neuralforecast.models.deepnpts.DeepNPTS.training_step': ( 'models.deepnpts.html#deepnpts.training_step', - 'neuralforecast/models/deepnpts.py'), - 'neuralforecast.models.deepnpts.DeepNPTS.validation_step': ( 'models.deepnpts.html#deepnpts.validation_step', - 'neuralforecast/models/deepnpts.py')}, + 'neuralforecast/models/deepnpts.py')}, 'neuralforecast.models.dilated_rnn': { 'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer': ( 'models.dilated_rnn.html#attentivelstmlayer', 'neuralforecast/models/dilated_rnn.py'), 'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer.__init__': ( 'models.dilated_rnn.html#attentivelstmlayer.__init__', diff --git a/neuralforecast/losses/pytorch.py b/neuralforecast/losses/pytorch.py index 2e5ede2f5..fdcd5a623 100644 --- a/neuralforecast/losses/pytorch.py +++ b/neuralforecast/losses/pytorch.py @@ -13,7 +13,13 @@ import torch.nn.functional as F from torch.distributions import Distribution -from torch.distributions import Bernoulli, Normal, StudentT, Poisson, NegativeBinomial +from torch.distributions import ( + Bernoulli, + Normal, + 
StudentT, + Poisson, + NegativeBinomial, +) from torch.distributions import constraints @@ -1166,20 +1172,17 @@ def __init__( # If True, predict_step will return Distribution's parameters self.return_params = return_params if self.return_params: - lambda_names = [f"-lambda-{i}" for i in range(1, n_components + 1)] - weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] - self.param_names = [i for j in zip(lambda_names, weight_names) for i in j] + self.param_names = [f"-lambda-{i}" for i in range(1, n_components + 1)] self.output_names = self.output_names + self.param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 2 * n_components + self.outputsize_multiplier = n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - lambdas, weights = output.chunk(2, dim=-1) - return (lambdas, weights) + return (output,) # , weights def scale_decouple( self, @@ -1193,15 +1196,13 @@ def scale_decouple( variance and residual location based on anchoring `loc`, `scale`. Also adds domain protection to the distribution parameters. 
""" - lambdas, weights = output - weights = F.softmax(weights, dim=-1) - + lambdas = output[0] if (loc is not None) and (scale is not None): loc = loc.view(lambdas.size(dim=0), 1, -1) scale = scale.view(lambdas.size(dim=0), 1, -1) lambdas = (lambdas * scale) + loc lambdas = F.softplus(lambdas) - return (lambdas, weights) + return (lambdas,) def sample(self, distr_args, num_samples=None): """ @@ -1223,10 +1224,15 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - lambdas, weights = distr_args + lambdas = distr_args[0] B, H, K = lambdas.size() Q = len(self.quantiles) + # Sample K ~ Mult(weights) + # shared across B, H + # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) + weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device) + # Avoid loop, vectorize weights = weights.reshape(-1, K) lambdas = lambdas.flatten() @@ -1267,7 +1273,7 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: @@ -1276,9 +1282,11 @@ def neglog_likelihood( mask = mask * ((y > 0) * 1) eps = 1e-10 - lambdas, weights = distr_args + lambdas = distr_args[0] B, H, K = lambdas.size() + weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device) + y = y[:, :, None] mask = mask[:, :, None] @@ -1305,7 +1313,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor], mask: Union[torch.Tensor, None] = None, ): @@ -1367,22 +1375,18 @@ def __init__( if self.return_params: mu_names = [f"-mu-{i}" for i in range(1, n_components + 1)] std_names = [f"-std-{i}" for i in range(1, n_components + 1)] - weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] - self.param_names = [ - i for j in zip(mu_names, std_names, weight_names) for i in j 
- ] - self.output_names = self.output_names + self.param_names + mu_std_names = [i for j in zip(mu_names, std_names) for i in j] + self.output_names = self.output_names + mu_std_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 3 * n_components + self.outputsize_multiplier = 2 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - means, stds, weights = output.chunk(3, dim=-1) - - return (means, stds, weights) + means, stds = torch.tensor_split(output, 2, dim=-1) + return (means, stds) def scale_decouple( self, @@ -1397,16 +1401,14 @@ def scale_decouple( variance and residual location based on anchoring `loc`, `scale`. Also adds domain protection to the distribution parameters. """ - means, stds, weights = output + means, stds = output stds = F.softplus(stds) - weights = F.softmax(weights, dim=-1) if (loc is not None) and (scale is not None): loc = loc.view(means.size(dim=0), 1, -1) scale = scale.view(means.size(dim=0), 1, -1) means = (means * scale) + loc stds = (stds + eps) * scale - - return (means, stds, weights) + return (means, stds) def sample(self, distr_args, num_samples=None): """ @@ -1428,11 +1430,17 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - means, stds, weights = distr_args + means, stds = distr_args B, H, K = means.size() Q = len(self.quantiles) assert means.shape == stds.shape + # Sample K ~ Mult(weights) + # shared across B, H + # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) + + weights = (1 / K) * torch.ones_like(means, device=means.device) + # Avoid loop, vectorize weights = weights.reshape(-1, K) means = means.flatten() @@ -1473,16 +1481,18 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: 
Union[torch.Tensor, None] = None, ): if mask is None: mask = torch.ones_like(y) - means, stds, weights = distr_args + means, stds = distr_args B, H, K = means.size() + weights = (1 / K) * torch.ones_like(means, device=means.device) + y = y[:, :, None] mask = mask[:, :, None] @@ -1510,7 +1520,7 @@ def neglog_likelihood( def __call__( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): @@ -1568,29 +1578,25 @@ def __init__( f"-total_count-{i}" for i in range(1, n_components + 1) ] probs_names = [f"-probs-{i}" for i in range(1, n_components + 1)] - weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)] - self.param_names = [ - i for j in zip(total_count_names, probs_names, weight_names) for i in j - ] - self.output_names = self.output_names + self.param_names + param_names = [i for j in zip(total_count_names, probs_names) for i in j] + self.output_names = self.output_names + param_names # Add first output entry for the sample_mean self.output_names.insert(0, "") - self.outputsize_multiplier = 3 * n_components + self.outputsize_multiplier = 2 * n_components self.is_distribution_output = True def domain_map(self, output: torch.Tensor): - mu, alpha, weights = output.chunk(3, dim=-1) - - return mu, alpha, weights + mu, alpha = torch.tensor_split(output, 2, dim=-1) + return (mu, alpha) def scale_decouple( self, output, loc: Optional[torch.Tensor] = None, scale: Optional[torch.Tensor] = None, - eps: float = 1e-6, + eps: float = 0.2, ): """Scale Decouple @@ -1599,10 +1605,9 @@ def scale_decouple( Also adds domain protection to the distribution parameters. 
""" # Efficient NBinomial parametrization - mu, alpha, weights = output - mu = F.softplus(mu) + eps - alpha = F.softplus(alpha) + eps # alpha = 1/total_counts - weights = F.softmax(weights, dim=-1) + mu, alpha = output + mu = F.softplus(mu) + 1e-8 + alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts if (loc is not None) and (scale is not None): loc = loc.view(mu.size(dim=0), 1, -1) mu *= loc @@ -1612,9 +1617,8 @@ def scale_decouple( # => probs = mu / (total_count + mu) # => probs = mu / [total_count * (1 + mu * (1/total_count))] total_count = 1.0 / alpha - probs = mu * alpha / (1.0 + mu * alpha) - probs = torch.clamp(probs, eps, 1 - eps) - return (total_count, probs, weights) + probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 + return (total_count, probs) def sample(self, distr_args, num_samples=None): """ @@ -1636,11 +1640,17 @@ def sample(self, distr_args, num_samples=None): if num_samples is None: num_samples = self.num_samples - total_count, probs, weights = distr_args + total_count, probs = distr_args B, H, K = total_count.size() Q = len(self.quantiles) assert total_count.shape == probs.shape + # Sample K ~ Mult(weights) + # shared across B, H + # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2) + + weights = (1 / K) * torch.ones_like(probs, device=probs.device) + # Avoid loop, vectorize weights = weights.reshape(-1, K) total_count = total_count.flatten() @@ -1682,16 +1692,18 @@ def sample(self, distr_args, num_samples=None): def neglog_likelihood( self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): if mask is None: mask = torch.ones_like(y) - total_count, probs, weights = distr_args + total_count, probs = distr_args B, H, K = total_count.size() + weights = (1 / K) * torch.ones_like(probs, device=probs.device) + y = y[:, :, None] mask = mask[:, :, None] @@ -1722,7 +1734,7 @@ def neglog_likelihood( def __call__( 
self, y: torch.Tensor, - distr_args: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + distr_args: Tuple[torch.Tensor, torch.Tensor], mask: Union[torch.Tensor, None] = None, ): diff --git a/neuralforecast/models/__init__.py b/neuralforecast/models/__init__.py index ee07166ab..e519db838 100644 --- a/neuralforecast/models/__init__.py +++ b/neuralforecast/models/__init__.py @@ -33,4 +33,4 @@ from .itransformer import iTransformer from .bitcn import BiTCN from .tide import TiDE -from .deepnpts import DeepNPTS \ No newline at end of file +from .deepnpts import DeepNPTS diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py index d4da85974..678f89c11 100644 --- a/neuralforecast/models/deepnpts.py +++ b/neuralforecast/models/deepnpts.py @@ -4,23 +4,21 @@ __all__ = ['DeepNPTS'] # %% ../../nbs/models.deepnpts.ipynb 3 -import numpy as np - import torch import torch.nn as nn +import torch.nn.functional as F import neuralforecast.losses.pytorch as losses from typing import Optional -from functools import partial from ..common._base_windows import BaseWindows -from ..losses.pytorch import MQLoss, GMM, PMM, NBMM +from ..losses.pytorch import MAE # %% ../../nbs/models.deepnpts.ipynb 7 class DeepNPTS(BaseWindows): """DeepNPTS - Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. + Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series. **Parameters:**
`h`: int, Forecast horizon.
@@ -29,7 +27,6 @@ class DeepNPTS(BaseWindows): `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
`dropout`: float=0.1, dropout.
`n_layers`: int=2, number of dense layers.
- `trajectory_samples`: int=100, number of Monte Carlo trajectories during inference.
`stat_exog_list`: str list, static exogenous columns.
`hist_exog_list`: str list, historic exogenous columns.
`futr_exog_list`: str list, future exogenous columns.
@@ -72,15 +69,14 @@ def __init__( batch_norm: bool = True, dropout: float = 0.1, n_layers: int = 2, - trajectory_samples: int = 100, futr_exog_list=None, hist_exog_list=None, stat_exog_list=None, exclude_insample_y=False, - loss=GMM(), - valid_loss=MQLoss(level=[80, 90]), + loss=MAE(), + valid_loss=MAE(), max_steps: int = 1000, - learning_rate: float = 1e-5, + learning_rate: float = 1e-3, num_lr_decays: int = 3, early_stop_patience_steps: int = -1, val_check_steps: int = 100, @@ -99,22 +95,13 @@ def __init__( **trainer_kwargs ): - if hist_exog_list is not None: - raise Exception("DeepNPTS does not support historical exogenous variables.") - if exclude_insample_y: raise Exception("DeepNPTS has no possibility for excluding y.") - supported_losses = (losses.GMM, losses.PMM, losses.NBMM) - - if not isinstance(loss, supported_losses): - raise Exception("DeepNPTS only supports GMM, PMM or NBMM as loss function.") - - if not isinstance(valid_loss, losses.MQLoss): - raise Exception("DeepNPTS only supports MQLoss as validation loss.") - - # Overwrite n_components, it has to be the input_size in DeepNPTS - loss.n_components = input_size + if not isinstance(loss, losses.BasePointLoss): + raise Exception( + "DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function." 
+ ) # Inherit BaseWindows class super(DeepNPTS, self).__init__( @@ -147,16 +134,19 @@ def __init__( ) self.h = h - self.h_backup = self.h # Used because h=1 during training - self.use_softmax = True self.hidden_size = hidden_size self.dropout = dropout - self.trajectory_samples = trajectory_samples self.futr_exog_size = len(self.futr_exog_list) self.stat_exog_size = len(self.stat_exog_list) + self.hist_exog_size = len(self.hist_exog_list) + + input_dim = ( + input_size * (1 + self.futr_exog_size + self.hist_exog_size) + + self.stat_exog_size + + self.h * self.futr_exog_size + ) - input_dim = input_size * (1 + self.futr_exog_size) + self.stat_exog_size # Create DeepNPTSNetwork modules = [] for i in range(n_layers): @@ -167,391 +157,61 @@ def __init__( if dropout > 0.0: modules.append(nn.Dropout(dropout)) + modules.append(nn.Linear(hidden_size, input_size * self.h)) self.deepnptsnetwork = nn.Sequential(*modules) - self.deepnptsnetwork.apply(partial(self._init_weights, scale=0.07)) - - # Add output layers for Mixture distribution - output_modules = [] - if dropout > 0.0: - output_modules.append(nn.Dropout(self.dropout)) - - if isinstance(loss, GMM): - output_modules.append(nn.Linear(hidden_size, input_size + 1)) - elif isinstance(loss, PMM): - output_modules.append(nn.Linear(hidden_size, input_size)) - elif isinstance(loss, NBMM): - output_modules.append(nn.Linear(hidden_size, input_size)) - - self.output_layer = nn.Sequential(*output_modules) - self.output_layer.apply(self._init_weights) - - @staticmethod - def _init_weights(module, scale=1.0): - if type(module) == nn.Linear: - nn.init.uniform_(module.weight, -scale, scale) - nn.init.zeros_(module.bias) - - def _domain_map(self, o_t, insample_y): - if isinstance(self.loss, GMM): - weights = o_t[:, :-1] # [B, L + 1] -> [B, L] - kernel_width = o_t[:, -1:] # [B, L + 1] -> [B, 1] - kernel_width = torch.repeat_interleave( - input=kernel_width, repeats=weights.shape[1], dim=-1 - ) # [B, 1] -> [B, L] - output = torch.cat( - 
[insample_y, kernel_width, weights], dim=-1 - ) # [B, L] + [B, L] + [B, L] = [B, 3 * L] - output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * L] - elif isinstance(self.loss, PMM): - weights = o_t # [B, L] -> [B, L] - output = torch.cat( - [insample_y, weights], dim=-1 - ) # [B, L] + [B, L] = [B, 2 * L] - output = output.unsqueeze(1) # [B, 2 * L] = [B, 1, 2 * L] - elif isinstance(self.loss, NBMM): - weights = torch.ones_like(o_t) # [B, L] -> [B, L] - output = torch.cat( - [insample_y, o_t, weights], dim=-1 - ) # [B, L] + [B, L] + [B, L] = [B, 3 * L] - output = output.unsqueeze(1) # [B, 3 * L] = [B, 1, 3 * - - else: - raise NotImplementedError - - return output - - # Override BaseWindows method - def training_step(self, batch, batch_idx): - - # Only train one-step ahead - self.h = 1 - self.quantiles = self.loss.quantiles - - # Create and normalize windows [Ws, L+H, C] - y_idx = batch["y_idx"] - windows = self._create_windows(batch, step="train") - original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, y_idx]) - windows = self._normalization(windows=windows, y_idx=y_idx) - - # Parse windows - ( - insample_y, - insample_mask, - outsample_y, - outsample_mask, - _, - futr_exog, - stat_exog, - ) = self._parse_windows(batch, windows) - - windows_batch = dict( - insample_y=insample_y, # [Ws, L] - insample_mask=insample_mask, # [Ws, L] - futr_exog=futr_exog, # [Ws, L+H] - hist_exog=None, - stat_exog=stat_exog, # [Ws, 1] - y_idx=y_idx, # [Ws, 1] - ) - - # Model Predictions - output = self.train_forward(windows_batch) - - _, y_loc, y_scale = self._inv_normalization( - y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx - ) - # outsample_y = original_insample_y - outsample_y = original_outsample_y - distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale) - loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask) - - if torch.isnan(loss): - print("Model Parameters", self.hparams) - print("insample_y", 
torch.isnan(insample_y).sum()) - print("outsample_y", torch.isnan(outsample_y).sum()) - print("output", torch.isnan(output).sum()) - raise Exception("Loss is NaN, training stopped.") - - self.log("train_loss", loss, prog_bar=True, on_epoch=True) - self.train_trajectories.append((self.global_step, float(loss))) - - self.h = self.h_backup - - return loss - - # Override BaseWindows method - def validation_step(self, batch, batch_idx): - - self.h = self.h_backup - self.quantiles = self.valid_loss.quantiles - - if self.val_size == 0: - return np.nan - - # TODO: Hack to compute number of windows - windows = self._create_windows(batch, step="val") - n_windows = len(windows["temporal"]) - y_idx = batch["y_idx"] - - # Number of windows in batch - windows_batch_size = self.inference_windows_batch_size - if windows_batch_size < 0: - windows_batch_size = n_windows - n_batches = int(np.ceil(n_windows / windows_batch_size)) - - valid_losses = [] - batch_sizes = [] - for i in range(n_batches): - # Create and normalize windows [Ws, L+H, C] - w_idxs = np.arange( - i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows) - ) - windows = self._create_windows(batch, step="val", w_idxs=w_idxs) - original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, 0]) - windows = self._normalization(windows=windows, y_idx=y_idx) - - # Parse windows - ( - insample_y, - insample_mask, - _, - outsample_mask, - _, - futr_exog, - stat_exog, - ) = self._parse_windows(batch, windows) - - windows_batch = dict( - insample_y=insample_y, # [Ws, L] - insample_mask=insample_mask, # [Ws, L] - futr_exog=futr_exog, # [Ws, L+H] - hist_exog=None, # [Ws, L] - stat_exog=stat_exog, - y_idx=y_idx, - ) # [Ws, 1] - - # Model Predictions - output_batch = self(windows_batch) - # Monte Carlo already returns y_hat with mean and quantiles - output_batch = output_batch[:, :, 1:] # Remove mean - valid_loss_batch = self.valid_loss( - y=original_outsample_y, y_hat=output_batch, mask=outsample_mask - ) - 
valid_losses.append(valid_loss_batch) - batch_sizes.append(len(output_batch)) - - valid_loss = torch.stack(valid_losses) - batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device) - valid_loss = torch.sum(valid_loss * batch_sizes) / torch.sum(batch_sizes) - - if torch.isnan(valid_loss): - raise Exception("Loss is NaN, training stopped.") - - self.log("valid_loss", valid_loss, prog_bar=True, on_epoch=True) - self.validation_step_outputs.append(valid_loss) - return valid_loss - # Override BaseWindows method - def predict_step(self, batch, batch_idx): - - self.h == self.h_backup - self.quantiles = self.loss.quantiles - - # TODO: Hack to compute number of windows - windows = self._create_windows(batch, step="predict") - n_windows = len(windows["temporal"]) - y_idx = batch["y_idx"] - - # Number of windows in batch - windows_batch_size = self.inference_windows_batch_size - if windows_batch_size < 0: - windows_batch_size = n_windows - n_batches = int(np.ceil(n_windows / windows_batch_size)) - - y_hats = [] - for i in range(n_batches): - # Create and normalize windows [Ws, L+H, C] - w_idxs = np.arange( - i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows) - ) - windows = self._create_windows(batch, step="predict", w_idxs=w_idxs) - windows = self._normalization(windows=windows, y_idx=y_idx) - - # Parse windows - insample_y, insample_mask, _, _, _, futr_exog, stat_exog = ( - self._parse_windows(batch, windows) - ) - windows_batch = dict( - insample_y=insample_y, # [Ws, L] - insample_mask=insample_mask, # [Ws, L] - futr_exog=futr_exog, # [Ws, L+H] - stat_exog=stat_exog, - y_idx=y_idx, - ) - - # Model Predictions - y_hat = self(windows_batch) - # Monte Carlo already returns y_hat with mean and quantiles - y_hats.append(y_hat) - y_hat = torch.cat(y_hats, dim=0) - return y_hat - - def train_forward(self, windows_batch): + def forward(self, windows_batch): # Parse windows_batch - x_t = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] + x = 
windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] + hist_exog = windows_batch["hist_exog"] # [B, L, X] futr_exog = windows_batch["futr_exog"] # [B, L + h, F] stat_exog = windows_batch["stat_exog"] # [B, S] - batch_size, seq_len = x_t.shape[:2] # B = batch_size, L = seq_len + batch_size, seq_len = x.shape[:2] # B = batch_size, L = seq_len + insample_y = windows_batch["insample_y"].unsqueeze(-1) - # Concatenate x_t with future exogenous + # Concatenate x_t with future exogenous of input if self.futr_exog_size > 0: - futr_exog_t = futr_exog[:, :seq_len] # [B, L + h, F] -> [B, L, F] - x_t = torch.cat( - (x_t, futr_exog_t), dim=2 + x = torch.cat( + (x, futr_exog[:, :seq_len]), dim=2 ) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] - x_t = x_t.reshape(batch_size, -1) # [B, L, 1 + F] -> [B, L * (1 + F)] + # Concatenate x_t with historic exogenous + if self.hist_exog_size > 0: + x = torch.cat( + (x, hist_exog), dim=2 + ) # [B, L, 1 + F] + [B, L, X] -> [B, L, 1 + F + X] - # Concatenate x_t with static exogenous - if self.stat_exog_size > 0: - x_t = torch.cat( - (x_t, stat_exog), dim=1 - ) # [B, L * (1 + F)] + [B, S] -> [B, L * (1 + F) + S] - - # Run through DeepNPTSNetwork - h_t = self.deepnptsnetwork(x_t) # [B, L * (1 + F) + S] -> [B, hidden_size] - o_t = self.output_layer(h_t) # [B, hidden_size] -> [B, L + 1] - - output = self._domain_map( - o_t, windows_batch["insample_y"] - ) # [B, L + 1], [B, L] -> [B, 3 * L] - output = self.loss.domain_map( - output - ) # [B, 3 * L] -> ([B, L], [B, L], [B, L]) - - return output - - def forward(self, windows_batch): - # Parse windows_batch - insample_y_t = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1] - futr_exog = windows_batch["futr_exog"] # [B, L + h, F] - stat_exog = windows_batch["stat_exog"] # [B, S] - y_idx = windows_batch["y_idx"] + x = x.reshape(batch_size, -1) # [B, L, 1 + F + X] -> [B, L * (1 + F + X)] - batch_size, seq_len = insample_y_t.shape[:2] # B = batch_size, L = seq_len - device = insample_y_t.device - dtype = 
insample_y_t.dtype - - # Repeat insample_y for trajectory samples - insample_y_t = torch.repeat_interleave( - input=insample_y_t, repeats=self.trajectory_samples, dim=0 - ) # [B, L, 1] -> [B * n_samples, L, 1] - - # Input x_t is insample_y at time t - x_t = insample_y_t - - # Repeat futr_exog if available for trajectory samples and add to x_t - if self.futr_exog_size > 0: - futr_exog = torch.repeat_interleave( - input=futr_exog, repeats=self.trajectory_samples, dim=0 - ) # [B, L + h, F] -> [B * n_samples, L + h, F] - x_t = torch.cat( - (x_t, futr_exog[:, :seq_len]), dim=2 - ) # [B * n_samples, L, 1] + [B * n_samples, L, F] -> [B * n_samples, L, 1 + F] - - x_t = x_t.reshape( - batch_size * self.trajectory_samples, -1 - ) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)] - - # Repeat stat_exog if available for trajectory samples and add to x_t + # Concatenate x with static exogenous if self.stat_exog_size > 0: - stat_exog = torch.repeat_interleave( - input=stat_exog, repeats=self.trajectory_samples, dim=0 - ) # [B, S] -> [B * n_samples, S] - x_t = torch.cat( - (x_t, stat_exog), dim=1 - ) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S] - - # Scales for inverse normalization - y_scale = self.scaler.x_scale[:, :, y_idx] - y_loc = self.scaler.x_shift[:, :, y_idx] - y_scale = torch.repeat_interleave( - input=y_scale, repeats=self.trajectory_samples, dim=0 - ) - y_loc = torch.repeat_interleave( - input=y_loc, repeats=self.trajectory_samples, dim=0 - ) - # Create forecasts tensor - forecasts = torch.zeros( - (batch_size, self.h, len(self.quantiles) + 1), device=device, dtype=dtype - ) + x = torch.cat( + (x, stat_exog), dim=1 + ) # [B, L * (1 + F + X)] + [B, S] -> [B, L * (1 + F + X) + S] - # Recursive predictions - for t in range(self.h): - # Run input throught DeepNPTSNetwork - h_t = self.deepnptsnetwork( - x_t - ) # [B * n_samples, L * (1 + F) + S] -> [B, hidden_size] - o_t = self.output_layer( - h_t - ) # [B * n_samples, 
hidden_size] -> [B * n_samples, L (+ 1)] - output = self._domain_map( - o_t, insample_y_t.squeeze(-1) - ) # [B * n_samples, L + 1], [B * n_samples, L] -> [B * n_samples, 3 * L] - output = self.loss.domain_map( - output - ) # [B * n_samples, 3 * L] -> ([B * n_samples, L], [B * n_samples, L], [B * n_samples, L]) - - # Inverse normalization - distr_args = self.loss.scale_decouple( - output=output, loc=y_loc, scale=y_scale - ) - - # Sample and create probabilistic outputs - samples_t_flat, _, _ = self.loss.sample( - distr_args=distr_args, num_samples=1 - ) - - samples_t_flat = samples_t_flat.squeeze() - samples_t = samples_t_flat.reshape( - batch_size, self.trajectory_samples - ) # [B * n_samples] -> [B, n_samples] - - samples_t_mean = torch.mean(samples_t, dim=-1) # [B, n_samples] -> [B] - quantiles_t = torch.quantile( - input=samples_t, q=self.quantiles, dim=-1 - ) # [B, n_samples] -> [Q, B] - forecasts[:, t, 0] = samples_t_mean - forecasts[:, t, 1:] = quantiles_t.permute(1, 0) - - insample_y_t_next = self.scaler.scaler( - samples_t_flat, y_loc.squeeze(), y_scale.squeeze() - ) # [B * n_samples] -> [B * n_samples] - insample_y_t_next = insample_y_t_next.unsqueeze(-1).unsqueeze( - -1 - ) # [B * n_samples] -> [B * n_samples, 1, 1] - - # Update insample_y_t - insample_y_t = torch.cat( - [insample_y_t[:, 1:], insample_y_t_next], dim=1 - ) # [B * n_samples, L - 1, 1] + [B * n_samples, 1, 1] -> [B * n_samples, L, 1] + # Concatenate x_t with future exogenous of horizon + if self.futr_exog_size > 0: + futr_exog = futr_exog[:, seq_len:] # [B, L + h, F] -> [B, h, F] + futr_exog = futr_exog.reshape( + batch_size, -1 + ) # [B, L + h, F] -> [B, h * F] + x = torch.cat( + (x, futr_exog), dim=1 + ) # [B, L * (1 + F + X) + S] + [B, h * F] -> [B, L * (1 + F + X) + S + h * F] - # Update input - x_t = insample_y_t - # Concatenate x_t with future exogenous - if self.futr_exog_size > 0: - x_t = torch.cat( - (x_t, futr_exog[:, t : seq_len + t]), dim=2 - ) # [B * n_samples, L, 1] + [B * 
n_samples, L, F] -> [B * n_samples, L, 1 + F] + # Run through DeepNPTSNetwork + weights = self.deepnptsnetwork( + x + ) # [B, L * (1 + F + X) + S + h * F] -> [B, L * h] - x_t = x_t.reshape( - batch_size * self.trajectory_samples, -1 - ) # [B * n_samples, L, 1 + F] -> [B * n_samples, L * (1 + F)] + # Apply softmax for weighted input predictions + weights = weights.reshape(batch_size, seq_len, -1) # [B, L * h] -> [B, L, h] + x = ( + F.softmax(weights, dim=1) * insample_y + ) # [B, L, h] * [B, L, 1] = [B, L, h] + output = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1] - # Concatenate x_t with static exogenous - if self.stat_exog_size > 0: - x_t = torch.cat( - (x_t, stat_exog), dim=1 - ) # [B * n_samples, L * (1 + F)] + [B * n_samples, S] -> [B * n_samples, L * (1 + F) + S] + forecast = self.loss.domain_map(output) # [B, h, 1] -> [B, h, 1] - return forecasts + return forecast From d702d637f73df146886b148e6476276ffa220d45 Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Mon, 6 May 2024 09:40:12 +0200 Subject: [PATCH 06/11] update_model_files --- nbs/models.deepnpts.ipynb | 567 +++++++++++++++++++++++++++++- neuralforecast/_modidx.py | 5 + neuralforecast/models/deepnpts.py | 7 +- 3 files changed, 572 insertions(+), 7 deletions(-) diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb index c1852c18a..7b6cac9e0 100644 --- a/nbs/models.deepnpts.ipynb +++ b/nbs/models.deepnpts.ipynb @@ -32,7 +32,7 @@ ":::{.callout-warning collapse=\"false\"}\n", "#### Losses\n", "\n", - "This implementation differs from the original work in that a weighted sum of the empirical distribution is returned as forecast, rather than a sampled distributional output. Consequently, DeepNPTS only supports point losses as training loss.\n", + "This implementation differs from the original work in that a weighted sum of the empirical distribution is returned as forecast. 
Therefore, it only supports point losses.\n", "\n", ":::" ] @@ -160,7 +160,7 @@ " batch_size: int = 32,\n", " valid_batch_size: Optional[int] = None,\n", " windows_batch_size: int = 1024,\n", - " inference_windows_batch_size: int = -1,\n", + " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", " step_size: int = 1,\n", " scaler_type: str = 'standard',\n", @@ -177,6 +177,9 @@ " if not isinstance(loss, losses.BasePointLoss):\n", " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function.') \n", " \n", + " if not isinstance(valid_loss, losses.BasePointLoss):\n", + " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as valid loss function.') \n", + " \n", " # Inherit BaseWindows class\n", " super(DeepNPTS, self).__init__(h=h,\n", " input_size=input_size,\n", @@ -275,7 +278,139 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L18){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DeepNPTS\n", + "\n", + "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", + "> dropout:float=0.1, n_layers:int=2, futr_exog_list=None,\n", + "> hist_exog_list=None, stat_exog_list=None,\n", + "> exclude_insample_y=False, loss=MAE(), valid_loss=MAE(),\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32,\n", + "> valid_batch_size:Optional[int]=None,\n", + "> windows_batch_size:int=1024,\n", + "> inference_windows_batch_size:int=1024,\n", + "> start_padding_enabled=False, step_size:int=1,\n", + "> scaler_type:str='standard', random_seed:int=1,\n", + "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", + "> 
optimizer_kwargs=None, **trainer_kwargs)\n", + "\n", + "DeepNPTS\n", + "\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", + "\n", + "**Parameters:**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`hidden_size`: int=32, hidden size of dense layers.
\n", + "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + "`dropout`: float=0.1, dropout.
\n", + "`n_layers`: int=2, number of dense layers.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + "`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", + "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + "**References**
\n", + "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L18){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### DeepNPTS\n", + "\n", + "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", + "> dropout:float=0.1, n_layers:int=2, futr_exog_list=None,\n", + "> hist_exog_list=None, stat_exog_list=None,\n", + "> exclude_insample_y=False, loss=MAE(), valid_loss=MAE(),\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32,\n", + "> valid_batch_size:Optional[int]=None,\n", + "> windows_batch_size:int=1024,\n", + "> inference_windows_batch_size:int=1024,\n", + "> start_padding_enabled=False, step_size:int=1,\n", + "> scaler_type:str='standard', random_seed:int=1,\n", + "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", + "> optimizer_kwargs=None, **trainer_kwargs)\n", + "\n", + "DeepNPTS\n", + "\n", + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", + "\n", + "**Parameters:**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`hidden_size`: int=32, hidden size of dense layers.
\n", + "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + "`dropout`: float=0.1, dropout.
\n", + "`n_layers`: int=2, number of dense layers.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + "`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", + "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + "**References**
\n", + "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DeepNPTS, title_level=3)" ] @@ -284,7 +419,73 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeepNPTS.fit\n", + "\n", + "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ], + "text/plain": [ + "---\n", + "\n", + "### DeepNPTS.fit\n", + "\n", + "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" ] @@ -293,7 +494,53 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeepNPTS.predict\n", + "\n", + "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", + "> **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ], + "text/plain": [ + "---\n", + "\n", + "### DeepNPTS.predict\n", + "\n", + "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", + "> **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" ] @@ -323,7 +570,315 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 1\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a683239fc3e5435aad7174b0d136376d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | | 0/? [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| eval: false\n", "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds Date: Mon, 6 May 2024 20:22:54 +0200 Subject: [PATCH 07/11] merge_conflicts --- nbs/models.ipynb | 397 +++++++++++++++-------------------------- neuralforecast/auto.py | 103 +++++++++-- neuralforecast/core.py | 3 - 3 files changed, 229 insertions(+), 274 deletions(-) diff --git a/nbs/models.ipynb b/nbs/models.ipynb index 428eeabc1..43dfe80e7 100644 --- a/nbs/models.ipynb +++ b/nbs/models.ipynb @@ -54,6 +54,7 @@ "from neuralforecast.models.dlinear import DLinear\n", "from neuralforecast.models.nlinear import NLinear\n", "from neuralforecast.models.tide import TiDE\n", + "from neuralforecast.models.deepnpts import DeepNPTS\n", "\n", "from neuralforecast.models.tft import TFT\n", "from neuralforecast.models.vanillatransformer import VanillaTransformer\n", @@ -2099,89 +2100,7 @@ "execution_count": null, "id": "d31d3bfa", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "### AutoTiDE\n", - "\n", - "> AutoTiDE (h, loss=MAE(), valid_loss=None, config=None,\n", - "> search_alg= object at 0x0000022D7EF8FC10>, 
num_samples=10,\n", - "> refit_with_val=False, cpus=20, gpus=1, verbose=False,\n", - "> alias=None, backend='ray', callbacks=None)\n", - "\n", - "Class for Automatic Hyperparameter Optimization, it builds on top of `ray` to\n", - "give access to a wide variety of hyperparameter optimization tools ranging\n", - "from classic grid search, to Bayesian optimization and HyperBand algorithm.\n", - "\n", - "The validation loss to be optimized is defined by the `config['loss']` dictionary\n", - "value, the config also contains the rest of the hyperparameter search space.\n", - "\n", - "It is important to note that the success of this hyperparameter optimization\n", - "heavily relies on a strong correlation between the validation and test periods.\n", - "\n", - "| | **Type** | **Default** | **Details** |\n", - "| -- | -------- | ----------- | ----------- |\n", - "| h | int | | Forecast horizon |\n", - "| loss | MAE | MAE() | Instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html). |\n", - "| valid_loss | NoneType | None | Instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html). |\n", - "| config | NoneType | None | Dictionary with ray.tune defined search space or function that takes an optuna trial and returns a configuration dict. |\n", - "| search_alg | BasicVariantGenerator | | For ray see https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
For optuna see https://optuna.readthedocs.io/en/stable/reference/samplers/index.html. |\n", - "| num_samples | int | 10 | Number of hyperparameter optimization steps/samples. |\n", - "| refit_with_val | bool | False | Refit of best model should preserve val_size. |\n", - "| cpus | int | 20 | Number of cpus to use during optimization. Only used with ray tune. |\n", - "| gpus | int | 1 | Number of gpus to use during optimization, default all available. Only used with ray tune. |\n", - "| verbose | bool | False | Track progress. |\n", - "| alias | NoneType | None | Custom name of the model. |\n", - "| backend | str | ray | Backend to use for searching the hyperparameter space, can be either 'ray' or 'optuna'. |\n", - "| callbacks | NoneType | None | List of functions to call during the optimization process.
ray reference: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
optuna reference: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/007_optuna_callback.html |" - ], - "text/plain": [ - "---\n", - "\n", - "### AutoTiDE\n", - "\n", - "> AutoTiDE (h, loss=MAE(), valid_loss=None, config=None,\n", - "> search_alg= object at 0x0000022D7EF8FC10>, num_samples=10,\n", - "> refit_with_val=False, cpus=20, gpus=1, verbose=False,\n", - "> alias=None, backend='ray', callbacks=None)\n", - "\n", - "Class for Automatic Hyperparameter Optimization, it builds on top of `ray` to\n", - "give access to a wide variety of hyperparameter optimization tools ranging\n", - "from classic grid search, to Bayesian optimization and HyperBand algorithm.\n", - "\n", - "The validation loss to be optimized is defined by the `config['loss']` dictionary\n", - "value, the config also contains the rest of the hyperparameter search space.\n", - "\n", - "It is important to note that the success of this hyperparameter optimization\n", - "heavily relies on a strong correlation between the validation and test periods.\n", - "\n", - "| | **Type** | **Default** | **Details** |\n", - "| -- | -------- | ----------- | ----------- |\n", - "| h | int | | Forecast horizon |\n", - "| loss | MAE | MAE() | Instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html). |\n", - "| valid_loss | NoneType | None | Instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html). |\n", - "| config | NoneType | None | Dictionary with ray.tune defined search space or function that takes an optuna trial and returns a configuration dict. |\n", - "| search_alg | BasicVariantGenerator | | For ray see https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
For optuna see https://optuna.readthedocs.io/en/stable/reference/samplers/index.html. |\n", - "| num_samples | int | 10 | Number of hyperparameter optimization steps/samples. |\n", - "| refit_with_val | bool | False | Refit of best model should preserve val_size. |\n", - "| cpus | int | 20 | Number of cpus to use during optimization. Only used with ray tune. |\n", - "| gpus | int | 1 | Number of gpus to use during optimization, default all available. Only used with ray tune. |\n", - "| verbose | bool | False | Track progress. |\n", - "| alias | NoneType | None | Custom name of the model. |\n", - "| backend | str | ray | Backend to use for searching the hyperparameter space, can be either 'ray' or 'optuna'. |\n", - "| callbacks | NoneType | None | List of functions to call during the optimization process.
ray reference: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
optuna reference: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/007_optuna_callback.html |" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(AutoTiDE, title_level=3)" ] @@ -2191,19 +2110,7 @@ "execution_count": null, "id": "7ae8f192", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-04-15 19:19:42,074\tINFO worker.py:1752 -- Started a local Ray instance.\n", - "2024-04-15 19:19:43,810\tINFO tune.py:263 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.\n", - "2024-04-15 19:19:43,813\tINFO tune.py:613 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949\n", - "2024-04-15 19:19:50,851\tINFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to 'C:/Users/ospra/ray_results/_train_tune_2024-04-15_19-19-40' in 0.0053s.\n", - "Seed set to 1\n" - ] - } - ], + "outputs": [], "source": [ "%%capture\n", "# Use your own config or AutoTiDE.default_config\n", @@ -2223,165 +2130,7 @@ "execution_count": null, "id": "d66600b9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[36m(_train_tune pid=30124)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\ray\\tune\\integration\\pytorch_lightning.py:194: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. 
Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\utilities\\parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\utilities\\parsing.py:199: Attribute 'valid_loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['valid_loss'])`.\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m Seed set to 11\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m GPU available: True (cuda), used: True\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m TPU available: False, using: 0 TPU cores\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m IPU available: False, using: 0 IPUs\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m HPU available: False, using: 0 HPUs\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m `Trainer(val_check_interval=1)` was configured so validation will run after every batch.\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. 
For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m Missing logger folder: C:\\Users\\ospra\\AppData\\Local\\Temp\\ray\\session_2024-04-15_19-19-40_426885_27112\\artifacts\\2024-04-15_19-19-55\\_train_tune_2024-04-15_19-19-55\\working_dirs\\_train_tune_55d90_00000\\lightning_logs\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m \n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m | Name | Type | Params\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m ---------------------------------------------------\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 0 | loss | MAE | 0 \n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 1 | padder_train | ConstantPad1d | 0 \n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 2 | scaler | TemporalNorm | 0 \n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 3 | dense_encoder | Sequential | 1.1 M \n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 4 | dense_decoder | Sequential | 361 K \n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 5 | temporal_decoder | MLPResidual | 1.3 K \n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 6 | global_skip | Linear | 156 \n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m ---------------------------------------------------\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 1.4 M Trainable params\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 0 Non-trainable params\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 1.4 M Total params\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m 5.706 Total estimated model params size (MB)\n", - "\u001b[36m(_train_tune pid=30124)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a 
bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sanity Checking: | | 0/? [00:00 Date: Mon, 6 May 2024 20:24:29 +0200 Subject: [PATCH 08/11] add_deepnpts_to_eval --- action_files/test_models/src/evaluation.py | 2 +- action_files/test_models/src/models.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/action_files/test_models/src/evaluation.py b/action_files/test_models/src/evaluation.py index cbe4e35c6..50a37a8d8 100644 --- a/action_files/test_models/src/evaluation.py +++ b/action_files/test_models/src/evaluation.py @@ -43,7 +43,7 @@ def evaluate(model: str, dataset: str, group: str): groups = ['Monthly'] models = ['AutoDilatedRNN', 'RNN', 'TCN', 'DeepAR', 'NHITS', 'TFT', 'AutoMLP', 'DLinear', 'VanillaTransformer', - 'BiTCN', 'TiDE'] + 'BiTCN', 'TiDE', 'DeepNPTS'] datasets = ['M3'] evaluation = [evaluate(model, dataset, group) for model, group in product(models, groups) for dataset in datasets] evaluation = [eval_ for eval_ in evaluation if eval_ is not None] diff --git a/action_files/test_models/src/models.py b/action_files/test_models/src/models.py index 7fb66f2d2..5dc70a308 100644 --- a/action_files/test_models/src/models.py +++ b/action_files/test_models/src/models.py @@ -28,6 +28,7 @@ from neuralforecast.models.dlinear import DLinear from neuralforecast.models.bitcn import BiTCN from neuralforecast.models.tide import TiDE +from neuralforecast.models.deepnpts import DeepNPTS from neuralforecast.auto import ( AutoMLP, @@ -76,6 +77,7 @@ def main(dataset: str = 'M3', group: str = 'Monthly') -> None: DeepAR(h=horizon, input_size=2 * horizon, scaler_type='minmax1', max_steps=1000), BiTCN(h=horizon, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500), TiDE(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=1000, val_check_steps=500), + 
DeepNPTS(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=1000, val_check_steps=500), ] # Models From 237be0d686c4904a5db152fd667921773c99b29c Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Mon, 6 May 2024 20:26:38 +0200 Subject: [PATCH 09/11] add_deepnpts_to_eval --- nbs/models.deepnpts.ipynb | 560 +------------------------------------- 1 file changed, 4 insertions(+), 556 deletions(-) diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb index 7b6cac9e0..f70dff2ec 100644 --- a/nbs/models.deepnpts.ipynb +++ b/nbs/models.deepnpts.ipynb @@ -278,139 +278,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L18){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DeepNPTS\n", - "\n", - "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", - "> dropout:float=0.1, n_layers:int=2, futr_exog_list=None,\n", - "> hist_exog_list=None, stat_exog_list=None,\n", - "> exclude_insample_y=False, loss=MAE(), valid_loss=MAE(),\n", - "> max_steps:int=1000, learning_rate:float=0.001,\n", - "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", - "> val_check_steps:int=100, batch_size:int=32,\n", - "> valid_batch_size:Optional[int]=None,\n", - "> windows_batch_size:int=1024,\n", - "> inference_windows_batch_size:int=1024,\n", - "> start_padding_enabled=False, step_size:int=1,\n", - "> scaler_type:str='standard', random_seed:int=1,\n", - "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", - "> optimizer_kwargs=None, **trainer_kwargs)\n", - "\n", - "DeepNPTS\n", - "\n", - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. 
The strategy is learned by exploiting the information across multiple related time series.\n", - "\n", - "**Parameters:**
\n", - "`h`: int, Forecast horizon.
\n", - "`input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", - "`hidden_size`: int=32, hidden size of dense layers.
\n", - "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", - "`dropout`: float=0.1, dropout.
\n", - "`n_layers`: int=2, number of dense layers.
\n", - "`stat_exog_list`: str list, static exogenous columns.
\n", - "`hist_exog_list`: str list, historic exogenous columns.
\n", - "`futr_exog_list`: str list, future exogenous columns.
\n", - "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", - "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`max_steps`: int=1000, maximum number of training steps.
\n", - "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", - "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", - "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", - "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", - "`batch_size`: int=32, number of different series in each batch.
\n", - "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", - "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", - "`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", - "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", - "`step_size`: int=1, step size between each window of temporal data.
\n", - "`scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", - "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", - "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", - "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", - "`alias`: str, optional, Custom name of the model.
\n", - "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", - "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", - "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", - "\n", - "**References**
\n", - "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" - ], - "text/plain": [ - "---\n", - "\n", - "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/deepnpts.py#L18){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", - "\n", - "### DeepNPTS\n", - "\n", - "> DeepNPTS (h, input_size:int=-1, hidden_size:int=32, batch_norm:bool=True,\n", - "> dropout:float=0.1, n_layers:int=2, futr_exog_list=None,\n", - "> hist_exog_list=None, stat_exog_list=None,\n", - "> exclude_insample_y=False, loss=MAE(), valid_loss=MAE(),\n", - "> max_steps:int=1000, learning_rate:float=0.001,\n", - "> num_lr_decays:int=3, early_stop_patience_steps:int=-1,\n", - "> val_check_steps:int=100, batch_size:int=32,\n", - "> valid_batch_size:Optional[int]=None,\n", - "> windows_batch_size:int=1024,\n", - "> inference_windows_batch_size:int=1024,\n", - "> start_padding_enabled=False, step_size:int=1,\n", - "> scaler_type:str='standard', random_seed:int=1,\n", - "> num_workers_loader=0, drop_last_loader=False, optimizer=None,\n", - "> optimizer_kwargs=None, **trainer_kwargs)\n", - "\n", - "DeepNPTS\n", - "\n", - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", - "\n", - "**Parameters:**
\n", - "`h`: int, Forecast horizon.
\n", - "`input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", - "`hidden_size`: int=32, hidden size of dense layers.
\n", - "`batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", - "`dropout`: float=0.1, dropout.
\n", - "`n_layers`: int=2, number of dense layers.
\n", - "`stat_exog_list`: str list, static exogenous columns.
\n", - "`hist_exog_list`: str list, historic exogenous columns.
\n", - "`futr_exog_list`: str list, future exogenous columns.
\n", - "`exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", - "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - "`max_steps`: int=1000, maximum number of training steps.
\n", - "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", - "`num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", - "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", - "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", - "`batch_size`: int=32, number of different series in each batch.
\n", - "`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", - "`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", - "`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", - "`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", - "`step_size`: int=1, step size between each window of temporal data.
\n", - "`scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", - "`random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", - "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", - "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", - "`alias`: str, optional, Custom name of the model.
\n", - "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", - "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", - "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", - "\n", - "**References**
\n", - "- [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS, title_level=3)" ] @@ -419,73 +287,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "### DeepNPTS.fit\n", - "\n", - "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", - "> distributed_config=None)\n", - "\n", - "Fit.\n", - "\n", - "The `fit` method, optimizes the neural network's weights using the\n", - "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", - "and the `loss` function as defined during the initialization.\n", - "Within `fit` we use a PyTorch Lightning `Trainer` that\n", - "inherits the initialization's `self.trainer_kwargs`, to customize\n", - "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", - "\n", - "The method is designed to be compatible with SKLearn-like classes\n", - "and in particular to be compatible with the StatsForecast library.\n", - "\n", - "By default the `model` is not saving training checkpoints to protect\n", - "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`val_size`: int, validation size for temporal cross-validation.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`test_size`: int, test size for temporal cross-validation.
" - ], - "text/plain": [ - "---\n", - "\n", - "### DeepNPTS.fit\n", - "\n", - "> DeepNPTS.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", - "> distributed_config=None)\n", - "\n", - "Fit.\n", - "\n", - "The `fit` method, optimizes the neural network's weights using the\n", - "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", - "and the `loss` function as defined during the initialization.\n", - "Within `fit` we use a PyTorch Lightning `Trainer` that\n", - "inherits the initialization's `self.trainer_kwargs`, to customize\n", - "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", - "\n", - "The method is designed to be compatible with SKLearn-like classes\n", - "and in particular to be compatible with the StatsForecast library.\n", - "\n", - "By default the `model` is not saving training checkpoints to protect\n", - "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`val_size`: int, validation size for temporal cross-validation.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`test_size`: int, test size for temporal cross-validation.
" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" ] @@ -494,53 +296,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "---\n", - "\n", - "### DeepNPTS.predict\n", - "\n", - "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", - "> **data_module_kwargs)\n", - "\n", - "Predict.\n", - "\n", - "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`test_size`: int=None, test size for temporal cross-validation.
\n", - "`step_size`: int=1, Step size between each window.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." - ], - "text/plain": [ - "---\n", - "\n", - "### DeepNPTS.predict\n", - "\n", - "> DeepNPTS.predict (dataset, test_size=None, step_size=1, random_seed=None,\n", - "> **data_module_kwargs)\n", - "\n", - "Predict.\n", - "\n", - "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", - "\n", - "**Parameters:**
\n", - "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", - "`test_size`: int=None, test size for temporal cross-validation.
\n", - "`step_size`: int=1, Step size between each window.
\n", - "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n", - "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" ] @@ -570,315 +326,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Seed set to 1\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a683239fc3e5435aad7174b0d136376d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Sanity Checking: | | 0/? [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#| eval: false\n", "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds Date: Mon, 6 May 2024 14:40:39 -0600 Subject: [PATCH 10/11] test From d3bf75b0925d617fef95c3b322fcc9690e61e441 Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Mon, 6 May 2024 23:16:24 +0200 Subject: [PATCH 11/11] remove_eval_false_from_usage_example --- nbs/models.deepnpts.ipynb | 745 +++++++++++++++++++------------------- 1 file changed, 372 insertions(+), 373 deletions(-) diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb index e26906ee5..4f0d41445 100644 --- a/nbs/models.deepnpts.ipynb +++ b/nbs/models.deepnpts.ipynb @@ -1,373 +1,372 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp models.deepnpts" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# DeepNPTS" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a non-parametric baseline model for time-series 
forecasting. This model generates predictions by sampling from the empirical distribution according to a tunable strategy. This strategy is learned by exploiting the information across multiple related time series. This model provides a strong, simple baseline for time series forecasting. \n", - "\n", - "\n", - "**References**
\n", - "[Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
\n", - "\n", - "\n", - ":::{.callout-warning collapse=\"false\"}\n", - "#### Losses\n", - "\n", - "This implementation differs from the original work in that a weighted sum of the empirical distribution is returned as forecast. Therefore, it only supports point losses.\n", - "\n", - ":::" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import neuralforecast.losses.pytorch as losses\n", - "from typing import Optional\n", - "\n", - "\n", - "from neuralforecast.common._base_windows import BaseWindows\n", - "from neuralforecast.losses.pytorch import MAE\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "import logging\n", - "import warnings\n", - "\n", - "from fastcore.test import test_eq\n", - "from nbdev.showdoc import show_doc" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n", - "warnings.filterwarnings(\"ignore\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. DeepNPTS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class DeepNPTS(BaseWindows):\n", - " \"\"\" DeepNPTS\n", - "\n", - " Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", - "\n", - " **Parameters:**
\n", - " `h`: int, Forecast horizon.
\n", - " `input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", - " `hidden_size`: int=32, hidden size of dense layers.
\n", - " `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", - " `dropout`: float=0.1, dropout.
\n", - " `n_layers`: int=2, number of dense layers.
\n", - " `stat_exog_list`: str list, static exogenous columns.
\n", - " `hist_exog_list`: str list, historic exogenous columns.
\n", - " `futr_exog_list`: str list, future exogenous columns.
\n", - " `exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", - " `loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - " `valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", - " `max_steps`: int=1000, maximum number of training steps.
\n", - " `learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", - " `num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n", - " `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", - " `val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", - " `batch_size`: int=32, number of different series in each batch.
\n", - " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", - " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", - " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", - " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", - " `step_size`: int=1, step size between each window of temporal data.
\n", - " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", - " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", - " `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", - " `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", - " `alias`: str, optional, Custom name of the model.
\n", - " `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", - " `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", - " `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", - "\n", - " **References**
\n", - " - [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
\n", - "\n", - " \"\"\"\n", - " # Class attributes\n", - " SAMPLING_TYPE = 'windows'\n", - " \n", - " def __init__(self,\n", - " h,\n", - " input_size: int = -1,\n", - " hidden_size: int = 32,\n", - " batch_norm: bool = True,\n", - " dropout: float = 0.1,\n", - " n_layers: int = 2,\n", - " futr_exog_list = None,\n", - " hist_exog_list = None,\n", - " stat_exog_list = None,\n", - " exclude_insample_y = False,\n", - " loss = MAE(),\n", - " valid_loss = MAE(),\n", - " max_steps: int = 1000,\n", - " learning_rate: float = 1e-3,\n", - " num_lr_decays: int = 3,\n", - " early_stop_patience_steps: int =-1,\n", - " val_check_steps: int = 100,\n", - " batch_size: int = 32,\n", - " valid_batch_size: Optional[int] = None,\n", - " windows_batch_size: int = 1024,\n", - " inference_windows_batch_size: int = 1024,\n", - " start_padding_enabled = False,\n", - " step_size: int = 1,\n", - " scaler_type: str = 'standard',\n", - " random_seed: int = 1,\n", - " num_workers_loader = 0,\n", - " drop_last_loader = False,\n", - " optimizer = None,\n", - " optimizer_kwargs = None,\n", - " **trainer_kwargs):\n", - "\n", - " if exclude_insample_y:\n", - " raise Exception('DeepNPTS has no possibility for excluding y.')\n", - "\n", - " if not isinstance(loss, losses.BasePointLoss):\n", - " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function.') \n", - " \n", - " if not isinstance(valid_loss, losses.BasePointLoss):\n", - " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as valid loss function.') \n", - " \n", - " # Inherit BaseWindows class\n", - " super(DeepNPTS, self).__init__(h=h,\n", - " input_size=input_size,\n", - " futr_exog_list=futr_exog_list,\n", - " hist_exog_list=hist_exog_list,\n", - " stat_exog_list=stat_exog_list,\n", - " exclude_insample_y = exclude_insample_y,\n", - " loss=loss,\n", - " valid_loss=valid_loss,\n", - " max_steps=max_steps,\n", - " learning_rate=learning_rate,\n", - " 
num_lr_decays=num_lr_decays,\n", - " early_stop_patience_steps=early_stop_patience_steps,\n", - " val_check_steps=val_check_steps,\n", - " batch_size=batch_size,\n", - " windows_batch_size=windows_batch_size,\n", - " valid_batch_size=valid_batch_size,\n", - " inference_windows_batch_size=inference_windows_batch_size,\n", - " start_padding_enabled=start_padding_enabled,\n", - " step_size=step_size,\n", - " scaler_type=scaler_type,\n", - " num_workers_loader=num_workers_loader,\n", - " drop_last_loader=drop_last_loader,\n", - " random_seed=random_seed,\n", - " optimizer=optimizer,\n", - " optimizer_kwargs=optimizer_kwargs,\n", - " **trainer_kwargs)\n", - "\n", - " self.h = h\n", - " self.hidden_size = hidden_size\n", - " self.dropout = dropout\n", - "\n", - " self.futr_exog_size = len(self.futr_exog_list)\n", - " self.stat_exog_size = len(self.stat_exog_list)\n", - " self.hist_exog_size = len(self.hist_exog_list)\n", - "\n", - " input_dim = input_size * (1 + self.futr_exog_size + self.hist_exog_size) + self.stat_exog_size + self.h * self.futr_exog_size\n", - " \n", - " # Create DeepNPTSNetwork\n", - " modules = [] \n", - " for i in range(n_layers):\n", - " modules.append(nn.Linear(input_dim if i == 0 else hidden_size, hidden_size))\n", - " modules.append(nn.ReLU())\n", - " if batch_norm:\n", - " modules.append(nn.BatchNorm1d(hidden_size))\n", - " if dropout > 0.0:\n", - " modules.append(nn.Dropout(dropout))\n", - "\n", - " modules.append(nn.Linear(hidden_size, input_size * self.h))\n", - " self.deepnptsnetwork = nn.Sequential(*modules)\n", - "\n", - " def forward(self, windows_batch):\n", - " # Parse windows_batch\n", - " x = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", - " hist_exog = windows_batch['hist_exog'] # [B, L, X]\n", - " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n", - " stat_exog = windows_batch['stat_exog'] # [B, S]\n", - "\n", - " batch_size, seq_len = x.shape[:2] # B = batch_size, L = seq_len\n", - " insample_y = 
windows_batch['insample_y'].unsqueeze(-1) \n", - " \n", - " # Concatenate x_t with future exogenous of input\n", - " if self.futr_exog_size > 0: \n", - " x = torch.cat((x, futr_exog[:, :seq_len]), dim=2) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] \n", - " \n", - " # Concatenate x_t with historic exogenous\n", - " if self.hist_exog_size > 0: \n", - " x = torch.cat((x, hist_exog), dim=2) # [B, L, 1 + F] + [B, L, X] -> [B, L, 1 + F + X] \n", - "\n", - " x = x.reshape(batch_size, -1) # [B, L, 1 + F + X] -> [B, L * (1 + F + X)]\n", - "\n", - " # Concatenate x with static exogenous\n", - " if self.stat_exog_size > 0:\n", - " x = torch.cat((x, stat_exog), dim=1) # [B, L * (1 + F + X)] + [B, S] -> [B, L * (1 + F + X) + S]\n", - "\n", - " # Concatenate x_t with future exogenous of horizon\n", - " if self.futr_exog_size > 0:\n", - " futr_exog = futr_exog[:, seq_len:] # [B, L + h, F] -> [B, h, F]\n", - " futr_exog = futr_exog.reshape(batch_size, -1) # [B, L + h, F] -> [B, h * F]\n", - " x = torch.cat((x, futr_exog), dim=1) # [B, L * (1 + F + X) + S] + [B, h * F] -> [B, L * (1 + F + X) + S + h * F] \n", - "\n", - " # Run through DeepNPTSNetwork\n", - " weights = self.deepnptsnetwork(x) # [B, L * (1 + F + X) + S + h * F] -> [B, L * h]\n", - "\n", - " # Apply softmax for weighted input predictions\n", - " weights = weights.reshape(batch_size, seq_len, -1) # [B, L * h] -> [B, L, h]\n", - " x = F.softmax(weights, dim=1) * insample_y # [B, L, h] * [B, L, 1] = [B, L, h]\n", - " output = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1]\n", - "\n", - " forecast = self.loss.domain_map(output) # [B, h, 1] -> [B, h, 1]\n", - "\n", - " return forecast" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "show_doc(DeepNPTS, title_level=3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" - ] - }, 
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Usage Example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from neuralforecast import NeuralForecast\n", - "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| eval: false\n", - "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n", - "\n", - "nf = NeuralForecast(\n", - " models=[DeepNPTS(h=12,\n", - " input_size=24,\n", - " stat_exog_list=['airline1'],\n", - " futr_exog_list=['trend'],\n", - " max_steps=1000,\n", - " val_check_steps=10,\n", - " early_stop_patience_steps=3,\n", - " scaler_type='robust',\n", - " enable_progress_bar=True),\n", - " ],\n", - " freq='M'\n", - ")\n", - "nf.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n", - "Y_hat_df = nf.predict(futr_df=Y_test_df)\n", - "\n", - "# Plot quantile predictions\n", - "Y_hat_df = Y_hat_df.reset_index(drop=False).drop(columns=['unique_id','ds'])\n", - "plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n", - "plot_df = pd.concat([Y_train_df, plot_df])\n", - "\n", - "plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n", - "plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n", - "plt.plot(plot_df['ds'], plot_df['DeepNPTS'], c='red', label='mean')\n", - "plt.grid()\n", - "plt.plot()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - 
"nbformat_minor": 4 -} +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp models.deepnpts" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DeepNPTS" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a non-parametric baseline model for time-series forecasting. This model generates predictions by sampling from the empirical distribution according to a tunable strategy. This strategy is learned by exploiting the information across multiple related time series. This model provides a strong, simple baseline for time series forecasting. \n", + "\n", + "\n", + "**References**
\n", + "[Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
\n", + "\n", + "\n", + ":::{.callout-warning collapse=\"false\"}\n", + "#### Losses\n", + "\n", + "This implementation differs from the original work in that a weighted sum of the empirical distribution is returned as forecast. Therefore, it only supports point losses.\n", + "\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import neuralforecast.losses.pytorch as losses\n", + "from typing import Optional\n", + "\n", + "\n", + "from neuralforecast.common._base_windows import BaseWindows\n", + "from neuralforecast.losses.pytorch import MAE\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "import logging\n", + "import warnings\n", + "\n", + "from fastcore.test import test_eq\n", + "from nbdev.showdoc import show_doc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. DeepNPTS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class DeepNPTS(BaseWindows):\n", + " \"\"\" DeepNPTS\n", + "\n", + " Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n", + "\n", + " **Parameters:**
\n", + " `h`: int, Forecast horizon.
\n", + " `input_size`: int, autoregressive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + " `hidden_size`: int=32, hidden size of dense layers.
\n", + " `batch_norm`: bool=True, if True, applies Batch Normalization after each dense layer in the network.
\n", + " `dropout`: float=0.1, dropout.
\n", + " `n_layers`: int=2, number of dense layers.
\n", + " `stat_exog_list`: str list, static exogenous columns.
\n", + " `hist_exog_list`: str list, historic exogenous columns.
\n", + " `futr_exog_list`: str list, future exogenous columns.
\n", + " `exclude_insample_y`: bool=False, the model skips the autoregressive features y[t-input_size:t] if True.
\n", + " `loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + " `valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + " `max_steps`: int=1000, maximum number of training steps.
\n", + " `learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + " `num_lr_decays`: int=3, Number of learning rate decays, evenly distributed across max_steps.
\n", + " `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + " `val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + " `batch_size`: int=32, number of different series in each batch.
\n", + " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n", + " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", + " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", + " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `step_size`: int=1, step size between each window of temporal data.
\n", + " `scaler_type`: str='standard', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", + " `num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n", + " `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + " `alias`: str, optional, Custom name of the model.
\n", + " `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + " `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + " `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + " **References**
\n", + " - [Rangapuram, Syama Sundar, Jan Gasthaus, Lorenzo Stella, Valentin Flunkert, David Salinas, Yuyang Wang, and Tim Januschowski (2023). \"Deep Non-Parametric Time Series Forecaster\". arXiv.](https://arxiv.org/abs/2312.14657)
\n", + "\n", + " \"\"\"\n", + " # Class attributes\n", + " SAMPLING_TYPE = 'windows'\n", + " \n", + " def __init__(self,\n", + " h,\n", + " input_size: int = -1,\n", + " hidden_size: int = 32,\n", + " batch_norm: bool = True,\n", + " dropout: float = 0.1,\n", + " n_layers: int = 2,\n", + " futr_exog_list = None,\n", + " hist_exog_list = None,\n", + " stat_exog_list = None,\n", + " exclude_insample_y = False,\n", + " loss = MAE(),\n", + " valid_loss = MAE(),\n", + " max_steps: int = 1000,\n", + " learning_rate: float = 1e-3,\n", + " num_lr_decays: int = 3,\n", + " early_stop_patience_steps: int =-1,\n", + " val_check_steps: int = 100,\n", + " batch_size: int = 32,\n", + " valid_batch_size: Optional[int] = None,\n", + " windows_batch_size: int = 1024,\n", + " inference_windows_batch_size: int = 1024,\n", + " start_padding_enabled = False,\n", + " step_size: int = 1,\n", + " scaler_type: str = 'standard',\n", + " random_seed: int = 1,\n", + " num_workers_loader = 0,\n", + " drop_last_loader = False,\n", + " optimizer = None,\n", + " optimizer_kwargs = None,\n", + " **trainer_kwargs):\n", + "\n", + " if exclude_insample_y:\n", + " raise Exception('DeepNPTS has no possibility for excluding y.')\n", + "\n", + " if not isinstance(loss, losses.BasePointLoss):\n", + " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function.') \n", + " \n", + " if not isinstance(valid_loss, losses.BasePointLoss):\n", + " raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as valid loss function.') \n", + " \n", + " # Inherit BaseWindows class\n", + " super(DeepNPTS, self).__init__(h=h,\n", + " input_size=input_size,\n", + " futr_exog_list=futr_exog_list,\n", + " hist_exog_list=hist_exog_list,\n", + " stat_exog_list=stat_exog_list,\n", + " exclude_insample_y = exclude_insample_y,\n", + " loss=loss,\n", + " valid_loss=valid_loss,\n", + " max_steps=max_steps,\n", + " learning_rate=learning_rate,\n", + " 
num_lr_decays=num_lr_decays,\n", + " early_stop_patience_steps=early_stop_patience_steps,\n", + " val_check_steps=val_check_steps,\n", + " batch_size=batch_size,\n", + " windows_batch_size=windows_batch_size,\n", + " valid_batch_size=valid_batch_size,\n", + " inference_windows_batch_size=inference_windows_batch_size,\n", + " start_padding_enabled=start_padding_enabled,\n", + " step_size=step_size,\n", + " scaler_type=scaler_type,\n", + " num_workers_loader=num_workers_loader,\n", + " drop_last_loader=drop_last_loader,\n", + " random_seed=random_seed,\n", + " optimizer=optimizer,\n", + " optimizer_kwargs=optimizer_kwargs,\n", + " **trainer_kwargs)\n", + "\n", + " self.h = h\n", + " self.hidden_size = hidden_size\n", + " self.dropout = dropout\n", + "\n", + " self.futr_exog_size = len(self.futr_exog_list)\n", + " self.stat_exog_size = len(self.stat_exog_list)\n", + " self.hist_exog_size = len(self.hist_exog_list)\n", + "\n", + " input_dim = input_size * (1 + self.futr_exog_size + self.hist_exog_size) + self.stat_exog_size + self.h * self.futr_exog_size\n", + " \n", + " # Create DeepNPTSNetwork\n", + " modules = [] \n", + " for i in range(n_layers):\n", + " modules.append(nn.Linear(input_dim if i == 0 else hidden_size, hidden_size))\n", + " modules.append(nn.ReLU())\n", + " if batch_norm:\n", + " modules.append(nn.BatchNorm1d(hidden_size))\n", + " if dropout > 0.0:\n", + " modules.append(nn.Dropout(dropout))\n", + "\n", + " modules.append(nn.Linear(hidden_size, input_size * self.h))\n", + " self.deepnptsnetwork = nn.Sequential(*modules)\n", + "\n", + " def forward(self, windows_batch):\n", + " # Parse windows_batch\n", + " x = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n", + " hist_exog = windows_batch['hist_exog'] # [B, L, X]\n", + " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n", + " stat_exog = windows_batch['stat_exog'] # [B, S]\n", + "\n", + " batch_size, seq_len = x.shape[:2] # B = batch_size, L = seq_len\n", + " insample_y = 
windows_batch['insample_y'].unsqueeze(-1) \n", + " \n", + " # Concatenate x_t with future exogenous of input\n", + " if self.futr_exog_size > 0: \n", + " x = torch.cat((x, futr_exog[:, :seq_len]), dim=2) # [B, L, 1] + [B, L, F] -> [B, L, 1 + F] \n", + " \n", + " # Concatenate x_t with historic exogenous\n", + " if self.hist_exog_size > 0: \n", + " x = torch.cat((x, hist_exog), dim=2) # [B, L, 1 + F] + [B, L, X] -> [B, L, 1 + F + X] \n", + "\n", + " x = x.reshape(batch_size, -1) # [B, L, 1 + F + X] -> [B, L * (1 + F + X)]\n", + "\n", + " # Concatenate x with static exogenous\n", + " if self.stat_exog_size > 0:\n", + " x = torch.cat((x, stat_exog), dim=1) # [B, L * (1 + F + X)] + [B, S] -> [B, L * (1 + F + X) + S]\n", + "\n", + " # Concatenate x_t with future exogenous of horizon\n", + " if self.futr_exog_size > 0:\n", + " futr_exog = futr_exog[:, seq_len:] # [B, L + h, F] -> [B, h, F]\n", + " futr_exog = futr_exog.reshape(batch_size, -1) # [B, L + h, F] -> [B, h * F]\n", + " x = torch.cat((x, futr_exog), dim=1) # [B, L * (1 + F + X) + S] + [B, h * F] -> [B, L * (1 + F + X) + S + h * F] \n", + "\n", + " # Run through DeepNPTSNetwork\n", + " weights = self.deepnptsnetwork(x) # [B, L * (1 + F + X) + S + h * F] -> [B, L * h]\n", + "\n", + " # Apply softmax for weighted input predictions\n", + " weights = weights.reshape(batch_size, seq_len, -1) # [B, L * h] -> [B, L, h]\n", + " x = F.softmax(weights, dim=1) * insample_y # [B, L, h] * [B, L, 1] = [B, L, h]\n", + " output = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1]\n", + "\n", + " forecast = self.loss.domain_map(output) # [B, h, 1] -> [B, h, 1]\n", + "\n", + " return forecast" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(DeepNPTS, title_level=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(DeepNPTS.fit, name='DeepNPTS.fit', title_level=3)" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from neuralforecast import NeuralForecast\n", + "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n", + "\n", + "nf = NeuralForecast(\n", + " models=[DeepNPTS(h=12,\n", + " input_size=24,\n", + " stat_exog_list=['airline1'],\n", + " futr_exog_list=['trend'],\n", + " max_steps=1000,\n", + " val_check_steps=10,\n", + " early_stop_patience_steps=3,\n", + " scaler_type='robust',\n", + " enable_progress_bar=True),\n", + " ],\n", + " freq='M'\n", + ")\n", + "nf.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n", + "Y_hat_df = nf.predict(futr_df=Y_test_df)\n", + "\n", + "# Plot quantile predictions\n", + "Y_hat_df = Y_hat_df.reset_index(drop=False).drop(columns=['unique_id','ds'])\n", + "plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n", + "plot_df = pd.concat([Y_train_df, plot_df])\n", + "\n", + "plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n", + "plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n", + "plt.plot(plot_df['ds'], plot_df['DeepNPTS'], c='red', label='mean')\n", + "plt.grid()\n", + "plt.plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}