From 5e96e9d64bc4bd62927e74966b1e784191a7b9e1 Mon Sep 17 00:00:00 2001
From: Ben Steel
Date: Fri, 17 Feb 2023 23:24:10 +0000
Subject: [PATCH 1/3] Added fixes for weight size error calc

---
 pytorch_forecasting/models/base_model.py     |  6 +++-
 pytorch_forecasting/models/nhits/__init__.py | 27 ++++++++++++----
 .../models/nhits/sub_modules.py              | 31 ++++++++++++++-----
 3 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/pytorch_forecasting/models/base_model.py b/pytorch_forecasting/models/base_model.py
index b3d39130..95265dd4 100644
--- a/pytorch_forecasting/models/base_model.py
+++ b/pytorch_forecasting/models/base_model.py
@@ -206,6 +206,7 @@ def forward(self, x):

     def __init__(
         self,
+        dataset_parameters: Dict[str, Any] = None,
         log_interval: Union[int, float] = -1,
         log_val_interval: Union[int, float] = None,
         learning_rate: Union[float, List[float]] = 1e-3,
@@ -279,6 +280,8 @@ def __init__(
         self.output_transformer = output_transformer
         if not hasattr(self, "optimizer"):  # callables are removed from hyperparameters, so better to save them
             self.optimizer = self.hparams.optimizer
+        if not hasattr(self, "dataset_parameters"):
+            self.dataset_parameters = dataset_parameters

         # delete everything from hparams that cannot be serialized with yaml.dump
         # which is particularly important for tensorboard logging
@@ -993,8 +996,9 @@ def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs) -> LightningModule:
         """
         if "output_transformer" not in kwargs:
             kwargs["output_transformer"] = dataset.target_normalizer
+        if "dataset_parameters" not in kwargs:
+            kwargs["dataset_parameters"] = dataset.get_parameters()
         net = cls(**kwargs)
-        net.dataset_parameters = dataset.get_parameters()
         if dataset.multi_target:
             assert isinstance(
                 net.loss, MultiLoss
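
The base_model.py hunks change how dataset parameters reach the model: from_dataset now injects
them as a constructor keyword instead of attaching the attribute after cls(**kwargs), so the value
exists from __init__ onwards. A minimal, self-contained sketch of the pattern; ToyDataset and
ToyModel are illustrative stand-ins, not pytorch-forecasting API:

    from typing import Any, Dict, Optional


    class ToyDataset:
        """Stand-in for a dataset exposing get_parameters()."""

        def get_parameters(self) -> Dict[str, Any]:
            return {"max_encoder_length": 30, "max_prediction_length": 6}


    class ToyModel:
        """Stand-in for BaseModel: accepts dataset parameters at construction time."""

        def __init__(self, dataset_parameters: Optional[Dict[str, Any]] = None):
            # mirrors the __init__ hunk: only set when not already present
            if not hasattr(self, "dataset_parameters"):
                self.dataset_parameters = dataset_parameters

        @classmethod
        def from_dataset(cls, dataset: ToyDataset, **kwargs) -> "ToyModel":
            # mirrors the from_dataset hunk: pass the parameters through as a kwarg
            if "dataset_parameters" not in kwargs:
                kwargs["dataset_parameters"] = dataset.get_parameters()
            return cls(**kwargs)


    net = ToyModel.from_dataset(ToyDataset())
    assert net.dataset_parameters["max_encoder_length"] == 30
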
diff --git a/pytorch_forecasting/models/nhits/__init__.py b/pytorch_forecasting/models/nhits/__init__.py
index 477a362d..a7384a70 100644
--- a/pytorch_forecasting/models/nhits/__init__.py
+++ b/pytorch_forecasting/models/nhits/__init__.py
@@ -179,7 +179,8 @@ def __init__(
             prediction_length=self.hparams.prediction_length,
             output_size=to_list(output_size),
             static_size=self.static_size,
-            covariate_size=self.covariate_size,
+            encoder_covariate_size=self.encoder_covariate_size,
+            decoder_covariate_size=self.decoder_covariate_size,
             static_hidden_size=self.hparams.static_hidden_size,
             n_blocks=self.hparams.n_blocks,
             n_layers=self.hparams.n_layers,
@@ -197,13 +198,24 @@ def __init__(
         )

     @property
-    def covariate_size(self) -> int:
-        """Covariate size.
+    def decoder_covariate_size(self) -> int:
+        """Decoder covariate size.

         Returns:
-            int: size of time-dependent covariates
+            int: size of time-dependent covariates used by the decoder
         """
         return len(set(self.hparams.time_varying_reals_decoder) - set(self.target_names)) + sum(
+            self.embeddings.output_size[name] for name in self.hparams.time_varying_categoricals_decoder
+        )
+
+    @property
+    def encoder_covariate_size(self) -> int:
+        """Encoder covariate size.
+
+        Returns:
+            int: size of time-dependent covariates used by the encoder
+        """
+        return len(set(self.hparams.time_varying_reals_encoder) - set(self.target_names)) + sum(
             self.embeddings.output_size[name] for name in self.hparams.time_varying_categoricals_encoder
         )
@@ -239,16 +251,19 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
             Dict[str, torch.Tensor]: output of model
         """
         # covariates
-        if self.covariate_size > 0:
+        if self.encoder_covariate_size > 0:
             encoder_features = self.extract_features(x, self.embeddings, period="encoder")
             encoder_x_t = torch.concat(
                 [encoder_features[name] for name in self.encoder_variables if name not in self.target_names],
                 dim=2,
             )
+        else:
+            encoder_x_t = None
+
+        if self.decoder_covariate_size > 0:
             decoder_features = self.extract_features(x, self.embeddings, period="decoder")
             decoder_x_t = torch.concat([decoder_features[name] for name in self.decoder_variables], dim=2)
         else:
-            encoder_x_t = None
             decoder_x_t = None

         # statics
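
The split into the two properties above is the heart of the fix: the old covariate_size mixed the
windows (decoder reals counted together with encoder categorical embeddings) and the one number was
applied to encoder and decoder alike. A back-of-the-envelope sketch of the new arithmetic, using
made-up variable lists rather than the library's example data:

    # made-up variable lists; the dict stands in for self.embeddings.output_size
    target_names = {"target"}
    time_varying_reals_encoder = ["time_idx", "price", "volume", "target"]  # known + unknown reals
    time_varying_reals_decoder = ["time_idx", "price"]                      # known reals only
    time_varying_categoricals_encoder = ["month"]
    time_varying_categoricals_decoder = ["month"]
    embedding_output_size = {"month": 6}

    encoder_covariate_size = len(set(time_varying_reals_encoder) - target_names) + sum(
        embedding_output_size[name] for name in time_varying_categoricals_encoder
    )
    decoder_covariate_size = len(set(time_varying_reals_decoder) - target_names) + sum(
        embedding_output_size[name] for name in time_varying_categoricals_decoder
    )
    print(encoder_covariate_size, decoder_covariate_size)  # 9 8 -> the two windows differ
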
diff --git a/pytorch_forecasting/models/nhits/sub_modules.py b/pytorch_forecasting/models/nhits/sub_modules.py
index a97f32a0..5a36f79b 100644
--- a/pytorch_forecasting/models/nhits/sub_modules.py
+++ b/pytorch_forecasting/models/nhits/sub_modules.py
@@ -92,7 +92,8 @@ def __init__(
         context_length: int,
         prediction_length: int,
         output_size: int,
-        covariate_size: int,
+        encoder_covariate_size: int,
+        decoder_covariate_size: int,
         static_size: int,
         static_hidden_size: int,
         n_theta: int,
@@ -120,14 +121,16 @@ def __init__(
         self.prediction_length = prediction_length
         self.static_size = static_size
         self.static_hidden_size = static_hidden_size
-        self.covariate_size = covariate_size
+        self.encoder_covariate_size = encoder_covariate_size
+        self.decoder_covariate_size = decoder_covariate_size
         self.pooling_sizes = pooling_sizes
         self.batch_normalization = batch_normalization
         self.dropout = dropout

         self.hidden_size = [
             self.context_length_pooled * len(self.output_size)
-            + (self.context_length + self.prediction_length) * self.covariate_size
+            + self.context_length * self.encoder_covariate_size
+            + self.prediction_length * self.decoder_covariate_size
             + self.static_hidden_size
         ] + hidden_size

@@ -174,11 +177,19 @@ def forward(
         encoder_y = self.pooling_layer(encoder_y)
         encoder_y = encoder_y.transpose(1, 2).reshape(batch_size, -1)
-        if self.covariate_size > 0:
+        if self.encoder_covariate_size > 0:
             encoder_y = torch.cat(
                 (
                     encoder_y,
                     encoder_x_t.reshape(batch_size, -1),
+                ),
+                1,
+            )
+
+        if self.decoder_covariate_size > 0:
+            encoder_y = torch.cat(
+                (
+                    encoder_y,
                     decoder_x_t.reshape(batch_size, -1),
                 ),
                 1,
             )
@@ -211,7 +222,8 @@ def __init__(
         prediction_length,
         output_size: int,
         static_size,
-        covariate_size,
+        encoder_covariate_size,
+        decoder_covariate_size,
         static_hidden_size,
         n_blocks: list,
         n_layers: list,
@@ -239,7 +251,8 @@ def __init__(
             context_length=context_length,
             prediction_length=prediction_length,
             output_size=output_size,
-            covariate_size=covariate_size,
+            encoder_covariate_size=encoder_covariate_size,
+            decoder_covariate_size=decoder_covariate_size,
             static_size=static_size,
             static_hidden_size=static_hidden_size,
             n_layers=n_layers,
@@ -262,7 +275,8 @@ def create_stack(
         context_length,
         prediction_length,
         output_size,
-        covariate_size,
+        encoder_covariate_size,
+        decoder_covariate_size,
         static_size,
         static_hidden_size,
         n_layers,
@@ -303,7 +317,8 @@ def create_stack(
                 context_length=context_length,
                 prediction_length=prediction_length,
                 output_size=output_size,
-                covariate_size=covariate_size,
+                encoder_covariate_size=encoder_covariate_size,
+                decoder_covariate_size=decoder_covariate_size,
                 static_size=static_size,
                 static_hidden_size=static_hidden_size,
                 n_theta=n_theta,
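
The sub_modules.py hunks keep the width of the first MLP layer in sync with what NHiTSBlock.forward
actually concatenates: the old term (context_length + prediction_length) * covariate_size is only
correct when both windows carry equally many covariates, which is exactly the weight size error this
patch fixes. A shape-only sketch with random stand-in tensors and hypothetical sizes (not taken from
the tests):

    import torch

    batch_size, context_length, prediction_length = 4, 30, 6
    encoder_cov, decoder_cov, pooled_width = 9, 8, 10

    encoder_y = torch.randn(batch_size, pooled_width)                      # pooled, flattened targets
    encoder_x_t = torch.randn(batch_size, context_length, encoder_cov)     # encoder covariates
    decoder_x_t = torch.randn(batch_size, prediction_length, decoder_cov)  # decoder covariates

    # mirrors the corrected forward: each covariate block is appended only if present
    block_input = encoder_y
    if encoder_cov > 0:
        block_input = torch.cat((block_input, encoder_x_t.reshape(batch_size, -1)), 1)
    if decoder_cov > 0:
        block_input = torch.cat((block_input, decoder_x_t.reshape(batch_size, -1)), 1)

    # the flattened width now matches the corrected first entry of hidden_size in __init__
    assert block_input.shape[1] == pooled_width + context_length * encoder_cov + prediction_length * decoder_cov
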
From bf3530749f637ac0150fcf0aef81c6ad3e562740 Mon Sep 17 00:00:00 2001
From: Ben Steel
Date: Wed, 13 Sep 2023 20:19:11 +0000
Subject: [PATCH 2/3] Added specific tests for different sized encoder and
 decoder in nhits

---
 tests/test_models/conftest.py   | 29 +++++++++++++++++++++++++++++
 tests/test_models/test_nhits.py |  4 ++++
 2 files changed, 33 insertions(+)

diff --git a/tests/test_models/conftest.py b/tests/test_models/conftest.py
index 40614282..f7ca51d8 100644
--- a/tests/test_models/conftest.py
+++ b/tests/test_models/conftest.py
@@ -130,6 +130,35 @@ def make_dataloaders(data_with_covariates, **kwargs):
 def multiple_dataloaders_with_covariates(data_with_covariates, request):
     return make_dataloaders(data_with_covariates, **request.param)

+@pytest.fixture(scope="session")
+def dataloaders_with_different_encoder_decoder_length(data_with_covariates):
+    return make_dataloaders(
+        data_with_covariates.copy(),
+        target="target",
+        time_varying_known_categoricals=["special_days", "month"],
+        variable_groups=dict(
+            special_days=[
+                "easter_day",
+                "good_friday",
+                "new_year",
+                "christmas",
+                "labor_day",
+                "independence_day",
+                "revolution_day_memorial",
+                "regional_games",
+                "fifa_u_17_world_cup",
+                "football_gold_cup",
+                "beer_capital",
+                "music_fest",
+            ]
+        ),
+        time_varying_known_reals=["time_idx", "price_regular", "price_actual", "discount", "discount_in_percent"],
+        time_varying_unknown_categoricals=[],
+        time_varying_unknown_reals=["target", "volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp"],
+        static_categoricals=["agency"],
+        add_relative_time_idx=False,
+        target_normalizer=GroupNormalizer(groups=["agency", "sku"], center=False),
+    )

 @pytest.fixture(scope="session")
 def dataloaders_with_covariates(data_with_covariates):
diff --git a/tests/test_models/test_nhits.py b/tests/test_models/test_nhits.py
index 70af906c..854bddfd 100644
--- a/tests/test_models/test_nhits.py
+++ b/tests/test_models/test_nhits.py
@@ -69,6 +69,7 @@ def _integration(dataloader, tmp_path, gpus, **kwargs):
     "dataloader",
     [
         "with_covariates",
+        "different_encoder_decoder_size",
         "fixed_window_without_covariates",
         "multi_target",
         "quantiles",
@@ -78,6 +79,7 @@ def _integration(dataloader, tmp_path, gpus, **kwargs):
 )
 def test_integration(
     dataloaders_with_covariates,
+    dataloaders_with_different_encoder_decoder_length,
     dataloaders_fixed_window_without_covariates,
     dataloaders_multi_target,
     tmp_path,
@@ -88,6 +90,8 @@ def test_integration(
     if dataloader == "with_covariates":
         dataloader = dataloaders_with_covariates
         kwargs["backcast_loss_ratio"] = 0.5
+    elif dataloader == "different_encoder_decoder_size":
+        dataloader = dataloaders_with_different_encoder_decoder_length
     elif dataloader == "fixed_window_without_covariates":
        dataloader = dataloaders_fixed_window_without_covariates
     elif dataloader == "multi_target":

From 2878a76ccbba3b80e88d7b300ba073b02c44955b Mon Sep 17 00:00:00 2001
From: Ben Steel
Date: Thu, 28 Sep 2023 12:34:21 -0400
Subject: [PATCH 3/3] Added newlines to pass linter check

---
 tests/test_models/conftest.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_models/conftest.py b/tests/test_models/conftest.py
index f7ca51d8..97e94dd0 100644
--- a/tests/test_models/conftest.py
+++ b/tests/test_models/conftest.py
@@ -130,6 +130,7 @@ def make_dataloaders(data_with_covariates, **kwargs):
 def multiple_dataloaders_with_covariates(data_with_covariates, request):
     return make_dataloaders(data_with_covariates, **request.param)

+
 @pytest.fixture(scope="session")
 def dataloaders_with_different_encoder_decoder_length(data_with_covariates):
     return make_dataloaders(
@@ -160,6 +161,7 @@ def dataloaders_with_different_encoder_decoder_length(data_with_covariates):
         target_normalizer=GroupNormalizer(groups=["agency", "sku"], center=False),
     )

+
 @pytest.fixture(scope="session")
 def dataloaders_with_covariates(data_with_covariates):
     return make_dataloaders(
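
A rough count of why the fixture added in patch 2 exercises different encoder and decoder widths:
unknown reals are only visible to the encoder, while known reals feed both windows (categorical
embeddings are left out of this sketch):

    known_reals = {"time_idx", "price_regular", "price_actual", "discount", "discount_in_percent"}
    unknown_reals = {"target", "volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp"}
    targets = {"target"}

    encoder_reals = (known_reals | unknown_reals) - targets  # encoder sees known + unknown reals
    decoder_reals = known_reals - targets                    # decoder sees known reals only
    print(len(encoder_reals), len(decoder_reals))  # 10 5 -> sizes differ, which the old code mishandled
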