From 5e96e9d64bc4bd62927e74966b1e784191a7b9e1 Mon Sep 17 00:00:00 2001
From: Ben Steel
Date: Fri, 17 Feb 2023 23:24:10 +0000
Subject: [PATCH 1/3] Added fixes for weight size error calc

---
 pytorch_forecasting/models/base_model.py     |  6 +++-
 pytorch_forecasting/models/nhits/__init__.py | 27 ++++++++++++----
 .../models/nhits/sub_modules.py              | 31 ++++++++++++++-----
 3 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/pytorch_forecasting/models/base_model.py b/pytorch_forecasting/models/base_model.py
index b3d39130..95265dd4 100644
--- a/pytorch_forecasting/models/base_model.py
+++ b/pytorch_forecasting/models/base_model.py
@@ -206,6 +206,7 @@ def forward(self, x):

     def __init__(
         self,
+        dataset_parameters: Dict[str, Any] = None,
         log_interval: Union[int, float] = -1,
         log_val_interval: Union[int, float] = None,
         learning_rate: Union[float, List[float]] = 1e-3,
@@ -279,6 +280,8 @@ def __init__(
         self.output_transformer = output_transformer
         if not hasattr(self, "optimizer"):  # callables are removed from hyperparameters, so better to save them
             self.optimizer = self.hparams.optimizer
+        if not hasattr(self, "dataset_parameters"):
+            self.dataset_parameters = dataset_parameters

         # delete everything from hparams that cannot be serialized with yaml.dump
         # which is particularly important for tensorboard logging
@@ -993,8 +996,9 @@ def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs) -> LightningModule:
         """
         if "output_transformer" not in kwargs:
             kwargs["output_transformer"] = dataset.target_normalizer
+        if "dataset_parameters" not in kwargs:
+            kwargs["dataset_parameters"] = dataset.get_parameters()
         net = cls(**kwargs)
-        net.dataset_parameters = dataset.get_parameters()
         if dataset.multi_target:
             assert isinstance(
                 net.loss, MultiLoss
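
The base_model.py hunks change how dataset parameters reach the model: from_dataset now injects
them as a constructor keyword instead of attaching the attribute after cls(**kwargs), so the value
exists from __init__ onwards. A minimal, self-contained sketch of the pattern; ToyDataset and
ToyModel are illustrative stand-ins, not pytorch-forecasting API:

    from typing import Any, Dict, Optional


    class ToyDataset:
        """Stand-in for a dataset exposing get_parameters()."""

        def get_parameters(self) -> Dict[str, Any]:
            return {"max_encoder_length": 30, "max_prediction_length": 6}


    class ToyModel:
        """Stand-in for BaseModel: accepts dataset parameters at construction time."""

        def __init__(self, dataset_parameters: Optional[Dict[str, Any]] = None):
            # mirrors the __init__ hunk: only set when not already present
            if not hasattr(self, "dataset_parameters"):
                self.dataset_parameters = dataset_parameters

        @classmethod
        def from_dataset(cls, dataset: ToyDataset, **kwargs) -> "ToyModel":
            # mirrors the from_dataset hunk: pass the parameters through as a kwarg
            if "dataset_parameters" not in kwargs:
                kwargs["dataset_parameters"] = dataset.get_parameters()
            return cls(**kwargs)


    net = ToyModel.from_dataset(ToyDataset())
    assert net.dataset_parameters["max_encoder_length"] == 30
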
diff --git a/pytorch_forecasting/models/nhits/__init__.py b/pytorch_forecasting/models/nhits/__init__.py
index 477a362d..a7384a70 100644
--- a/pytorch_forecasting/models/nhits/__init__.py
+++ b/pytorch_forecasting/models/nhits/__init__.py
@@ -179,7 +179,8 @@ def __init__(
             prediction_length=self.hparams.prediction_length,
             output_size=to_list(output_size),
             static_size=self.static_size,
-            covariate_size=self.covariate_size,
+            encoder_covariate_size=self.encoder_covariate_size,
+            decoder_covariate_size=self.decoder_covariate_size,
             static_hidden_size=self.hparams.static_hidden_size,
             n_blocks=self.hparams.n_blocks,
             n_layers=self.hparams.n_layers,
@@ -197,13 +198,24 @@ def __init__(
         )

     @property
-    def covariate_size(self) -> int:
-        """Covariate size.
+    def decoder_covariate_size(self) -> int:
+        """Decoder covariate size.

         Returns:
-            int: size of time-dependent covariates
+            int: size of time-dependent covariates used by the decoder
         """
         return len(set(self.hparams.time_varying_reals_decoder) - set(self.target_names)) + sum(
+            self.embeddings.output_size[name] for name in self.hparams.time_varying_categoricals_decoder
+        )
+
+    @property
+    def encoder_covariate_size(self) -> int:
+        """Encoder covariate size.
+
+        Returns:
+            int: size of time-dependent covariates used by the encoder
+        """
+        return len(set(self.hparams.time_varying_reals_encoder) - set(self.target_names)) + sum(
             self.embeddings.output_size[name] for name in self.hparams.time_varying_categoricals_encoder
         )
@@ -239,16 +251,19 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
             Dict[str, torch.Tensor]: output of model
         """
         # covariates
-        if self.covariate_size > 0:
+        if self.encoder_covariate_size > 0:
             encoder_features = self.extract_features(x, self.embeddings, period="encoder")
             encoder_x_t = torch.concat(
                 [encoder_features[name] for name in self.encoder_variables if name not in self.target_names],
                 dim=2,
             )
+        else:
+            encoder_x_t = None
+
+        if self.decoder_covariate_size > 0:
             decoder_features = self.extract_features(x, self.embeddings, period="decoder")
             decoder_x_t = torch.concat([decoder_features[name] for name in self.decoder_variables], dim=2)
         else:
-            encoder_x_t = None
             decoder_x_t = None

         # statics
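
The split into the two properties above is the heart of the fix: the old covariate_size mixed the
windows (decoder reals counted together with encoder categorical embeddings) and the one number was
applied to encoder and decoder alike. A back-of-the-envelope sketch of the new arithmetic, using
made-up variable lists rather than the library's example data:

    # made-up variable lists; the dict stands in for self.embeddings.output_size
    target_names = {"target"}
    time_varying_reals_encoder = ["time_idx", "price", "volume", "target"]  # known + unknown reals
    time_varying_reals_decoder = ["time_idx", "price"]                      # known reals only
    time_varying_categoricals_encoder = ["month"]
    time_varying_categoricals_decoder = ["month"]
    embedding_output_size = {"month": 6}

    encoder_covariate_size = len(set(time_varying_reals_encoder) - target_names) + sum(
        embedding_output_size[name] for name in time_varying_categoricals_encoder
    )
    decoder_covariate_size = len(set(time_varying_reals_decoder) - target_names) + sum(
        embedding_output_size[name] for name in time_varying_categoricals_decoder
    )
    print(encoder_covariate_size, decoder_covariate_size)  # 9 8 -> the two windows differ
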
diff --git a/pytorch_forecasting/models/nhits/sub_modules.py b/pytorch_forecasting/models/nhits/sub_modules.py
index a97f32a0..5a36f79b 100644
--- a/pytorch_forecasting/models/nhits/sub_modules.py
+++ b/pytorch_forecasting/models/nhits/sub_modules.py
@@ -92,7 +92,8 @@ def __init__(
         context_length: int,
         prediction_length: int,
         output_size: int,
-        covariate_size: int,
+        encoder_covariate_size: int,
+        decoder_covariate_size: int,
         static_size: int,
         static_hidden_size: int,
         n_theta: int,
@@ -120,14 +121,16 @@ def __init__(
         self.prediction_length = prediction_length
         self.static_size = static_size
         self.static_hidden_size = static_hidden_size
-        self.covariate_size = covariate_size
+        self.encoder_covariate_size = encoder_covariate_size
+        self.decoder_covariate_size = decoder_covariate_size
         self.pooling_sizes = pooling_sizes
         self.batch_normalization = batch_normalization
         self.dropout = dropout

         self.hidden_size = [
             self.context_length_pooled * len(self.output_size)
-            + (self.context_length + self.prediction_length) * self.covariate_size
+            + self.context_length * self.encoder_covariate_size
+            + self.prediction_length * self.decoder_covariate_size
             + self.static_hidden_size
         ] + hidden_size

@@ -174,11 +177,19 @@ def forward(
         encoder_y = self.pooling_layer(encoder_y)
         encoder_y = encoder_y.transpose(1, 2).reshape(batch_size, -1)
-        if self.covariate_size > 0:
+        if self.encoder_covariate_size > 0:
             encoder_y = torch.cat(
                 (
                     encoder_y,
                     encoder_x_t.reshape(batch_size, -1),
+                ),
+                1,
+            )
+
+        if self.decoder_covariate_size > 0:
+            encoder_y = torch.cat(
+                (
+                    encoder_y,
                     decoder_x_t.reshape(batch_size, -1),
                 ),
                 1,
             )
@@ -211,7 +222,8 @@ def __init__(
         prediction_length,
         output_size: int,
         static_size,
-        covariate_size,
+        encoder_covariate_size,
+        decoder_covariate_size,
         static_hidden_size,
         n_blocks: list,
         n_layers: list,
@@ -239,7 +251,8 @@ def __init__(
             context_length=context_length,
             prediction_length=prediction_length,
             output_size=output_size,
-            covariate_size=covariate_size,
+            encoder_covariate_size=encoder_covariate_size,
+            decoder_covariate_size=decoder_covariate_size,
             static_size=static_size,
             static_hidden_size=static_hidden_size,
             n_layers=n_layers,
@@ -262,7 +275,8 @@ def create_stack(
         context_length,
         prediction_length,
         output_size,
-        covariate_size,
+        encoder_covariate_size,
+        decoder_covariate_size,
         static_size,
         static_hidden_size,
         n_layers,
@@ -303,7 +317,8 @@ def create_stack(
                 context_length=context_length,
                 prediction_length=prediction_length,
                 output_size=output_size,
-                covariate_size=covariate_size,
+                encoder_covariate_size=encoder_covariate_size,
+                decoder_covariate_size=decoder_covariate_size,
                 static_size=static_size,
                 static_hidden_size=static_hidden_size,
                 n_theta=n_theta,
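
The sub_modules.py hunks keep the width of the first MLP layer in sync with what NHiTSBlock.forward
actually concatenates: the old term (context_length + prediction_length) * covariate_size is only
correct when both windows carry equally many covariates, which is exactly the weight size error this
patch fixes. A shape-only sketch with random stand-in tensors and hypothetical sizes (not taken from
the tests):

    import torch

    batch_size, context_length, prediction_length = 4, 30, 6
    encoder_cov, decoder_cov, pooled_width = 9, 8, 10

    encoder_y = torch.randn(batch_size, pooled_width)                      # pooled, flattened targets
    encoder_x_t = torch.randn(batch_size, context_length, encoder_cov)     # encoder covariates
    decoder_x_t = torch.randn(batch_size, prediction_length, decoder_cov)  # decoder covariates

    # mirrors the corrected forward: each covariate block is appended only if present
    block_input = encoder_y
    if encoder_cov > 0:
        block_input = torch.cat((block_input, encoder_x_t.reshape(batch_size, -1)), 1)
    if decoder_cov > 0:
        block_input = torch.cat((block_input, decoder_x_t.reshape(batch_size, -1)), 1)

    # the flattened width now matches the corrected first entry of hidden_size in __init__
    assert block_input.shape[1] == pooled_width + context_length * encoder_cov + prediction_length * decoder_cov
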
From bf3530749f637ac0150fcf0aef81c6ad3e562740 Mon Sep 17 00:00:00 2001
From: Ben Steel
Date: Wed, 13 Sep 2023 20:19:11 +0000
Subject: [PATCH 2/3] Added specific tests for different sized encoder and
 decoder in nhits

---
 tests/test_models/conftest.py   | 29 +++++++++++++++++++++++++++++
 tests/test_models/test_nhits.py |  4 ++++
 2 files changed, 33 insertions(+)

diff --git a/tests/test_models/conftest.py b/tests/test_models/conftest.py
index 40614282..f7ca51d8 100644
--- a/tests/test_models/conftest.py
+++ b/tests/test_models/conftest.py
@@ -130,6 +130,35 @@ def make_dataloaders(data_with_covariates, **kwargs):
 def multiple_dataloaders_with_covariates(data_with_covariates, request):
     return make_dataloaders(data_with_covariates, **request.param)

+@pytest.fixture(scope="session")
+def dataloaders_with_different_encoder_decoder_length(data_with_covariates):
+    return make_dataloaders(
+        data_with_covariates.copy(),
+        target="target",
+        time_varying_known_categoricals=["special_days", "month"],
+        variable_groups=dict(
+            special_days=[
+                "easter_day",
+                "good_friday",
+                "new_year",
+                "christmas",
+                "labor_day",
+                "independence_day",
+                "revolution_day_memorial",
+                "regional_games",
+                "fifa_u_17_world_cup",
+                "football_gold_cup",
+                "beer_capital",
+                "music_fest",
+            ]
+        ),
+        time_varying_known_reals=["time_idx", "price_regular", "price_actual", "discount", "discount_in_percent"],
+        time_varying_unknown_categoricals=[],
+        time_varying_unknown_reals=["target", "volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp"],
+        static_categoricals=["agency"],
+        add_relative_time_idx=False,
+        target_normalizer=GroupNormalizer(groups=["agency", "sku"], center=False),
+    )

 @pytest.fixture(scope="session")
 def dataloaders_with_covariates(data_with_covariates):
diff --git a/tests/test_models/test_nhits.py b/tests/test_models/test_nhits.py
index 70af906c..854bddfd 100644
--- a/tests/test_models/test_nhits.py
+++ b/tests/test_models/test_nhits.py
@@ -69,6 +69,7 @@ def _integration(dataloader, tmp_path, gpus, **kwargs):
     "dataloader",
     [
         "with_covariates",
+        "different_encoder_decoder_size",
         "fixed_window_without_covariates",
         "multi_target",
         "quantiles",
@@ -78,6 +79,7 @@ def _integration(dataloader, tmp_path, gpus, **kwargs):
 )
 def test_integration(
     dataloaders_with_covariates,
+    dataloaders_with_different_encoder_decoder_length,
     dataloaders_fixed_window_without_covariates,
     dataloaders_multi_target,
     tmp_path,
@@ -88,6 +90,8 @@ def test_integration(
     if dataloader == "with_covariates":
         dataloader = dataloaders_with_covariates
         kwargs["backcast_loss_ratio"] = 0.5
+    elif dataloader == "different_encoder_decoder_size":
+        dataloader = dataloaders_with_different_encoder_decoder_length
     elif dataloader == "fixed_window_without_covariates":
        dataloader = dataloaders_fixed_window_without_covariates
     elif dataloader == "multi_target":

From 2878a76ccbba3b80e88d7b300ba073b02c44955b Mon Sep 17 00:00:00 2001
From: Ben Steel
Date: Thu, 28 Sep 2023 12:34:21 -0400
Subject: [PATCH 3/3] Added newlines to pass linter check

---
 tests/test_models/conftest.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_models/conftest.py b/tests/test_models/conftest.py
index f7ca51d8..97e94dd0 100644
--- a/tests/test_models/conftest.py
+++ b/tests/test_models/conftest.py
@@ -130,6 +130,7 @@ def make_dataloaders(data_with_covariates, **kwargs):
 def multiple_dataloaders_with_covariates(data_with_covariates, request):
     return make_dataloaders(data_with_covariates, **request.param)

+
 @pytest.fixture(scope="session")
 def dataloaders_with_different_encoder_decoder_length(data_with_covariates):
     return make_dataloaders(
@@ -160,6 +161,7 @@ def dataloaders_with_different_encoder_decoder_length(data_with_covariates):
         target_normalizer=GroupNormalizer(groups=["agency", "sku"], center=False),
     )

+
 @pytest.fixture(scope="session")
 def dataloaders_with_covariates(data_with_covariates):
     return make_dataloaders(
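
A rough count of why the fixture added in patch 2 exercises different encoder and decoder widths:
unknown reals are only visible to the encoder, while known reals feed both windows (categorical
embeddings are left out of this sketch):

    known_reals = {"time_idx", "price_regular", "price_actual", "discount", "discount_in_percent"}
    unknown_reals = {"target", "volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp"}
    targets = {"target"}

    encoder_reals = (known_reals | unknown_reals) - targets  # encoder sees known + unknown reals
    decoder_reals = known_reals - targets                    # decoder sees known reals only
    print(len(encoder_reals), len(decoder_reals))  # 10 5 -> sizes differ, which the old code mishandled
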