From 8a01b600af3b22ddb62df6ab0e41117facf9d36d Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 23 Mar 2023 13:54:32 +0300 Subject: [PATCH 01/18] added predict method --- etna/models/tbats.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index 32977a2a9..696eeb82d 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -65,7 +65,24 @@ def forecast(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Itera return y_pred def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterable[float]) -> pd.DataFrame: - raise NotImplementedError("Method predict isn't currently implemented!") + if self._fitted_model is None or self._freq is None: + raise ValueError("Model is not fitted! Fit the model before calling predict method!") + + y_pred = pd.DataFrame() + y_pred["target"] = self._fitted_model.y_hat + + if prediction_interval: + for quantile in quantiles: + confidence_intervals = self._fitted_model._calculate_confidence_intervals( + y_pred["target"].values, quantile + ) + + if quantile < 1 / 2: + y_pred[f"target_{quantile:.4g}"] = confidence_intervals["lower_bound"] + else: + y_pred[f"target_{quantile:.4g}"] = confidence_intervals["upper_bound"] + + return y_pred def get_model(self) -> Model: """Get internal :py:class:`tbats.tbats.Model` model that was fitted inside etna class. From 5c59af2b542fa046be871c2c7997b2acd1e152f1 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 23 Mar 2023 13:55:04 +0300 Subject: [PATCH 02/18] fixed components names --- etna/models/tbats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index 696eeb82d..cf3325f91 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -240,7 +240,7 @@ def _process_components(self, raw_components: np.ndarray) -> pd.DataFrame: raw_components[:, component_idx : component_idx + p + q], axis=1 ) - return pd.DataFrame(data=named_components) + return pd.DataFrame(data=named_components).add_prefix("target_component_") class BATSModel( From 0e65cccebb3c2b741a3bd62806e414ac6ea13a39 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 23 Mar 2023 13:57:35 +0300 Subject: [PATCH 03/18] components rescaling --- etna/models/tbats.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index cf3325f91..a1ee6cea1 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -174,6 +174,16 @@ def _check_components(self): if len(not_fitted_components) > 0: warn(f"Following components are not fitted: {', '.join(not_fitted_components)}!") + def _rescale_components(self, raw_components: np.ndarray) -> np.ndarray: + """Rescale components when Box-Cox transform used.""" + if self._fitted_model is None: + raise ValueError("Fitted model is not set!") + + transformed_pred = np.sum(raw_components, axis=1) + pred = self._fitted_model._inv_boxcox(transformed_pred) + components = raw_components * pred[..., np.newaxis] / transformed_pred[..., np.newaxis] + return components + def _decompose_forecast(self, horizon: int) -> np.ndarray: """Estimate raw forecast components.""" if self._fitted_model is None: From bd4b847f6a63df9c5d254d00ed359f708c90d644 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 23 Mar 2023 13:59:23 +0300 Subject: [PATCH 04/18] prediction decomposition --- etna/models/tbats.py | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index a1ee6cea1..acfe0851c 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -131,7 +131,15 @@ def predict_components(self, df: pd.DataFrame) -> pd.DataFrame: : dataframe with prediction components """ - raise NotImplementedError("Prediction decomposition isn't currently implemented!") + if self._fitted_model is None or self._freq is None: + raise ValueError("Model is not fitted! Fit the model before estimating forecast components!") + + self._check_components() + + raw_components = self._decompose_predict() + components = self._process_components(raw_components=raw_components) + + return components def _get_steps_to_forecast(self, df: pd.DataFrame) -> int: if self._freq is None: @@ -202,9 +210,35 @@ def _decompose_forecast(self, horizon: int) -> np.ndarray: raw_components = np.stack(components, axis=0) if model.params.components.use_box_cox: - transformed_pred = np.sum(raw_components, axis=1) - pred = model._inv_boxcox(transformed_pred) - raw_components = raw_components * pred[..., np.newaxis] / transformed_pred[..., np.newaxis] + raw_components = self._rescale_components(raw_components) + + return raw_components + + def _decompose_predict(self) -> np.ndarray: + """Estimate raw prediction components.""" + if self._fitted_model is None: + raise ValueError("Fitted model is not set!") + + model = self._fitted_model + state_matrix = model.matrix.make_F_matrix() + component_weights = model.matrix.make_w_vector() + error_weights = model.matrix.make_g_vector() + + target = model._boxcox(model.y) + + steps = len(target) + state = model.params.x0 + + components = [] + for t in range(steps): + components.append(component_weights * state) + error = target[t] - component_weights @ state + state = state_matrix @ state + error_weights * error + + raw_components = np.stack(components, axis=0) + + if model.params.components.use_box_cox: + raw_components = self._rescale_components(raw_components) return raw_components From d3700bdbd3b050554c19c822ff6dcc80310ea1da Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 23 Mar 2023 14:00:58 +0300 Subject: [PATCH 05/18] prediction tests --- tests/test_models/test_tbats.py | 54 +++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index f4e06463d..6c88a1a64 100644 --- a/tests/test_models/test_tbats.py +++ b/tests/test_models/test_tbats.py @@ -1,3 +1,5 @@ +from copy import deepcopy + import numpy as np import pandas as pd import pytest @@ -121,16 +123,19 @@ def test_repr(model_class, model_class_repr): @pytest.mark.parametrize("model", (TBATSModel(), BATSModel())) -def test_not_fitted(model, linear_segments_ts_unique): +@pytest.mark.parametrize("method", ("forecast", "predict")) +def test_not_fitted(model, method, linear_segments_ts_unique): train, test = linear_segments_ts_unique to_forecast = train.make_future(3) + + method_to_call = getattr(model, method) with pytest.raises(ValueError, match="model is not fitted!"): - model.forecast(to_forecast) + method_to_call(ts=to_forecast) @pytest.mark.long_2 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) -def test_format(model, new_format_df): +def test_forecast_format(model, new_format_df): df = new_format_df ts = TSDataset(df, "1d") lags = LagTransform(lags=[3, 4, 5], in_column="target") @@ -144,22 +149,51 @@ def test_format(model, new_format_df): @pytest.mark.long_2 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) -def test_dummy(model, sinusoid_ts): +def test_predict_format(model, new_format_df): + df = new_format_df + ts = TSDataset(df, "1d") + lags = LagTransform(lags=[3], in_column="target") + ts.fit_transform([lags]) + model.fit(ts) + pred = model.predict(ts) + assert not pred[:, :, "target"].isnull().values.any() + + +@pytest.mark.long_2 +@pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) +@pytest.mark.parametrize("method, use_future", (("predict", False), ("forecast", True))) +def test_dummy(model, method, use_future, sinusoid_ts): train, test = sinusoid_ts model.fit(train) - future_ts = train.make_future(14) - y_pred = model.forecast(future_ts) + + if use_future: + pred_ts = train.make_future(14) + y_true = test + else: + pred_ts = deepcopy(train) + y_true = train + + method_to_call = getattr(model, method) + y_pred = method_to_call(ts=pred_ts) + metric = MAE("macro") - value_metric = metric(y_pred, test) + value_metric = metric(y_true, y_pred) assert value_metric < 0.33 @pytest.mark.long_2 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) -def test_prediction_interval(model, example_tsds): +@pytest.mark.parametrize("method, use_future", (("predict", False), ("forecast", True))) +def test_prediction_interval(model, method, use_future, example_tsds): model.fit(example_tsds) - future_ts = example_tsds.make_future(3) - forecast = model.forecast(future_ts, prediction_interval=True, quantiles=[0.025, 0.975]) + if use_future: + pred_ts = example_tsds.make_future(3) + else: + pred_ts = deepcopy(example_tsds) + + method_to_call = getattr(model, method) + forecast = method_to_call(ts=pred_ts, prediction_interval=True, quantiles=[0.025, 0.975]) + for segment in forecast.segments: segment_slice = forecast[:, segment, :][segment] assert {"target_0.025", "target_0.975", "target"}.issubset(segment_slice.columns) From 8fd1e8d065cd75dc403c6807ff8d00a6422e8be6 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 23 Mar 2023 14:01:51 +0300 Subject: [PATCH 06/18] updated tests --- tests/test_models/test_tbats.py | 120 ++++++++++++++++++++++++-------- 1 file changed, 92 insertions(+), 28 deletions(-) diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index 6c88a1a64..f0c135f7b 100644 --- a/tests/test_models/test_tbats.py +++ b/tests/test_models/test_tbats.py @@ -206,18 +206,13 @@ def test_save_load(model, example_tsds): assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3) -def test_forecast_decompose_not_fitted(small_periodic_ts): +@pytest.mark.parametrize("method", ("predict_components", "forecast_components")) +def test_decompose_not_fitted(small_periodic_ts, method): model = _TBATSAdapter(model=BATS()) + method_to_call = getattr(model, method) with pytest.raises(ValueError, match="Model is not fitted!"): - model.forecast_components(df=small_periodic_ts.df) - - -def test_predict_components_not_implemented(small_periodic_ts): - model = _TBATSAdapter(model=BATS()) - - with pytest.raises(NotImplementedError, match="Prediction decomposition isn't currently implemented!"): - model.predict_components(df=small_periodic_ts.df) + method_to_call(df=small_periodic_ts.df) @pytest.mark.parametrize( @@ -237,6 +232,23 @@ def test_decompose_forecast_output_format(small_periodic_ts, estimator): assert components.shape[0] == horizon +@pytest.mark.parametrize( + "estimator", + ( + BATSModel, + TBATSModel, + ), +) +def test_decompose_predict_output_format(small_periodic_ts, estimator): + model = estimator() + model.fit(small_periodic_ts) + + target = small_periodic_ts[:, "segment_1", "target"].values + components = model._models["segment_1"]._decompose_predict() + assert isinstance(components, np.ndarray) + assert components.shape[0] == target.shape[0] + + @pytest.mark.parametrize( "estimator", ( @@ -264,24 +276,44 @@ def test_named_components_output_format(small_periodic_ts, estimator): ( BATSModel, {"use_box_cox": False, "use_trend": True, "use_arma_errors": True, "seasonal_periods": [7, 14]}, - {"local_level", "trend", "arma(p=1,q=1)", "seasonal(s=7)", "seasonal(s=14)"}, + { + "target_component_local_level", + "target_component_trend", + "target_component_arma(p=1,q=1)", + "target_component_seasonal(s=7)", + "target_component_seasonal(s=14)", + }, ), ( TBATSModel, {"use_box_cox": False, "use_trend": True, "use_arma_errors": False, "seasonal_periods": [7, 14]}, - {"local_level", "trend", "seasonal(s=7.0)", "seasonal(s=14.0)"}, + { + "target_component_local_level", + "target_component_trend", + "target_component_seasonal(s=7.0)", + "target_component_seasonal(s=14.0)", + }, ), ), ) -def test_components_names(periodic_ts, estimator, params, components_names): +@pytest.mark.parametrize( + "method,use_future", + ( + ("predict_components", False), + ("forecast_components", True), + ), +) +def test_components_names(periodic_ts, estimator, params, components_names, method, use_future): train, test = periodic_ts model = estimator(**params) model.fit(train) - future = train.make_future(3).to_pandas(flatten=True) + pred_ts = train.make_future(3) if use_future else train for segment in test.columns.get_level_values("segment"): - components_df = model._models[segment].forecast_components(df=future) + pred_df = pred_ts[:, segment, :].droplevel("segment", axis=1).reset_index() + method_to_call = getattr(model._models[segment], method) + components_df = method_to_call(df=pred_df) assert set(components_df.columns) == components_names @@ -292,16 +324,23 @@ def test_components_names(periodic_ts, estimator, params, components_names): TBATSModel, ), ) -def test_seasonal_components_not_fitted(small_periodic_ts, estimator): +@pytest.mark.parametrize("method,use_future", (("predict_components", False), ("forecast_components", True))) +def test_seasonal_components_not_fitted(small_periodic_ts, estimator, method, use_future): model = estimator(seasonal_periods=[7, 14], use_arma_errors=False) model.fit(small_periodic_ts) - future = small_periodic_ts.make_future(3).to_pandas(flatten=True) segment_model = model._models["segment_1"] segment_model._fitted_model.params.components.seasonal_periods = [] + pred_ts = small_periodic_ts + if use_future: + pred_ts = pred_ts.make_future(3) + + pred_df = pred_ts[:, "segment_1", :].droplevel("segment", axis=1).reset_index() + + method_to_call = getattr(segment_model, method) with pytest.warns(Warning, match=f"Following components are not fitted: Seasonal!"): - segment_model.forecast_components(df=future) + method_to_call(df=pred_df) @pytest.mark.parametrize( @@ -311,16 +350,23 @@ def test_seasonal_components_not_fitted(small_periodic_ts, estimator): TBATSModel, ), ) -def test_arma_component_not_fitted(small_periodic_ts, estimator): +@pytest.mark.parametrize("method,use_future", (("predict_components", False), ("forecast_components", True))) +def test_arma_component_not_fitted(small_periodic_ts, estimator, method, use_future): model = estimator(use_arma_errors=True, seasonal_periods=[]) model.fit(small_periodic_ts) - future = small_periodic_ts.make_future(3).to_pandas(flatten=True) segment_model = model._models["segment_1"] segment_model._fitted_model.params.components.use_arma_errors = False + pred_ts = small_periodic_ts + if use_future: + pred_ts = pred_ts.make_future(3) + + pred_df = pred_ts[:, "segment_1", :].droplevel("segment", axis=1).reset_index() + + method_to_call = getattr(segment_model, method) with pytest.warns(Warning, match=f"Following components are not fitted: ARMA!"): - segment_model.forecast_components(df=future) + method_to_call(df=pred_df) @pytest.mark.parametrize( @@ -330,17 +376,24 @@ def test_arma_component_not_fitted(small_periodic_ts, estimator): TBATSModel, ), ) -def test_arma_w_seasonal_components_not_fitted(small_periodic_ts, estimator): +@pytest.mark.parametrize("method,use_future", (("predict_components", False), ("forecast_components", True))) +def test_arma_w_seasonal_components_not_fitted(small_periodic_ts, estimator, method, use_future): model = estimator(use_arma_errors=True, seasonal_periods=[2, 3]) model.fit(small_periodic_ts) - future = small_periodic_ts.make_future(3).to_pandas(flatten=True) segment_model = model._models["segment_1"] segment_model._fitted_model.params.components.use_arma_errors = False segment_model._fitted_model.params.components.seasonal_periods = [] + pred_ts = small_periodic_ts + if use_future: + pred_ts = pred_ts.make_future(3) + + pred_df = pred_ts[:, "segment_1", :].droplevel("segment", axis=1).reset_index() + + method_to_call = getattr(segment_model, method) with pytest.warns(Warning, match=f"Following components are not fitted: Seasonal, ARMA!"): - segment_model.forecast_components(df=future) + method_to_call(df=pred_df) @pytest.mark.long_1 @@ -376,16 +429,27 @@ def test_arma_w_seasonal_components_not_fitted(small_periodic_ts, estimator): }, ), ) -def test_forecast_decompose_sum_up_to_target(periodic_ts, estimator, params): +@pytest.mark.parametrize("method,use_future", (("predict_components", False), ("forecast_components", True))) +def test_forecast_decompose_sum_up_to_target(periodic_ts, estimator, params, method, use_future): train, test = periodic_ts - horizon = 14 model = estimator(**params) model.fit(train) - future_ts = train.make_future(horizon) - y_pred = model.forecast(future_ts) + + if use_future: + pred_ts = train.make_future(future_steps=14) + y_pred = model.forecast(pred_ts) + + else: + pred_ts = deepcopy(train) + y_pred = model.predict(train) for segment in y_pred.columns.get_level_values("segment"): - components = model._models[segment].forecast_components(df=future_ts.to_pandas(flatten=True)) + pred_df = pred_ts[:, segment, :].droplevel("segment", axis=1).reset_index() + + method_to_call = getattr(model._models[segment], method) + + components = method_to_call(df=pred_df) + y_hat_pred = np.sum(components.values, axis=1) np.testing.assert_allclose(y_hat_pred, y_pred[:, segment, "target"].values) From d438ea71c03bbf5efd3d3c87ab818df78fa16ce0 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 23 Mar 2023 17:40:43 +0300 Subject: [PATCH 07/18] optimized --- etna/models/tbats.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index acfe0851c..b731ea9a6 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -224,16 +224,14 @@ def _decompose_predict(self) -> np.ndarray: component_weights = model.matrix.make_w_vector() error_weights = model.matrix.make_g_vector() - target = model._boxcox(model.y) - - steps = len(target) + steps = len(model.y) state = model.params.x0 + weighted_error = model.resid_boxcox[..., np.newaxis] * error_weights[np.newaxis] components = [] for t in range(steps): components.append(component_weights * state) - error = target[t] - component_weights @ state - state = state_matrix @ state + error_weights * error + state = state_matrix @ state + weighted_error[t] raw_components = np.stack(components, axis=0) From 0748bfa3d78f699fa0420c815bc8217b44ee7798 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Thu, 23 Mar 2023 17:51:28 +0300 Subject: [PATCH 08/18] updated `CHANGELOG.md` --- CHANGELOG.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb32980b0..e508b1ab7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,15 +19,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `ChangePointsLevelTransform` and base classes `PerIntervalModel`, `BaseChangePointsModelAdapter` for per-interval transforms ([#998](https://github.com/tinkoff-ai/etna/pull/998)) - Method `set_params` to change parameters of ETNA objects ([#1102](https://github.com/tinkoff-ai/etna/pull/1102)) - Function `plot_forecast_decomposition` ([#1129](https://github.com/tinkoff-ai/etna/pull/1129)) -- Method `forecast_components` for forecast decomposition in `_TBATSAdapter` ([#1125](https://github.com/tinkoff-ai/etna/issues/1125)) -- Methods `forecast_components` and `predict_components` for forecast decomposition in `_CatBoostAdapter` ([#1135](https://github.com/tinkoff-ai/etna/issues/1135)) -- Methods `forecast_components` and `predict_components` for forecast decomposition in `_HoltWintersAdapter ` ([#1146](https://github.com/tinkoff-ai/etna/issues/1146)) -- Methods `predict_components` for forecast decomposition in `_ProphetAdapter` ([#1161](https://github.com/tinkoff-ai/etna/issues/1161)) +- Method `forecast_components` for forecast decomposition in `_TBATSAdapter` ([#1133](https://github.com/tinkoff-ai/etna/pull/1133)) +- Methods `forecast_components` and `predict_components` for forecast decomposition in `_CatBoostAdapter` ([#1148](https://github.com/tinkoff-ai/etna/pull/1148)) +- Methods `forecast_components` and `predict_components` for forecast decomposition in `_HoltWintersAdapter ` ([#1162](https://github.com/tinkoff-ai/etna/pull/1162)) +- Method `predict_components` for forecast decomposition in `_ProphetAdapter` ([#1172](https://github.com/tinkoff-ai/etna/pull/1172)) - Add `refit` parameter into `backtest` ([#1159](https://github.com/tinkoff-ai/etna/pull/1159)) - Add `stride` parameter into `backtest` ([#1165](https://github.com/tinkoff-ai/etna/pull/1165)) - Add optional parameter `ts` into `forecast` method of pipelines ([#1071](https://github.com/tinkoff-ai/etna/pull/1071)) - Add tests on `transform` method of transforms on subset of segments, on new segments, on future with gap ([#1094](https://github.com/tinkoff-ai/etna/pull/1094)) - Add tests on `inverse_transform` method of transforms on subset of segments, on new segments, on future with gap ([#1127](https://github.com/tinkoff-ai/etna/pull/1127)) +- In-sample prediction for `BATSModel` and `TBATSModel` ([#1181](https://github.com/tinkoff-ai/etna/pull/1181)) +- Method `predict_components` for forecast decomposition in `_TBATSAdapter` ([#1181](https://github.com/tinkoff-ai/etna/pull/1181)) ### Changed - Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809)) - Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110)) From 327796e4a0cc488a887b0e5b587bde958b18c8b7 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 24 Mar 2023 12:36:35 +0300 Subject: [PATCH 09/18] partial in-sample prediction --- etna/models/tbats.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index b731ea9a6..bf9aff9cd 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -70,6 +70,10 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterab y_pred = pd.DataFrame() y_pred["target"] = self._fitted_model.y_hat + y_pred["timestamp"] = pd.date_range(end=str(self._last_train_timestamp), freq=self._freq, periods=len(y_pred)) + + if len(set(y_pred["timestamp"]) & set(df["timestamp"])) == 0: + raise NotImplementedError("Method predict isn't currently implemented for out-of-sample prediction!") if prediction_interval: for quantile in quantiles: @@ -82,6 +86,9 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterab else: y_pred[f"target_{quantile:.4g}"] = confidence_intervals["upper_bound"] + # selecting time points from provided dataframe + y_pred = y_pred.merge(df["timestamp"], on="timestamp").drop(columns=["timestamp"]) + return y_pred def get_model(self) -> Model: From c244c3525c5cf5093006cbbad32941368333608c Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 24 Mar 2023 12:37:05 +0300 Subject: [PATCH 10/18] updated tests --- tests/test_models/test_inference/test_predict.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_models/test_inference/test_predict.py b/tests/test_models/test_inference/test_predict.py index 1a91de593..e84786cd7 100644 --- a/tests/test_models/test_inference/test_predict.py +++ b/tests/test_models/test_inference/test_predict.py @@ -60,6 +60,8 @@ class TestPredictInSampleFull: (HoltModel(), []), (HoltWintersModel(), []), (SimpleExpSmoothingModel(), []), + (BATSModel(use_trend=True), []), + (TBATSModel(use_trend=True), []) ], ) def test_predict_in_sample_full(self, model, transforms, example_tsds): @@ -95,8 +97,6 @@ def test_predict_in_sample_full_failed_not_enough_context(self, model, transform @pytest.mark.parametrize( "model, transforms", [ - (BATSModel(use_trend=True), []), - (TBATSModel(use_trend=True), []), ( DeepARModel( dataset_builder=PytorchForecastingDatasetBuilder( @@ -171,6 +171,8 @@ class TestPredictInSampleSuffix: (NaiveModel(lag=3), []), (SeasonalMovingAverageModel(), []), (DeadlineMovingAverageModel(window=1), []), + (BATSModel(use_trend=True), []), + (TBATSModel(use_trend=True), []), ], ) def test_predict_in_sample_suffix(self, model, transforms, example_tsds): @@ -180,8 +182,6 @@ def test_predict_in_sample_suffix(self, model, transforms, example_tsds): @pytest.mark.parametrize( "model, transforms", [ - (BATSModel(use_trend=True), []), - (TBATSModel(use_trend=True), []), ( DeepARModel( dataset_builder=PytorchForecastingDatasetBuilder( @@ -714,6 +714,8 @@ def _test_predict_subset_segments(self, ts, model, transforms, segments, num_ski (SeasonalMovingAverageModel(), []), (NaiveModel(lag=3), []), (DeadlineMovingAverageModel(window=1), []), + (BATSModel(use_trend=True), []), + (TBATSModel(use_trend=True), []), ], ) def test_predict_subset_segments(self, model, transforms, example_tsds): @@ -723,8 +725,6 @@ def test_predict_subset_segments(self, model, transforms, example_tsds): @pytest.mark.parametrize( "model, transforms", [ - (BATSModel(use_trend=True), []), - (TBATSModel(use_trend=True), []), ( DeepARModel( dataset_builder=PytorchForecastingDatasetBuilder( From 872650a2d34bd0edda27d7656101da124589d6a1 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 24 Mar 2023 12:37:15 +0300 Subject: [PATCH 11/18] added notes --- etna/models/tbats.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index bf9aff9cd..620a1b156 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -295,7 +295,15 @@ def _process_components(self, raw_components: np.ndarray) -> pd.DataFrame: class BATSModel( PerSegmentModelMixin, PredictionIntervalContextIgnorantModelMixin, PredictionIntervalContextIgnorantAbstractModel ): - """Class for holding segment interval BATS model.""" + """Class for holding segment interval BATS model. + + Notes + ----- + This model supports in-sample and out-of-sample prediction decomposition. + Prediction components for BATS model are: local level, trend, seasonality and ARMA component. + In-sample and out-of-sample decompositions components are estimated directly from the fitted model parameters. + Box-Cox transform supported with components proportional rescaling. + """ def __init__( self, @@ -364,7 +372,15 @@ def __init__( class TBATSModel( PerSegmentModelMixin, PredictionIntervalContextIgnorantModelMixin, PredictionIntervalContextIgnorantAbstractModel ): - """Class for holding segment interval TBATS model.""" + """Class for holding segment interval TBATS model. + + Notes + ----- + This model supports in-sample and out-of-sample prediction decomposition. + Prediction components for TBATS model are: local level, trend, seasonality and ARMA component. + In-sample and out-of-sample decompositions components are estimated directly from the fitted model parameters. + Box-Cox transform supported with components proportional rescaling. + """ def __init__( self, From d9ade084fbc1d2de38a0b04eb4c3c337cbffcda7 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Fri, 24 Mar 2023 12:38:27 +0300 Subject: [PATCH 12/18] formatting --- tests/test_models/test_inference/test_predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_models/test_inference/test_predict.py b/tests/test_models/test_inference/test_predict.py index e84786cd7..c0232d232 100644 --- a/tests/test_models/test_inference/test_predict.py +++ b/tests/test_models/test_inference/test_predict.py @@ -61,7 +61,7 @@ class TestPredictInSampleFull: (HoltWintersModel(), []), (SimpleExpSmoothingModel(), []), (BATSModel(use_trend=True), []), - (TBATSModel(use_trend=True), []) + (TBATSModel(use_trend=True), []), ], ) def test_predict_in_sample_full(self, model, transforms, example_tsds): From 89f7d9e280d9c7485f6a6d46c51afc3150f4c100 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 27 Mar 2023 16:48:56 +0300 Subject: [PATCH 13/18] updated timestamp checks --- etna/models/tbats.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index 620a1b156..a320adccf 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -22,6 +22,7 @@ class _TBATSAdapter(BaseAdapter): def __init__(self, model: Estimator): self._model = model self._fitted_model: Optional[Model] = None + self._first_train_timestamp = None self._last_train_timestamp = None self._freq = None @@ -32,6 +33,7 @@ def fit(self, df: pd.DataFrame, regressors: Iterable[str]): target = df["target"] self._fitted_model = self._model.fit(target) + self._first_train_timestamp = df["timestamp"].min() self._last_train_timestamp = df["timestamp"].max() self._freq = freq @@ -68,13 +70,17 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterab if self._fitted_model is None or self._freq is None: raise ValueError("Model is not fitted! Fit the model before calling predict method!") - y_pred = pd.DataFrame() - y_pred["target"] = self._fitted_model.y_hat - y_pred["timestamp"] = pd.date_range(end=str(self._last_train_timestamp), freq=self._freq, periods=len(y_pred)) + train_timestamp = pd.date_range( + start=str(self._first_train_timestamp), end=str(self._last_train_timestamp), freq=self._freq + ) - if len(set(y_pred["timestamp"]) & set(df["timestamp"])) == 0: + if not (set(train_timestamp) >= set(df["timestamp"])): raise NotImplementedError("Method predict isn't currently implemented for out-of-sample prediction!") + y_pred = pd.DataFrame() + y_pred["target"] = self._fitted_model.y_hat + y_pred["timestamp"] = train_timestamp + if prediction_interval: for quantile in quantiles: confidence_intervals = self._fitted_model._calculate_confidence_intervals( @@ -141,11 +147,24 @@ def predict_components(self, df: pd.DataFrame) -> pd.DataFrame: if self._fitted_model is None or self._freq is None: raise ValueError("Model is not fitted! Fit the model before estimating forecast components!") + train_timestamp = pd.date_range( + start=str(self._first_train_timestamp), end=str(self._last_train_timestamp), freq=self._freq + ) + + if not (set(train_timestamp) >= set(df["timestamp"])): + raise NotImplementedError( + "Method predict_components isn't currently implemented for out-of-sample prediction!" + ) + self._check_components() raw_components = self._decompose_predict() components = self._process_components(raw_components=raw_components) + # selecting time points from provided dataframe + components["timestamp"] = train_timestamp + components = components.merge(df["timestamp"], on="timestamp").drop(columns=["timestamp"]) + return components def _get_steps_to_forecast(self, df: pd.DataFrame) -> int: From 94a3af33f93aa9b027dbbe2b0daee5ad13950d67 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 27 Mar 2023 16:50:02 +0300 Subject: [PATCH 14/18] added test for subset decomposition --- tests/test_models/test_tbats.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index f0c135f7b..2860589e6 100644 --- a/tests/test_models/test_tbats.py +++ b/tests/test_models/test_tbats.py @@ -269,6 +269,18 @@ def test_named_components_output_format(small_periodic_ts, estimator): assert len(components) == horizon +@pytest.mark.parametrize( + "train_slice,decompose_slice", ((slice(5, 20), slice(None, 20)), (slice(5, 10), slice(10, 20))) +) +def test_predict_components_out_of_sample_error(periodic_dfs, train_slice, decompose_slice): + train, _ = periodic_dfs + + model = _TBATSAdapter(model=BATS()) + model.fit(train.iloc[train_slice], []) + with pytest.raises(NotImplementedError, match="isn't currently implemented for out-of-sample prediction"): + model.predict_components(df=train.iloc[decompose_slice]) + + @pytest.mark.long_1 @pytest.mark.parametrize( "estimator,params,components_names", From c19c7aaedd91071f358ddae904afba4f9ce69d32 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 27 Mar 2023 16:50:45 +0300 Subject: [PATCH 15/18] reworked tests --- tests/test_models/test_tbats.py | 232 ++++++++++++++++---------------- 1 file changed, 117 insertions(+), 115 deletions(-) diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index 2860589e6..a320a78c1 100644 --- a/tests/test_models/test_tbats.py +++ b/tests/test_models/test_tbats.py @@ -7,6 +7,7 @@ from etna.datasets import TSDataset from etna.metrics import MAE from etna.models.tbats import BATS +from etna.models.tbats import TBATS from etna.models.tbats import BATSModel from etna.models.tbats import TBATSModel from etna.models.tbats import _TBATSAdapter @@ -47,9 +48,8 @@ def sinusoid_ts(): @pytest.fixture() -def periodic_ts(): - horizon = 14 - periods = 100 +def periodic_dfs(): + periods = 50 t = np.arange(periods) # data from https://pypi.org/project/tbats/ @@ -60,20 +60,29 @@ def periodic_ts(): + 20 ) - ts_1 = pd.DataFrame( + df = pd.DataFrame( { - "segment": ["segment_1"] * periods, "timestamp": pd.date_range(start="1/1/2018", periods=periods), "target": y, } ) - ts_2 = pd.DataFrame( - { - "segment": ["segment_2"] * periods, - "timestamp": pd.date_range(start="1/1/2018", periods=periods), - "target": 2 * y, - } - ) + + return df.iloc[:40], df.iloc[40:] + + +@pytest.fixture() +def periodic_ts(periodic_dfs): + horizon = 10 + + df = pd.concat(periodic_dfs, axis=0).reset_index(drop=True) + + ts_1 = df.copy() + ts_1["segment"] = "segment_1" + + ts_2 = df.copy() + ts_2["segment"] = "segment_2" + ts_2["target"] *= 2 + df = pd.concat((ts_1, ts_2)) df = TSDataset.to_dataset(df) ts = TSDataset(df, freq="D") @@ -147,18 +156,6 @@ def test_forecast_format(model, new_format_df): assert not future_ts.isnull().values.any() -@pytest.mark.long_2 -@pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) -def test_predict_format(model, new_format_df): - df = new_format_df - ts = TSDataset(df, "1d") - lags = LagTransform(lags=[3], in_column="target") - ts.fit_transform([lags]) - model.fit(ts) - pred = model.predict(ts) - assert not pred[:, :, "target"].isnull().values.any() - - @pytest.mark.long_2 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) @pytest.mark.parametrize("method, use_future", (("predict", False), ("forecast", True))) @@ -217,17 +214,16 @@ def test_decompose_not_fitted(small_periodic_ts, method): @pytest.mark.parametrize( "estimator", - ( - BATSModel, - TBATSModel, - ), + (BATS, TBATS), ) -def test_decompose_forecast_output_format(small_periodic_ts, estimator): +def test_decompose_forecast_output_format(periodic_dfs, estimator): + _, train = periodic_dfs horizon = 3 - model = estimator() - model.fit(small_periodic_ts) - components = model._models["segment_1"]._decompose_forecast(horizon=horizon) + model = _TBATSAdapter(model=estimator()) + model.fit(train, []) + + components = model._decompose_forecast(horizon=horizon) assert isinstance(components, np.ndarray) assert components.shape[0] == horizon @@ -235,35 +231,36 @@ def test_decompose_forecast_output_format(small_periodic_ts, estimator): @pytest.mark.parametrize( "estimator", ( - BATSModel, - TBATSModel, + BATS, + TBATS, ), ) -def test_decompose_predict_output_format(small_periodic_ts, estimator): - model = estimator() - model.fit(small_periodic_ts) +def test_decompose_predict_output_format(periodic_dfs, estimator): + _, train = periodic_dfs + model = _TBATSAdapter(model=estimator()) + model.fit(train, []) - target = small_periodic_ts[:, "segment_1", "target"].values - components = model._models["segment_1"]._decompose_predict() + components = model._decompose_predict() assert isinstance(components, np.ndarray) - assert components.shape[0] == target.shape[0] + assert components.shape[0] == len(train) @pytest.mark.parametrize( "estimator", ( - BATSModel, - TBATSModel, + BATS, + TBATS, ), ) -def test_named_components_output_format(small_periodic_ts, estimator): +def test_named_components_output_format(periodic_dfs, estimator): + _, train = periodic_dfs horizon = 3 - model = estimator() - model.fit(small_periodic_ts) - segment_model = model._models["segment_1"] - components = segment_model._decompose_forecast(horizon=horizon) - components = segment_model._process_components(raw_components=components) + model = _TBATSAdapter(model=estimator()) + model.fit(train, []) + + components = model._decompose_forecast(horizon=horizon) + components = model._process_components(raw_components=components) assert isinstance(components, pd.DataFrame) assert len(components) == horizon @@ -286,8 +283,8 @@ def test_predict_components_out_of_sample_error(periodic_dfs, train_slice, decom "estimator,params,components_names", ( ( - BATSModel, - {"use_box_cox": False, "use_trend": True, "use_arma_errors": True, "seasonal_periods": [7, 14]}, + BATS, + {"use_box_cox": True, "use_trend": True, "use_arma_errors": True, "seasonal_periods": [7, 14]}, { "target_component_local_level", "target_component_trend", @@ -297,7 +294,7 @@ def test_predict_components_out_of_sample_error(periodic_dfs, train_slice, decom }, ), ( - TBATSModel, + TBATS, {"use_box_cox": False, "use_trend": True, "use_arma_errors": False, "seasonal_periods": [7, 14]}, { "target_component_local_level", @@ -315,42 +312,36 @@ def test_predict_components_out_of_sample_error(periodic_dfs, train_slice, decom ("forecast_components", True), ), ) -def test_components_names(periodic_ts, estimator, params, components_names, method, use_future): - train, test = periodic_ts - model = estimator(**params) - model.fit(train) +def test_components_names(periodic_dfs, estimator, params, components_names, method, use_future): + train, test = periodic_dfs + model = _TBATSAdapter(model=estimator(**params)) + model.fit(train, []) - pred_ts = train.make_future(3) if use_future else train + pred_df = test if use_future else train - for segment in test.columns.get_level_values("segment"): - pred_df = pred_ts[:, segment, :].droplevel("segment", axis=1).reset_index() - method_to_call = getattr(model._models[segment], method) - components_df = method_to_call(df=pred_df) - assert set(components_df.columns) == components_names + method_to_call = getattr(model, method) + components_df = method_to_call(df=pred_df) + assert set(components_df.columns) == components_names @pytest.mark.parametrize( "estimator", ( - BATSModel, - TBATSModel, + BATS, + TBATS, ), ) @pytest.mark.parametrize("method,use_future", (("predict_components", False), ("forecast_components", True))) -def test_seasonal_components_not_fitted(small_periodic_ts, estimator, method, use_future): - model = estimator(seasonal_periods=[7, 14], use_arma_errors=False) - model.fit(small_periodic_ts) - - segment_model = model._models["segment_1"] - segment_model._fitted_model.params.components.seasonal_periods = [] +def test_seasonal_components_not_fitted(periodic_dfs, estimator, method, use_future): + train, test = periodic_dfs + model = _TBATSAdapter(model=estimator(seasonal_periods=[7, 14], use_arma_errors=False)) + model.fit(train, []) - pred_ts = small_periodic_ts - if use_future: - pred_ts = pred_ts.make_future(3) + model._fitted_model.params.components.seasonal_periods = [] - pred_df = pred_ts[:, "segment_1", :].droplevel("segment", axis=1).reset_index() + pred_df = test if use_future else train - method_to_call = getattr(segment_model, method) + method_to_call = getattr(model, method) with pytest.warns(Warning, match=f"Following components are not fitted: Seasonal!"): method_to_call(df=pred_df) @@ -358,25 +349,22 @@ def test_seasonal_components_not_fitted(small_periodic_ts, estimator, method, us @pytest.mark.parametrize( "estimator", ( - BATSModel, - TBATSModel, + BATS, + TBATS, ), ) @pytest.mark.parametrize("method,use_future", (("predict_components", False), ("forecast_components", True))) -def test_arma_component_not_fitted(small_periodic_ts, estimator, method, use_future): - model = estimator(use_arma_errors=True, seasonal_periods=[]) - model.fit(small_periodic_ts) +def test_arma_component_not_fitted(periodic_dfs, estimator, method, use_future): + train, test = periodic_dfs - segment_model = model._models["segment_1"] - segment_model._fitted_model.params.components.use_arma_errors = False + model = _TBATSAdapter(model=estimator(use_arma_errors=True)) + model.fit(train, []) - pred_ts = small_periodic_ts - if use_future: - pred_ts = pred_ts.make_future(3) + model._fitted_model.params.components.use_arma_errors = False - pred_df = pred_ts[:, "segment_1", :].droplevel("segment", axis=1).reset_index() + pred_df = test if use_future else train - method_to_call = getattr(segment_model, method) + method_to_call = getattr(model, method) with pytest.warns(Warning, match=f"Following components are not fitted: ARMA!"): method_to_call(df=pred_df) @@ -384,26 +372,23 @@ def test_arma_component_not_fitted(small_periodic_ts, estimator, method, use_fut @pytest.mark.parametrize( "estimator", ( - BATSModel, - TBATSModel, + BATS, + TBATS, ), ) @pytest.mark.parametrize("method,use_future", (("predict_components", False), ("forecast_components", True))) -def test_arma_w_seasonal_components_not_fitted(small_periodic_ts, estimator, method, use_future): - model = estimator(use_arma_errors=True, seasonal_periods=[2, 3]) - model.fit(small_periodic_ts) +def test_arma_with_seasonal_components_not_fitted(periodic_dfs, estimator, method, use_future): + train, test = periodic_dfs - segment_model = model._models["segment_1"] - segment_model._fitted_model.params.components.use_arma_errors = False - segment_model._fitted_model.params.components.seasonal_periods = [] + model = _TBATSAdapter(model=estimator(use_arma_errors=True, seasonal_periods=[2, 3], use_box_cox=False)) + model.fit(train, []) - pred_ts = small_periodic_ts - if use_future: - pred_ts = pred_ts.make_future(3) + model._fitted_model.params.components.use_arma_errors = False + model._fitted_model.params.components.seasonal_periods = [] - pred_df = pred_ts[:, "segment_1", :].droplevel("segment", axis=1).reset_index() + pred_df = test if use_future else train - method_to_call = getattr(segment_model, method) + method_to_call = getattr(model, method) with pytest.warns(Warning, match=f"Following components are not fitted: Seasonal, ARMA!"): method_to_call(df=pred_df) @@ -413,8 +398,8 @@ def test_arma_w_seasonal_components_not_fitted(small_periodic_ts, estimator, met @pytest.mark.parametrize( "estimator", ( - BATSModel, - TBATSModel, + BATS, + TBATS, ), ) @pytest.mark.parametrize( @@ -442,26 +427,43 @@ def test_arma_w_seasonal_components_not_fitted(small_periodic_ts, estimator, met ), ) @pytest.mark.parametrize("method,use_future", (("predict_components", False), ("forecast_components", True))) -def test_forecast_decompose_sum_up_to_target(periodic_ts, estimator, params, method, use_future): - train, test = periodic_ts +def test_forecast_decompose_sum_up_to_target(periodic_dfs, estimator, params, method, use_future): + train, test = periodic_dfs - model = estimator(**params) - model.fit(train) + model = _TBATSAdapter(model=estimator(**params)) + model.fit(train, []) if use_future: - pred_ts = train.make_future(future_steps=14) - y_pred = model.forecast(pred_ts) + pred_df = test + y_pred = model.forecast(test, prediction_interval=False, quantiles=[]) else: - pred_ts = deepcopy(train) - y_pred = model.predict(train) + pred_df = train + y_pred = model.predict(train, prediction_interval=False, quantiles=[]) + + method_to_call = getattr(model, method) + components = method_to_call(df=pred_df) + + y_hat_pred = np.sum(components.values, axis=1) + np.testing.assert_allclose(y_hat_pred, np.squeeze(y_pred.values)) + - for segment in y_pred.columns.get_level_values("segment"): - pred_df = pred_ts[:, segment, :].droplevel("segment", axis=1).reset_index() +@pytest.mark.parametrize( + "estimator", + ( + BATS, + TBATS, + ), +) +def test_predict_decompose_on_subset(dfs_w_exog, estimator): + train, _ = dfs_w_exog + sub_train = train.iloc[5:] - method_to_call = getattr(model._models[segment], method) + model = _TBATSAdapter(model=estimator()) + model.fit(train, []) - components = method_to_call(df=pred_df) + y_pred = model.predict(df=sub_train, prediction_interval=False, quantiles=[]) + components = model.predict_components(df=sub_train) - y_hat_pred = np.sum(components.values, axis=1) - np.testing.assert_allclose(y_hat_pred, y_pred[:, segment, "target"].values) + y_hat_pred = np.sum(components.values, axis=1) + np.testing.assert_allclose(y_hat_pred, np.squeeze(y_pred.values)) From dd86a703ef5a329d3bfa9fce147cf39881dce382 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 27 Mar 2023 16:52:42 +0300 Subject: [PATCH 16/18] changed fixture --- tests/test_models/test_tbats.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index a320a78c1..a98d45f90 100644 --- a/tests/test_models/test_tbats.py +++ b/tests/test_models/test_tbats.py @@ -455,8 +455,8 @@ def test_forecast_decompose_sum_up_to_target(periodic_dfs, estimator, params, me TBATS, ), ) -def test_predict_decompose_on_subset(dfs_w_exog, estimator): - train, _ = dfs_w_exog +def test_predict_decompose_on_subset(periodic_dfs, estimator): + train, _ = periodic_dfs sub_train = train.iloc[5:] model = _TBATSAdapter(model=estimator()) From 6a635f69808d90cba9e528af8c31da9164184fa3 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 27 Mar 2023 18:08:33 +0300 Subject: [PATCH 17/18] added tests to separate group --- .github/workflows/test.yml | 40 ++++++++++++++++++++++++++++++++- pyproject.toml | 3 ++- tests/test_models/test_tbats.py | 8 +++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d735cf455..5c2e5740f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -66,7 +66,7 @@ jobs: - name: PyTest ("not long") run: | - poetry run pytest tests -v --cov=etna -m "not long_1 and not long_2" --ignore=tests/test_experimental --cov-report=xml --durations=10 + poetry run pytest tests -v --cov=etna -m "not long_1 and not long_2 and not long_3" --ignore=tests/test_experimental --cov-report=xml --durations=10 poetry run pytest etna -v --doctest-modules --ignore=etna/libs --durations=10 - name: Upload coverage @@ -148,6 +148,44 @@ jobs: - name: Upload coverage uses: codecov/codecov-action@v2 + long-3-test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + id: setup-python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + version: 1.4.0 # TODO: remove after poetry fix + virtualenvs-create: true + virtualenvs-in-project: true + + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v2 + with: + path: .venv + key: venv-${{ runner.os }}-3.8-${{ hashFiles('**/poetry.lock') }} + + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: | + poetry install -E "all tests" -vv + + - name: PyTest ("long") + run: | + poetry run pytest tests -v --cov=etna -m "long_3" --ignore=tests/test_experimental --cov-report=xml --durations=10 + + - name: Upload coverage + uses: codecov/codecov-action@v2 + experimental-test: runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index 0b49061a9..27416dda5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -265,7 +265,8 @@ doctest_optionflags = "NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL NUMBER" markers = [ "smoke", "long_1", - "long_2" + "long_2", + "long_3" ] [tool.coverage.report] diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index a98d45f90..c2ee0ac39 100644 --- a/tests/test_models/test_tbats.py +++ b/tests/test_models/test_tbats.py @@ -212,6 +212,7 @@ def test_decompose_not_fitted(small_periodic_ts, method): method_to_call(df=small_periodic_ts.df) +@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", (BATS, TBATS), @@ -228,6 +229,7 @@ def test_decompose_forecast_output_format(periodic_dfs, estimator): assert components.shape[0] == horizon +@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -245,6 +247,7 @@ def test_decompose_predict_output_format(periodic_dfs, estimator): assert components.shape[0] == len(train) +@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -266,6 +269,7 @@ def test_named_components_output_format(periodic_dfs, estimator): assert len(components) == horizon +@pytest.mark.long_3 @pytest.mark.parametrize( "train_slice,decompose_slice", ((slice(5, 20), slice(None, 20)), (slice(5, 10), slice(10, 20))) ) @@ -324,6 +328,7 @@ def test_components_names(periodic_dfs, estimator, params, components_names, met assert set(components_df.columns) == components_names +@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -346,6 +351,7 @@ def test_seasonal_components_not_fitted(periodic_dfs, estimator, method, use_fut method_to_call(df=pred_df) +@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -369,6 +375,7 @@ def test_arma_component_not_fitted(periodic_dfs, estimator, method, use_future): method_to_call(df=pred_df) +@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -448,6 +455,7 @@ def test_forecast_decompose_sum_up_to_target(periodic_dfs, estimator, params, me np.testing.assert_allclose(y_hat_pred, np.squeeze(y_pred.values)) +@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( From d861096f6886ea87b2dd34071310469476f2ef25 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Tue, 28 Mar 2023 16:57:11 +0300 Subject: [PATCH 18/18] review fixes --- .github/workflows/test.yml | 40 +-------------------------------- etna/models/tbats.py | 13 +++++++---- pyproject.toml | 3 +-- tests/test_models/test_tbats.py | 29 ++---------------------- 4 files changed, 13 insertions(+), 72 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5c2e5740f..d735cf455 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -66,7 +66,7 @@ jobs: - name: PyTest ("not long") run: | - poetry run pytest tests -v --cov=etna -m "not long_1 and not long_2 and not long_3" --ignore=tests/test_experimental --cov-report=xml --durations=10 + poetry run pytest tests -v --cov=etna -m "not long_1 and not long_2" --ignore=tests/test_experimental --cov-report=xml --durations=10 poetry run pytest etna -v --doctest-modules --ignore=etna/libs --durations=10 - name: Upload coverage @@ -148,44 +148,6 @@ jobs: - name: Upload coverage uses: codecov/codecov-action@v2 - long-3-test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python - id: setup-python - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - version: 1.4.0 # TODO: remove after poetry fix - virtualenvs-create: true - virtualenvs-in-project: true - - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v2 - with: - path: .venv - key: venv-${{ runner.os }}-3.8-${{ hashFiles('**/poetry.lock') }} - - - name: Install dependencies - if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: | - poetry install -E "all tests" -vv - - - name: PyTest ("long") - run: | - poetry run pytest tests -v --cov=etna -m "long_3" --ignore=tests/test_experimental --cov-report=xml --durations=10 - - - name: Upload coverage - uses: codecov/codecov-action@v2 - experimental-test: runs-on: ubuntu-latest diff --git a/etna/models/tbats.py b/etna/models/tbats.py index a320adccf..a65449aa9 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -74,7 +74,7 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterab start=str(self._first_train_timestamp), end=str(self._last_train_timestamp), freq=self._freq ) - if not (set(train_timestamp) >= set(df["timestamp"])): + if not (set(df["timestamp"]) <= set(train_timestamp)): raise NotImplementedError("Method predict isn't currently implemented for out-of-sample prediction!") y_pred = pd.DataFrame() @@ -93,7 +93,9 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterab y_pred[f"target_{quantile:.4g}"] = confidence_intervals["upper_bound"] # selecting time points from provided dataframe - y_pred = y_pred.merge(df["timestamp"], on="timestamp").drop(columns=["timestamp"]) + y_pred.set_index("timestamp", inplace=True) + y_pred = y_pred.loc[df["timestamp"]] + y_pred.reset_index(drop=True, inplace=True) return y_pred @@ -151,7 +153,7 @@ def predict_components(self, df: pd.DataFrame) -> pd.DataFrame: start=str(self._first_train_timestamp), end=str(self._last_train_timestamp), freq=self._freq ) - if not (set(train_timestamp) >= set(df["timestamp"])): + if not (set(df["timestamp"]) <= set(train_timestamp)): raise NotImplementedError( "Method predict_components isn't currently implemented for out-of-sample prediction!" ) @@ -163,7 +165,10 @@ def predict_components(self, df: pd.DataFrame) -> pd.DataFrame: # selecting time points from provided dataframe components["timestamp"] = train_timestamp - components = components.merge(df["timestamp"], on="timestamp").drop(columns=["timestamp"]) + + components.set_index("timestamp", inplace=True) + components = components.loc[df["timestamp"]] + components.reset_index(drop=True, inplace=True) return components diff --git a/pyproject.toml b/pyproject.toml index 27416dda5..0b49061a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -265,8 +265,7 @@ doctest_optionflags = "NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL NUMBER" markers = [ "smoke", "long_1", - "long_2", - "long_3" + "long_2" ] [tool.coverage.report] diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index c2ee0ac39..e6495ff48 100644 --- a/tests/test_models/test_tbats.py +++ b/tests/test_models/test_tbats.py @@ -1,4 +1,5 @@ from copy import deepcopy +from unittest.mock import Mock import numpy as np import pandas as pd @@ -11,7 +12,6 @@ from etna.models.tbats import BATSModel from etna.models.tbats import TBATSModel from etna.models.tbats import _TBATSAdapter -from etna.transforms import LagTransform from tests.test_models.test_linear_model import linear_segments_by_parameters from tests.test_models.utils import assert_model_equals_loaded_original @@ -134,26 +134,9 @@ def test_repr(model_class, model_class_repr): @pytest.mark.parametrize("model", (TBATSModel(), BATSModel())) @pytest.mark.parametrize("method", ("forecast", "predict")) def test_not_fitted(model, method, linear_segments_ts_unique): - train, test = linear_segments_ts_unique - to_forecast = train.make_future(3) - method_to_call = getattr(model, method) with pytest.raises(ValueError, match="model is not fitted!"): - method_to_call(ts=to_forecast) - - -@pytest.mark.long_2 -@pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) -def test_forecast_format(model, new_format_df): - df = new_format_df - ts = TSDataset(df, "1d") - lags = LagTransform(lags=[3, 4, 5], in_column="target") - ts.fit_transform([lags]) - model.fit(ts) - future_ts = ts.make_future(3, transforms=[lags]) - model.forecast(future_ts) - future_ts.inverse_transform([lags]) - assert not future_ts.isnull().values.any() + method_to_call(ts=Mock()) @pytest.mark.long_2 @@ -212,7 +195,6 @@ def test_decompose_not_fitted(small_periodic_ts, method): method_to_call(df=small_periodic_ts.df) -@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", (BATS, TBATS), @@ -229,7 +211,6 @@ def test_decompose_forecast_output_format(periodic_dfs, estimator): assert components.shape[0] == horizon -@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -247,7 +228,6 @@ def test_decompose_predict_output_format(periodic_dfs, estimator): assert components.shape[0] == len(train) -@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -269,7 +249,6 @@ def test_named_components_output_format(periodic_dfs, estimator): assert len(components) == horizon -@pytest.mark.long_3 @pytest.mark.parametrize( "train_slice,decompose_slice", ((slice(5, 20), slice(None, 20)), (slice(5, 10), slice(10, 20))) ) @@ -328,7 +307,6 @@ def test_components_names(periodic_dfs, estimator, params, components_names, met assert set(components_df.columns) == components_names -@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -351,7 +329,6 @@ def test_seasonal_components_not_fitted(periodic_dfs, estimator, method, use_fut method_to_call(df=pred_df) -@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -375,7 +352,6 @@ def test_arma_component_not_fitted(periodic_dfs, estimator, method, use_future): method_to_call(df=pred_df) -@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", ( @@ -455,7 +431,6 @@ def test_forecast_decompose_sum_up_to_target(periodic_dfs, estimator, params, me np.testing.assert_allclose(y_hat_pred, np.squeeze(y_pred.values)) -@pytest.mark.long_3 @pytest.mark.parametrize( "estimator", (