tinkoff-ai · alex-hse-repository · Mar 29, 2023 · Mar 23, 2023 · Mar 23, 2023 · Mar 23, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 ### Added
+- Forecast decomposition for `SeasonalMovingAverageModel` ([#1180](https://github.com/tinkoff-ai/etna/pull/1180))
 - Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))
 - Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
 - Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))

diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py
@@ -1167,7 +1167,7 @@ def add_target_components(self, target_components_df: pd.DataFrame):
                 )
 
         components_sum = target_components_df.sum(axis=1, level="segment")
-        if not np.array_equal(components_sum.values, self[..., "target"].values):
+        if not np.allclose(components_sum.values, self[..., "target"].values):
             raise ValueError("Components don't sum up to target!")
 
         self._target_components_names = components_names

diff --git a/etna/models/moving_average.py b/etna/models/moving_average.py
@@ -8,6 +8,11 @@ class MovingAverageModel(SeasonalMovingAverageModel):
         y_{t} = \\frac{\\sum_{i=1}^{n} y_{t-i} }{n},
 
     where :math:`n` is window size.
+
+    Notes
+    -----
+    This model supports in-sample and out-of-sample prediction decomposition.
+    Prediction components are corresponding target lags with weights of :math:`1/window`.
     """
 
     def __init__(self, window: int = 5):

diff --git a/etna/models/naive.py b/etna/models/naive.py
@@ -8,6 +8,11 @@ class NaiveModel(SeasonalMovingAverageModel):
         y_{t} = y_{t-s},
 
     where :math:`s` is lag.
+
+    Notes
+    -----
+    This model supports in-sample and out-of-sample prediction decomposition.
+    Prediction component here is the corresponding target lag.
     """
 
     def __init__(self, lag: int = 1):

diff --git a/etna/models/seasonal_ma.py b/etna/models/seasonal_ma.py
@@ -16,6 +16,11 @@ class SeasonalMovingAverageModel(
         y_{t} = \\frac{\\sum_{i=1}^{n} y_{t-is} }{n},
 
     where :math:`s` is seasonality, :math:`n` is window size (how many history values are taken for forecast).
+
+    Notes
+    -----
+    This model supports in-sample and out-of-sample prediction decomposition.
+    Prediction components are corresponding target lags with weights of :math:`1/window`.
     """
 
     def __init__(self, window: int = 5, seasonality: int = 7):
@@ -81,7 +86,41 @@ def _validate_context(self, df: pd.DataFrame, prediction_size: int):
                 "Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataframe!"
             )
 
-    def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
+    def _predict_components(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
+        """Estimate forecast components.
+
+        Parameters
+        ----------
+        df:
+            DataFrame with target, containing lags that were used to make a prediction
+        prediction_size:
+            Number of last timestamps to leave after making prediction.
+            Previous timestamps will be used as a context.
+
+        Returns
+        -------
+        :
+            DataFrame with target components
+        """
+        self._validate_context(df=df, prediction_size=prediction_size)
+
+        all_transformed_features = []
+        segments = sorted(set(df.columns.get_level_values("segment")))
+        lags = list(range(self.seasonality, self.context_size + 1, self.seasonality))
+
+        target = df.loc[:, pd.IndexSlice[:, "target"]]
+        for lag in lags:
+            transformed_features = target.shift(lag)
+            transformed_features.columns = pd.MultiIndex.from_product(
+                [segments, [f"target_component_lag_{lag}"]], names=("segment", "feature")
+            )
+            all_transformed_features.append(transformed_features)
+
+        target_components_df = pd.concat(all_transformed_features, axis=1) / self.window
+        target_components_df = target_components_df.iloc[-prediction_size:]
+        return target_components_df
+
+    def _forecast(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
         """Make autoregressive forecasts on a wide dataframe."""
         self._validate_context(df=df, prediction_size=prediction_size)
 
@@ -96,10 +135,8 @@ def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
         for i in range(self.context_size, len(res)):
             res[i] = res[i - self.context_size : i : self.seasonality].mean(axis=0)
 
-        df = df.iloc[-prediction_size:]
         y_pred = res[-prediction_size:]
-        df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
-        return df
+        return y_pred
 
     def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
         """Make autoregressive forecasts.
@@ -128,15 +165,19 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool
         ValueError:
             if forecast context contains NaNs
         """
-        if return_components:
-            raise NotImplementedError("This mode isn't currently implemented!")
-
         df = ts.to_pandas()
-        new_df = self._forecast(df=df, prediction_size=prediction_size)
-        ts.df = new_df
+        y_pred = self._forecast(df=df, prediction_size=prediction_size)
+        ts.df = ts.df.iloc[-prediction_size:]
+        ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
+
+        if return_components:
+            # We use predicted targets as lags in autoregressive style
+            df.loc[df.index[-prediction_size:], pd.IndexSlice[:, "target"]] = y_pred
+            target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
+            ts.add_target_components(target_components_df=target_components_df)
         return ts
 
-    def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
+    def _predict(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
         """Make predictions on a wide dataframe using true values as autoregression context."""
         self._validate_context(df=df, prediction_size=prediction_size)
 
@@ -151,10 +192,8 @@ def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
         for res_idx, context_idx in enumerate(range(self.context_size, len(context))):
             res[res_idx] = context[context_idx - self.context_size : context_idx : self.seasonality].mean(axis=0)
 
-        df = df.iloc[-prediction_size:]
         y_pred = res[-prediction_size:]
-        df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
-        return df
+        return y_pred
 
     def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
         """Make predictions using true values as autoregression context (teacher forcing).
@@ -183,12 +222,15 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool =
         ValueError:
             if forecast context contains NaNs
         """
-        if return_components:
-            raise NotImplementedError("This mode isn't currently implemented!")
-
         df = ts.to_pandas()
-        new_df = self._predict(df=df, prediction_size=prediction_size)
-        ts.df = new_df
+        y_pred = self._predict(df=df, prediction_size=prediction_size)
+        ts.df = ts.df.iloc[-prediction_size:]
+        ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
+
+        if return_components:
+            # We use true targets as lags
+            target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
+            ts.add_target_components(target_components_df=target_components_df)
         return ts
 
 

diff --git a/etna/transforms/math/differencing.py b/etna/transforms/math/differencing.py
@@ -398,7 +398,7 @@ def _fit(self, df: pd.DataFrame) -> "DifferencingTransform":
             if NaNs are present inside the segment
         """
         # this is made because transforms of high order may need some columns created by transforms of lower order
-        result_df = df.copy()
+        result_df = df
         for transform in self._differencing_transforms:
             result_df = transform._fit_transform(result_df)
         self._fit_segments = df.columns.get_level_values("segment").unique().tolist()

diff --git a/etna/transforms/math/lags.py b/etna/transforms/math/lags.py
@@ -80,7 +80,7 @@ def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
         result: pd.Dataframe
             transformed dataframe
         """
-        result = df.copy()
+        result = df
         segments = sorted(set(df.columns.get_level_values("segment")))
         all_transformed_features = []
         features = df.loc[:, pd.IndexSlice[:, self.in_column]]

diff --git a/etna/transforms/math/scalers.py b/etna/transforms/math/scalers.py
@@ -63,7 +63,7 @@ def __init__(
         self.with_std = with_std
         super().__init__(
             in_column=in_column,
-            transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=True),
+            transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=False),
             out_column=out_column,
             inplace=inplace,
             mode=mode,
@@ -140,7 +140,7 @@ def __init__(
                 with_scaling=self.with_scaling,
                 quantile_range=self.quantile_range,
                 unit_variance=self.unit_variance,
-                copy=True,
+                copy=False,
             ),
             mode=mode,
         )
@@ -199,7 +199,7 @@ def __init__(
             in_column=in_column,
             inplace=inplace,
             out_column=out_column,
-            transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=True),
+            transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=False),
             mode=mode,
         )
 
@@ -248,7 +248,7 @@ def __init__(
             in_column=in_column,
             inplace=inplace,
             out_column=out_column,
-            transformer=MaxAbsScaler(copy=True),
+            transformer=MaxAbsScaler(copy=False),
             mode=mode,
         )
 

diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
@@ -224,6 +224,7 @@ def inconsistent_target_components_names_duplication_df(target_components_df):
 
 @pytest.fixture
 def inconsistent_target_components_values_df(target_components_df):
+    target_components_df.loc[target_components_df.index[-1], pd.IndexSlice["1", "target_component_a"]] = 100
     target_components_df.loc[target_components_df.index[10], pd.IndexSlice["1", "target_component_a"]] = 100
     return target_components_df
 

diff --git a/tests/test_models/test_simple_models.py b/tests/test_models/test_simple_models.py
@@ -729,3 +729,48 @@ def test_deadline_model_forecast_correct_with_big_horizons(two_month_ts):
 )
 def test_save_load(model, example_tsds):
     assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3)
+
+
+@pytest.mark.parametrize("method_name", ("forecast", "predict"))
+@pytest.mark.parametrize(
+    "window, seasonality, expected_components_names",
+    ((1, 7, ["target_component_lag_7"]), (2, 7, ["target_component_lag_7", "target_component_lag_14"])),
+)
+def test_sma_model_predict_components_correct_names(
+    example_tsds, method_name, window, seasonality, expected_components_names, horizon=10
+):
+    model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
+    model.fit(example_tsds)
+    to_call = getattr(model, method_name)
+    forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)
+    assert sorted(forecast.target_components_names) == sorted(expected_components_names)
+
+
+@pytest.mark.parametrize("method_name", ("forecast", "predict"))
+@pytest.mark.parametrize("window", (1, 3, 5))
+@pytest.mark.parametrize("seasonality", (1, 7, 14))
+def test_sma_model_predict_components_sum_up_to_target(example_tsds, method_name, window, seasonality, horizon=10):
+    model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
+    model.fit(example_tsds)
+    to_call = getattr(model, method_name)
+    forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)
+
+    target = forecast.to_pandas(features=["target"])
+    target_components_df = forecast.get_target_components()
+    np.testing.assert_allclose(target.values, target_components_df.sum(axis=1, level="segment").values)
+
+
+@pytest.mark.parametrize(
+    "method_name, expected_values",
+    (("forecast", [[44, 4], [45, 6], [44, 4]]), ("predict", [[44, 4], [45, 6], [46, 8]])),
+)
+def test_sma_model_predict_components_correct(
+    simple_df, method_name, expected_values, window=1, seasonality=2, horizon=3
+):
+    model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
+    model.fit(simple_df)
+    to_call = getattr(model, method_name)
+    forecast = to_call(ts=simple_df, prediction_size=horizon, return_components=True)
+
+    target_components_df = forecast.get_target_components()
+    np.testing.assert_allclose(target_components_df.values, expected_values)