diff --git a/CHANGELOG.md b/CHANGELOG.md
index f4783ee6b..8d6085ccc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 ### Added
+- Forecast decomposition for `SeasonalMovingAverageModel` ([#1180](https://github.com/tinkoff-ai/etna/pull/1180))
 - Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))
 - Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
 - Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))
diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py
index 118b81293..b8e226805 100644
--- a/etna/datasets/tsdataset.py
+++ b/etna/datasets/tsdataset.py
@@ -1167,7 +1167,7 @@ def add_target_components(self, target_components_df: pd.DataFrame):
             )
 
         components_sum = target_components_df.sum(axis=1, level="segment")
-        if not np.array_equal(components_sum.values, self[..., "target"].values):
+        if not np.allclose(components_sum.values, self[..., "target"].values):
             raise ValueError("Components don't sum up to target!")
 
         self._target_components_names = components_names
diff --git a/etna/models/moving_average.py b/etna/models/moving_average.py
index 8607a5f51..a6fbcd001 100644
--- a/etna/models/moving_average.py
+++ b/etna/models/moving_average.py
@@ -8,6 +8,11 @@ class MovingAverageModel(SeasonalMovingAverageModel):
         y_{t} = \\frac{\\sum_{i=1}^{n} y_{t-i} }{n},
 
     where :math:`n` is window size.
+
+    Notes
+    -----
+    This model supports in-sample and out-of-sample prediction decomposition.
+    Prediction components are corresponding target lags with weights of :math:`1/window`.
     """
 
     def __init__(self, window: int = 5):
diff --git a/etna/models/naive.py b/etna/models/naive.py
index 29fec2f85..59367c2fd 100644
--- a/etna/models/naive.py
+++ b/etna/models/naive.py
@@ -8,6 +8,11 @@ class NaiveModel(SeasonalMovingAverageModel):
         y_{t} = y_{t-s},
 
     where :math:`s` is lag.
+
+    Notes
+    -----
+    This model supports in-sample and out-of-sample prediction decomposition.
+    Prediction component here is the corresponding target lag.
     """
 
    def __init__(self, lag: int = 1):
diff --git a/etna/models/seasonal_ma.py b/etna/models/seasonal_ma.py
index b1190eebb..7196a14b0 100644
--- a/etna/models/seasonal_ma.py
+++ b/etna/models/seasonal_ma.py
@@ -16,6 +16,11 @@ class SeasonalMovingAverageModel(
         y_{t} = \\frac{\\sum_{i=1}^{n} y_{t-is} }{n},
 
     where :math:`s` is seasonality, :math:`n` is window size (how many history values are taken for forecast).
+
+    Notes
+    -----
+    This model supports in-sample and out-of-sample prediction decomposition.
+    Prediction components are corresponding target lags with weights of :math:`1/window`.
     """
 
     def __init__(self, window: int = 5, seasonality: int = 7):
@@ -81,7 +86,41 @@ def _validate_context(self, df: pd.DataFrame, prediction_size: int):
                 "Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataframe!"
             )
 
-    def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
+    def _predict_components(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
+        """Estimate forecast components.
+
+        Parameters
+        ----------
+        df:
+            DataFrame with target, containing lags that were used to make a prediction
+        prediction_size:
+            Number of last timestamps to leave after making prediction.
+            Previous timestamps will be used as a context.
+
+        Returns
+        -------
+        :
+            DataFrame with target components
+        """
+        self._validate_context(df=df, prediction_size=prediction_size)
+
+        all_transformed_features = []
+        segments = sorted(set(df.columns.get_level_values("segment")))
+        lags = list(range(self.seasonality, self.context_size + 1, self.seasonality))
+
+        target = df.loc[:, pd.IndexSlice[:, "target"]]
+        for lag in lags:
+            transformed_features = target.shift(lag)
+            transformed_features.columns = pd.MultiIndex.from_product(
+                [segments, [f"target_component_lag_{lag}"]], names=("segment", "feature")
+            )
+            all_transformed_features.append(transformed_features)
+
+        target_components_df = pd.concat(all_transformed_features, axis=1) / self.window
+        target_components_df = target_components_df.iloc[-prediction_size:]
+        return target_components_df
+
+    def _forecast(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
         """Make autoregressive forecasts on a wide dataframe."""
         self._validate_context(df=df, prediction_size=prediction_size)
 
@@ -96,10 +135,8 @@ def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
         for i in range(self.context_size, len(res)):
             res[i] = res[i - self.context_size : i : self.seasonality].mean(axis=0)
 
-        df = df.iloc[-prediction_size:]
         y_pred = res[-prediction_size:]
-        df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
-        return df
+        return y_pred
 
     def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
         """Make autoregressive forecasts.
@@ -128,15 +165,19 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool
         ValueError:
             if forecast context contains NaNs
         """
-        if return_components:
-            raise NotImplementedError("This mode isn't currently implemented!")
-
         df = ts.to_pandas()
-        new_df = self._forecast(df=df, prediction_size=prediction_size)
-        ts.df = new_df
+        y_pred = self._forecast(df=df, prediction_size=prediction_size)
+        ts.df = ts.df.iloc[-prediction_size:]
+        ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
+
+        if return_components:
+            # We use predicted targets as lags in autoregressive style
+            df.loc[df.index[-prediction_size:], pd.IndexSlice[:, "target"]] = y_pred
+            target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
+            ts.add_target_components(target_components_df=target_components_df)
         return ts
 
-    def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
+    def _predict(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
         """Make predictions on a wide dataframe using true values as autoregression context."""
         self._validate_context(df=df, prediction_size=prediction_size)
 
@@ -151,10 +192,8 @@ def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
         for res_idx, context_idx in enumerate(range(self.context_size, len(context))):
             res[res_idx] = context[context_idx - self.context_size : context_idx : self.seasonality].mean(axis=0)
 
-        df = df.iloc[-prediction_size:]
         y_pred = res[-prediction_size:]
-        df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
-        return df
+        return y_pred
 
     def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
         """Make predictions using true values as autoregression context (teacher forcing).
@@ -183,12 +222,15 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool =
         ValueError:
             if forecast context contains NaNs
         """
-        if return_components:
-            raise NotImplementedError("This mode isn't currently implemented!")
-
         df = ts.to_pandas()
-        new_df = self._predict(df=df, prediction_size=prediction_size)
-        ts.df = new_df
+        y_pred = self._predict(df=df, prediction_size=prediction_size)
+        ts.df = ts.df.iloc[-prediction_size:]
+        ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
+
+        if return_components:
+            # We use true targets as lags
+            target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
+            ts.add_target_components(target_components_df=target_components_df)
         return ts
diff --git a/etna/transforms/math/differencing.py b/etna/transforms/math/differencing.py
index 1fd1c20ad..59bae77ad 100644
--- a/etna/transforms/math/differencing.py
+++ b/etna/transforms/math/differencing.py
@@ -398,7 +398,7 @@ def _fit(self, df: pd.DataFrame) -> "DifferencingTransform":
             if NaNs are present inside the segment
         """
         # this is made because transforms of high order may need some columns created by transforms of lower order
-        result_df = df.copy()
+        result_df = df
        for transform in self._differencing_transforms:
             result_df = transform._fit_transform(result_df)
         self._fit_segments = df.columns.get_level_values("segment").unique().tolist()
diff --git a/etna/transforms/math/lags.py b/etna/transforms/math/lags.py
index fd4490371..aae433624 100644
--- a/etna/transforms/math/lags.py
+++ b/etna/transforms/math/lags.py
@@ -80,7 +80,7 @@ def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
         result: pd.Dataframe
             transformed dataframe
         """
-        result = df.copy()
+        result = df
         segments = sorted(set(df.columns.get_level_values("segment")))
         all_transformed_features = []
         features = df.loc[:, pd.IndexSlice[:, self.in_column]]
diff --git a/etna/transforms/math/scalers.py b/etna/transforms/math/scalers.py
index 876a20c87..247622bb8 100644
--- a/etna/transforms/math/scalers.py
+++ b/etna/transforms/math/scalers.py
@@ -63,7 +63,7 @@ def __init__(
         self.with_std = with_std
         super().__init__(
             in_column=in_column,
-            transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=True),
+            transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=False),
             out_column=out_column,
             inplace=inplace,
             mode=mode,
@@ -140,7 +140,7 @@ def __init__(
                 with_scaling=self.with_scaling,
                 quantile_range=self.quantile_range,
                 unit_variance=self.unit_variance,
-                copy=True,
+                copy=False,
             ),
             mode=mode,
         )
@@ -199,7 +199,7 @@ def __init__(
             in_column=in_column,
             inplace=inplace,
             out_column=out_column,
-            transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=True),
+            transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=False),
             mode=mode,
         )
 
@@ -248,7 +248,7 @@ def __init__(
             in_column=in_column,
             inplace=inplace,
             out_column=out_column,
-            transformer=MaxAbsScaler(copy=True),
+            transformer=MaxAbsScaler(copy=False),
             mode=mode,
         )
 
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index 21aa438d9..ac7cfe37a 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -224,6 +224,7 @@ def inconsistent_target_components_names_duplication_df(target_components_df):
 
 @pytest.fixture
 def inconsistent_target_components_values_df(target_components_df):
+    target_components_df.loc[target_components_df.index[-1], pd.IndexSlice["1", "target_component_a"]] = 100
     target_components_df.loc[target_components_df.index[10], pd.IndexSlice["1", "target_component_a"]] = 100
     return target_components_df
 
diff --git a/tests/test_models/test_simple_models.py b/tests/test_models/test_simple_models.py
index 3d30231de..3a31c4855 100644
--- a/tests/test_models/test_simple_models.py
+++ b/tests/test_models/test_simple_models.py
@@ -729,3 +729,48 @@ def test_deadline_model_forecast_correct_with_big_horizons(two_month_ts):
     )
 def test_save_load(model, example_tsds):
     assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3)
+
+
+@pytest.mark.parametrize("method_name", ("forecast", "predict"))
+@pytest.mark.parametrize(
+    "window, seasonality, expected_components_names",
+    ((1, 7, ["target_component_lag_7"]), (2, 7, ["target_component_lag_7", "target_component_lag_14"])),
+)
+def test_sma_model_predict_components_correct_names(
+    example_tsds, method_name, window, seasonality, expected_components_names, horizon=10
+):
+    model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
+    model.fit(example_tsds)
+    to_call = getattr(model, method_name)
+    forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)
+    assert sorted(forecast.target_components_names) == sorted(expected_components_names)
+
+
+@pytest.mark.parametrize("method_name", ("forecast", "predict"))
+@pytest.mark.parametrize("window", (1, 3, 5))
+@pytest.mark.parametrize("seasonality", (1, 7, 14))
+def test_sma_model_predict_components_sum_up_to_target(example_tsds, method_name, window, seasonality, horizon=10):
+    model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
+    model.fit(example_tsds)
+    to_call = getattr(model, method_name)
+    forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)
+
+    target = forecast.to_pandas(features=["target"])
+    target_components_df = forecast.get_target_components()
+    np.testing.assert_allclose(target.values, target_components_df.sum(axis=1, level="segment").values)
+
+
+@pytest.mark.parametrize(
+    "method_name, expected_values",
+    (("forecast", [[44, 4], [45, 6], [44, 4]]), ("predict", [[44, 4], [45, 6], [46, 8]])),
+)
+def test_sma_model_predict_components_correct(
+    simple_df, method_name, expected_values, window=1, seasonality=2, horizon=3
+):
+    model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
+    model.fit(simple_df)
+    to_call = getattr(model, method_name)
+    forecast = to_call(ts=simple_df, prediction_size=horizon, return_components=True)
+
+    target_components_df = forecast.get_target_components()
+    np.testing.assert_allclose(target_components_df.values, expected_values)
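
Usage note (not part of the patch): the minimal sketch below shows how the new `return_components` flag is intended to be used with `SeasonalMovingAverageModel`. The toy dataset and parameter values are illustrative; the calls themselves mirror the tests above.

import numpy as np
import pandas as pd

from etna.datasets import TSDataset
from etna.models import SeasonalMovingAverageModel

# Toy dataset: one segment of daily data in etna's long format (values are arbitrary).
df = pd.DataFrame(
    {
        "timestamp": pd.date_range("2021-01-01", periods=60, freq="D"),
        "segment": "segment_1",
        "target": np.arange(60, dtype=float),
    }
)
ts = TSDataset(df=TSDataset.to_dataset(df), freq="D")

model = SeasonalMovingAverageModel(window=2, seasonality=7)
model.fit(ts)

# As in the tests: the last `prediction_size` timestamps of `ts` are forecasted,
# earlier timestamps are used as the autoregression context.
forecast = model.forecast(ts=ts, prediction_size=7, return_components=True)

# Components are the target lags scaled by 1/window and sum up to the forecast.
print(forecast.target_components_names)  # e.g. ["target_component_lag_7", "target_component_lag_14"]
components = forecast.get_target_components()
print(components.sum(axis=1, level="segment"))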