Skip to content

Implement forecast decomposition for SMA-based models #1180

Merged
merged 17 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Added
- Forecast decomposition for `SeasonalMovingAverageModel` ([#1180](https://github.com/tinkoff-ai/etna/pull/1180))
- Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))
- Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
- Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))
Expand Down
2 changes: 1 addition & 1 deletion etna/datasets/tsdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1167,7 +1167,7 @@ def add_target_components(self, target_components_df: pd.DataFrame):
)

components_sum = target_components_df.sum(axis=1, level="segment")
if not np.array_equal(components_sum.values, self[..., "target"].values):
if not np.allclose(components_sum.values, self[..., "target"].values):
raise ValueError("Components don't sum up to target!")

self._target_components_names = components_names
Expand Down
5 changes: 5 additions & 0 deletions etna/models/moving_average.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ class MovingAverageModel(SeasonalMovingAverageModel):
y_{t} = \\frac{\\sum_{i=1}^{n} y_{t-i} }{n},

where :math:`n` is window size.

Notes
-----
This model supports in-sample and out-of-sample prediction decomposition.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This model supports in-sample and out-of-sample prediction decomposition.

I'm not sure that it is an implementation detail and should be in Notes.

Prediction components are corresponding target lags with weights of 1/window

Maybe it would be better to write :math:`1 / window`

Prediction components are corresponding target lags with weights of :math:`1/window`.
"""

def __init__(self, window: int = 5):
Expand Down
5 changes: 5 additions & 0 deletions etna/models/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ class NaiveModel(SeasonalMovingAverageModel):
y_{t} = y_{t-s},

where :math:`s` is lag.

Notes
-----
This model supports in-sample and out-of-sample prediction decomposition.
Prediction component here is the corresponding target lag.
"""

def __init__(self, lag: int = 1):
Expand Down
78 changes: 60 additions & 18 deletions etna/models/seasonal_ma.py
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ class SeasonalMovingAverageModel(
y_{t} = \\frac{\\sum_{i=1}^{n} y_{t-is} }{n},

where :math:`s` is seasonality, :math:`n` is window size (how many history values are taken for forecast).

Notes
-----
This model supports in-sample and out-of-sample prediction decomposition.
Prediction components are corresponding target lags with weights of :math:`1/window`.
"""

def __init__(self, window: int = 5, seasonality: int = 7):
Expand Down Expand Up @@ -81,7 +86,41 @@ def _validate_context(self, df: pd.DataFrame, prediction_size: int):
"Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataframe!"
)

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
def _predict_components(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
    """Estimate forecast components.

    Every component is the target shifted by one of the seasonal lags
    (``seasonality``, ``2 * seasonality``, ... up to ``context_size``) and
    divided by ``window``.

    Parameters
    ----------
    df:
        DataFrame with target, containing lags that were used to make a prediction
    prediction_size:
        Number of last timestamps to leave after making prediction.
        Previous timestamps will be used as a context.

    Returns
    -------
    :
        DataFrame with target components
    """
    self._validate_context(df=df, prediction_size=prediction_size)

    target = df.loc[:, pd.IndexSlice[:, "target"]]
    # Label components with the segments of the selected target columns, in
    # their actual order. Relabelling with an independently sorted segment list
    # would attach data to the wrong segment when columns are not sorted.
    segments = target.columns.get_level_values("segment")
    lags = range(self.seasonality, self.context_size + 1, self.seasonality)

    all_transformed_features = []
    for lag in lags:
        transformed_features = target.shift(lag)
        transformed_features.columns = pd.MultiIndex.from_arrays(
            [segments, [f"target_component_lag_{lag}"] * len(segments)], names=("segment", "feature")
        )
        all_transformed_features.append(transformed_features)

    # Each lag contributes equally to the moving average.
    target_components_df = pd.concat(all_transformed_features, axis=1) / self.window
    # Keep only the rows that correspond to the prediction.
    return target_components_df.iloc[-prediction_size:]

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
"""Make autoregressive forecasts on a wide dataframe."""
self._validate_context(df=df, prediction_size=prediction_size)

Expand All @@ -96,10 +135,8 @@ def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
for i in range(self.context_size, len(res)):
res[i] = res[i - self.context_size : i : self.seasonality].mean(axis=0)

df = df.iloc[-prediction_size:]
y_pred = res[-prediction_size:]
df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
return df
return y_pred

def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
"""Make autoregressive forecasts.
Expand Down Expand Up @@ -128,15 +165,19 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._forecast(df=df, prediction_size=prediction_size)
ts.df = new_df
y_pred = self._forecast(df=df, prediction_size=prediction_size)
ts.df = ts.df.iloc[-prediction_size:]
ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred

if return_components:
# We use predicted targets as lags in autoregressive style
df.loc[df.index[-prediction_size:], pd.IndexSlice[:, "target"]] = y_pred
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
ts.add_target_components(target_components_df=target_components_df)
return ts

def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
def _predict(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
"""Make predictions on a wide dataframe using true values as autoregression context."""
self._validate_context(df=df, prediction_size=prediction_size)

Expand All @@ -151,10 +192,8 @@ def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
for res_idx, context_idx in enumerate(range(self.context_size, len(context))):
res[res_idx] = context[context_idx - self.context_size : context_idx : self.seasonality].mean(axis=0)

df = df.iloc[-prediction_size:]
y_pred = res[-prediction_size:]
df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
return df
return y_pred

def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
"""Make predictions using true values as autoregression context (teacher forcing).
Expand Down Expand Up @@ -183,12 +222,15 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool =
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._predict(df=df, prediction_size=prediction_size)
ts.df = new_df
y_pred = self._predict(df=df, prediction_size=prediction_size)
ts.df = ts.df.iloc[-prediction_size:]
ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred

if return_components:
# We use true targets as lags
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
Mr-Geekman marked this conversation as resolved.
Show resolved Hide resolved
ts.add_target_components(target_components_df=target_components_df)
return ts


Expand Down
2 changes: 1 addition & 1 deletion etna/transforms/math/differencing.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ def _fit(self, df: pd.DataFrame) -> "DifferencingTransform":
if NaNs are present inside the segment
"""
# this is made because transforms of high order may need some columns created by transforms of lower order
result_df = df.copy()
result_df = df
for transform in self._differencing_transforms:
result_df = transform._fit_transform(result_df)
self._fit_segments = df.columns.get_level_values("segment").unique().tolist()
Expand Down
2 changes: 1 addition & 1 deletion etna/transforms/math/lags.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
result: pd.Dataframe
transformed dataframe
"""
result = df.copy()
result = df
segments = sorted(set(df.columns.get_level_values("segment")))
all_transformed_features = []
features = df.loc[:, pd.IndexSlice[:, self.in_column]]
Expand Down
8 changes: 4 additions & 4 deletions etna/transforms/math/scalers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __init__(
self.with_std = with_std
super().__init__(
in_column=in_column,
transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=True),
transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=False),
out_column=out_column,
inplace=inplace,
mode=mode,
Expand Down Expand Up @@ -140,7 +140,7 @@ def __init__(
with_scaling=self.with_scaling,
quantile_range=self.quantile_range,
unit_variance=self.unit_variance,
copy=True,
copy=False,
),
mode=mode,
)
Expand Down Expand Up @@ -199,7 +199,7 @@ def __init__(
in_column=in_column,
inplace=inplace,
out_column=out_column,
transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=True),
transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=False),
mode=mode,
)

Expand Down Expand Up @@ -248,7 +248,7 @@ def __init__(
in_column=in_column,
inplace=inplace,
out_column=out_column,
transformer=MaxAbsScaler(copy=True),
transformer=MaxAbsScaler(copy=False),
mode=mode,
)

Expand Down
1 change: 1 addition & 0 deletions tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ def inconsistent_target_components_names_duplication_df(target_components_df):

@pytest.fixture
def inconsistent_target_components_values_df(target_components_df):
    """Return the components frame with two values of one component overwritten.

    The ``target_component_a`` value of segment ``"1"`` is set to 100 at the
    last timestamp and at the timestamp in position 10.
    """
    corrupted_column = pd.IndexSlice["1", "target_component_a"]
    for position in (-1, 10):
        timestamp = target_components_df.index[position]
        target_components_df.loc[timestamp, corrupted_column] = 100
    return target_components_df

Expand Down
45 changes: 45 additions & 0 deletions tests/test_models/test_simple_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,3 +729,48 @@ def test_deadline_model_forecast_correct_with_big_horizons(two_month_ts):
)
def test_save_load(model, example_tsds):
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize(
    "window, seasonality, expected_components_names",
    ((1, 7, ["target_component_lag_7"]), (2, 7, ["target_component_lag_7", "target_component_lag_14"])),
)
def test_sma_model_predict_components_correct_names(
    example_tsds, method_name, window, seasonality, expected_components_names, horizon=10
):
    """Check that the returned component names match the expected lag-based names."""
    sma = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
    sma.fit(example_tsds)

    # Both `forecast` and `predict` take the same keyword arguments.
    result_ts = getattr(sma, method_name)(ts=example_tsds, prediction_size=horizon, return_components=True)

    actual_names = sorted(result_ts.target_components_names)
    assert actual_names == sorted(expected_components_names)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize("window", (1, 3, 5))
@pytest.mark.parametrize("seasonality", (1, 7, 14))
def test_sma_model_predict_components_sum_up_to_target(example_tsds, method_name, window, seasonality, horizon=10):
    """Check that per-segment sums of the components equal the predicted target."""
    sma = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
    sma.fit(example_tsds)
    result_ts = getattr(sma, method_name)(ts=example_tsds, prediction_size=horizon, return_components=True)

    target_values = result_ts.to_pandas(features=["target"]).values
    # Collapse the components of every segment into a single column.
    components_total = result_ts.get_target_components().sum(axis=1, level="segment").values
    np.testing.assert_allclose(target_values, components_total)


@pytest.mark.parametrize(
    "method_name, expected_values",
    (("forecast", [[44, 4], [45, 6], [44, 4]]), ("predict", [[44, 4], [45, 6], [46, 8]])),
)
def test_sma_model_predict_components_correct(
    simple_df, method_name, expected_values, window=1, seasonality=2, horizon=3
):
    """Check the component values returned by `forecast` and `predict` against fixed expectations."""
    sma = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
    sma.fit(simple_df)
    result_ts = getattr(sma, method_name)(ts=simple_df, prediction_size=horizon, return_components=True)

    components_values = result_ts.get_target_components().values
    np.testing.assert_allclose(components_values, expected_values)