Skip to content

Fix outliers transforms on future with gap #1147

Merged
merged 4 commits into from
Mar 3, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix inference tests on new segments for `DeepARModel` and `TFTModel` ([#1109](https://github.com/tinkoff-ai/etna/pull/1109))
- Fix alignment during forecasting in new NNs, add validation of context size during forecasting in new NNs, add validation of batch in `MLPNet` ([#1108](https://github.com/tinkoff-ai/etna/pull/1108))
- Fix `MeanSegmentEncoderTransform` to work with subset of segments and raise error on new segments ([#1104](https://github.com/tinkoff-ai/etna/pull/1104))
-
- Fix outliers transforms on future with gap ([#1147](https://github.com/tinkoff-ai/etna/pull/1147))
- Fix `SegmentEncoderTransform` to work with subset of segments and raise error on new segments ([#1103](https://github.com/tinkoff-ai/etna/pull/1103))
- Fix `SklearnTransform` in per-segment mode to work on subset of segments and raise error on new segments ([#1107](https://github.com/tinkoff-ai/etna/pull/1107))
- Fix `OutliersTransform` and its children to raise error on new segments ([#1139](https://github.com/tinkoff-ai/etna/pull/1139))
Expand Down
4 changes: 3 additions & 1 deletion etna/transforms/outliers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,9 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
segments = df.columns.get_level_values("segment").unique().tolist()
self._validate_segments(segments)
for segment in segments:
result_df.loc[self.outliers_timestamps[segment], pd.IndexSlice[segment, self.in_column]] = np.NaN
# keep only the outlier timestamps that are present in result_df's index
index_to_check = result_df.index.intersection(self.outliers_timestamps[segment])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe name it `outliers_timestamps_segment` or something like that.

result_df.loc[index_to_check, pd.IndexSlice[segment, self.in_column]] = np.NaN
return result_df

def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ line_length = 120
minversion = "6.0"
doctest_optionflags = "NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL NUMBER"
filterwarnings = [
"error",
"ignore: Torchmetrics v0.9 introduced a new argument class property called `full_state_update` that",
"ignore: TSDataset freq can't be inferred",
"ignore: test_size, test_start and test_end cannot be",
Expand Down
20 changes: 4 additions & 16 deletions tests/test_transforms/test_inference/test_inverse_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,10 @@ def _test_inverse_transform_future_with_target(
"ts_to_fill",
{},
),
# outliers
(DensityOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -1448,22 +1452,6 @@ def test_inverse_transform_future_with_target_fail_resample(
ts = request.getfixturevalue(dataset_name)
self._test_inverse_transform_future_with_target(ts, transform, expected_changes=expected_changes)

@to_be_fixed(raises=Exception)
@pytest.mark.parametrize(
"transform, dataset_name, expected_changes",
[
# outliers
(DensityOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
],
)
def test_inverse_transform_future_with_target_failed_error(
self, transform, dataset_name, expected_changes, request
):
ts = request.getfixturevalue(dataset_name)
self._test_inverse_transform_future_with_target(ts, transform, expected_changes=expected_changes)


class TestInverseTransformFutureWithoutTarget:
"""Test inverse transform on future dataset with unknown target.
Expand Down
18 changes: 4 additions & 14 deletions tests/test_transforms/test_inference/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,10 @@ def _test_transform_future_with_target(self, ts, transform, expected_changes, ga
"ts_to_fill",
{},
),
# outliers
(DensityOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand All @@ -1279,20 +1283,6 @@ def test_transform_future_with_target(self, transform, dataset_name, expected_ch
ts = request.getfixturevalue(dataset_name)
self._test_transform_future_with_target(ts, transform, expected_changes=expected_changes)

@to_be_fixed(raises=Exception)
@pytest.mark.parametrize(
"transform, dataset_name, expected_changes",
[
# outliers
(DensityOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
],
)
def test_transform_future_with_target_failed_error(self, transform, dataset_name, expected_changes, request):
ts = request.getfixturevalue(dataset_name)
self._test_transform_future_with_target(ts, transform, expected_changes=expected_changes)


class TestTransformFutureWithoutTarget:
"""Test transform on future dataset with unknown target.
Expand Down