Skip to content

Fix performance of DeepARModel and TFTModel #1322

Merged
merged 3 commits into from
Jul 24, 2023
Merged

Conversation

Mr-Geekman
Copy link
Contributor

@Mr-Geekman Mr-Geekman commented Jul 20, 2023

Before submitting (must do checklist)

  • Did you read the contribution guide?
  • Did you update the docs? We use Numpy format for all the methods and classes.
  • Did you write any new necessary tests?
  • Did you update the CHANGELOG?

Proposed Changes

Optimize creation of the time_idx feature.

Closing issues

@Mr-Geekman Mr-Geekman self-assigned this Jul 20, 2023
@Mr-Geekman
Copy link
Contributor Author

Script for old version (1.15.1):

import time
import random

import torch
import pandas as pd
import numpy as np
from loguru import logger

from etna.datasets.tsdataset import TSDataset
from etna.datasets import generate_ar_df
from etna.pipeline import Pipeline
from etna.metrics import SMAPE, MAPE, MAE
from etna.transforms import DateFlagsTransform
from etna.transforms import PytorchForecastingTransform

from etna.models.nn import TFTModel


# Forecast horizon, in steps of the dataset frequency; used both as the
# transform's max_prediction_length and as the pipeline horizon.
HORIZON = 7


def generate_tsdataset(dataset_config) -> TSDataset:
    """Build a synthetic daily dataset for the benchmark.

    Parameters
    ----------
    dataset_config:
        Mapping that must contain the keys ``"periods"``, ``"n_segments"``,
        ``"regressors"``, ``"exogs"`` and ``"horizon"``:

        * ``periods`` -- number of timestamps generated for the target;
        * ``n_segments`` -- number of segments to generate;
        * ``exogs`` -- if truthy, add an ``exog`` column to the exogenous data;
        * ``regressors`` -- if truthy, add a ``regressor`` column to the exogenous
          data and declare it as known in the future;
        * ``horizon`` -- number of extra timestamps generated for the exogenous
          columns beyond the target.

    Returns
    -------
    :
        Dataset with frequency ``"D"`` starting at 2021-06-01.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``dataset_config``.
    """
    periods = dataset_config["periods"]
    n_segments = dataset_config["n_segments"]
    regressors = dataset_config["regressors"]
    exogs = dataset_config["exogs"]
    horizon = dataset_config["horizon"]

    def _generate_long_df(n_periods, column):
        # All frames share start date, segment count and frequency, so their rows line up.
        frame = generate_ar_df(
            periods=n_periods,
            start_time="2021-06-01",
            n_segments=n_segments,
            freq="D",
        )
        if column == "target":
            return frame
        return frame.rename(columns={"target": column})

    df = _generate_long_df(periods, "target")

    # Exogenous columns extend ``horizon`` steps past the end of the target.
    exog_frames = []
    if exogs:
        exog_frames.append(_generate_long_df(periods + horizon, "exog"))
    if regressors:
        # Previously the regressor was only generated when ``exogs`` was set, although
        # ``known_future`` referenced it regardless; build it independently instead.
        df_regressors = _generate_long_df(periods + horizon, "regressor")
        # Keep the timestamp/segment columns only once in the concatenated frame.
        exog_frames.append(df_regressors[["regressor"]] if exog_frames else df_regressors)

    df_exog = None
    if exog_frames:
        df_exog = TSDataset.to_dataset(pd.concat(exog_frames, axis=1))

    ts = TSDataset(
        df=TSDataset.to_dataset(df),
        freq="D",
        df_exog=df_exog,
        known_future=["regressor"] if regressors else (),
    )
    return ts


def set_seed(seed: int = 42):
    """Seed every random number generator used by the script.

    Parameters
    ----------
    seed:
        Value handed, in order, to Python's ``random``, NumPy's global generator,
        ``torch.manual_seed`` and ``torch.cuda.manual_seed_all``.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


def main():
    """Time a 3-fold TFT backtest on synthetic data and log the results.

    Seeds the generators, builds a dataset of 100 segments with 500 periods plus
    exogenous and regressor columns, seeds again, runs ``Pipeline.backtest`` with
    a single job and logs the elapsed wall-clock time and the mean MAE.
    """
    set_seed()

    # NOTE: the bundled example can be benchmarked instead by reading
    # "examples/data/example_dataset.csv" and wrapping it in a TSDataset with freq="D".
    ts = generate_tsdataset(
        {
            "n_segments": 100,
            "periods": 500,
            "exogs": True,
            "regressors": True,
            "horizon": 14,
        }
    )

    # Re-seed so the model run does not depend on how much randomness data generation used.
    set_seed()

    date_flags = DateFlagsTransform(
        day_number_in_week=True,
        day_number_in_month=False,
        out_column="regressor_dateflag",
    )
    pf_transform = PytorchForecastingTransform(
        max_encoder_length=21,
        min_encoder_length=21,
        max_prediction_length=HORIZON,
        time_varying_known_reals=["time_idx"],
        time_varying_known_categoricals=["regressor_dateflag_day_number_in_week"],
        time_varying_unknown_reals=["target"],
        static_categoricals=["segment"],
        target_normalizer=None,
    )
    model_tft = TFTModel(trainer_kwargs={"max_epochs": 1})

    pipeline_tft = Pipeline(
        model=model_tft,
        transforms=[date_flags, pf_transform],
        horizon=HORIZON,
    )

    # Only the backtest itself is timed; forecasts and fold info are not used afterwards.
    started_at = time.perf_counter()
    metrics_tft, _forecast, _fold_info = pipeline_tft.backtest(
        ts, metrics=[SMAPE(), MAPE(), MAE()], n_folds=3, n_jobs=1
    )
    run_time = time.perf_counter() - started_at

    logger.info(f"Run time: {run_time:.3f}")
    logger.info(f"Metrics: {metrics_tft['MAE'].mean():.3f}")


# Run the benchmark only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Results:

  • Time: 178.9
  • MAE: 16.682

@Mr-Geekman
Copy link
Contributor Author

Script for new version:

import time
import random

import torch
import pandas as pd
import numpy as np
from loguru import logger

from etna.datasets.tsdataset import TSDataset
from etna.datasets import generate_ar_df
from etna.pipeline import Pipeline
from etna.metrics import SMAPE, MAPE, MAE
from etna.transforms import DateFlagsTransform
from etna.models.nn.utils import PytorchForecastingDatasetBuilder

from etna.models.nn import TFTModel


# Forecast horizon, in steps of the dataset frequency; used both as the
# dataset builder's max_prediction_length and as the pipeline horizon.
HORIZON = 7


def generate_tsdataset(dataset_config) -> TSDataset:
    """Build a synthetic daily dataset for the benchmark.

    Parameters
    ----------
    dataset_config:
        Mapping that must contain the keys ``"periods"``, ``"n_segments"``,
        ``"regressors"``, ``"exogs"`` and ``"horizon"``:

        * ``periods`` -- number of timestamps generated for the target;
        * ``n_segments`` -- number of segments to generate;
        * ``exogs`` -- if truthy, add an ``exog`` column to the exogenous data;
        * ``regressors`` -- if truthy, add a ``regressor`` column to the exogenous
          data and declare it as known in the future;
        * ``horizon`` -- number of extra timestamps generated for the exogenous
          columns beyond the target.

    Returns
    -------
    :
        Dataset with frequency ``"D"`` starting at 2021-06-01.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``dataset_config``.
    """
    periods = dataset_config["periods"]
    n_segments = dataset_config["n_segments"]
    regressors = dataset_config["regressors"]
    exogs = dataset_config["exogs"]
    horizon = dataset_config["horizon"]

    def _generate_long_df(n_periods, column):
        # All frames share start date, segment count and frequency, so their rows line up.
        frame = generate_ar_df(
            periods=n_periods,
            start_time="2021-06-01",
            n_segments=n_segments,
            freq="D",
        )
        if column == "target":
            return frame
        return frame.rename(columns={"target": column})

    df = _generate_long_df(periods, "target")

    # Exogenous columns extend ``horizon`` steps past the end of the target.
    exog_frames = []
    if exogs:
        exog_frames.append(_generate_long_df(periods + horizon, "exog"))
    if regressors:
        # Previously the regressor was only generated when ``exogs`` was set, although
        # ``known_future`` referenced it regardless; build it independently instead.
        df_regressors = _generate_long_df(periods + horizon, "regressor")
        # Keep the timestamp/segment columns only once in the concatenated frame.
        exog_frames.append(df_regressors[["regressor"]] if exog_frames else df_regressors)

    df_exog = None
    if exog_frames:
        df_exog = TSDataset.to_dataset(pd.concat(exog_frames, axis=1))

    ts = TSDataset(
        df=TSDataset.to_dataset(df),
        freq="D",
        df_exog=df_exog,
        known_future=["regressor"] if regressors else (),
    )
    return ts


def set_seed(seed: int = 42):
    """Seed every random number generator used by the script.

    Parameters
    ----------
    seed:
        Value handed, in order, to Python's ``random``, NumPy's global generator,
        ``torch.manual_seed`` and ``torch.cuda.manual_seed_all``.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


def main():
    """Time a 3-fold TFT backtest on synthetic data and log the results.

    Seeds the generators, builds a dataset of 100 segments with 500 periods plus
    exogenous and regressor columns, seeds again, runs ``Pipeline.backtest`` with
    a single job and logs the elapsed wall-clock time and the mean MAE.
    """
    set_seed()

    # NOTE: the bundled example can be benchmarked instead by reading
    # "examples/data/example_dataset.csv" and wrapping it in a TSDataset with freq="D".
    ts = generate_tsdataset(
        {
            "n_segments": 100,
            "periods": 500,
            "exogs": True,
            "regressors": True,
            "horizon": 14,
        }
    )

    # Re-seed so the model run does not depend on how much randomness data generation used.
    set_seed()

    date_flags = DateFlagsTransform(
        day_number_in_week=True,
        day_number_in_month=False,
        out_column="regressor_dateflag",
    )
    dataset_builder = PytorchForecastingDatasetBuilder(
        max_encoder_length=21,
        min_encoder_length=21,
        max_prediction_length=HORIZON,
        time_varying_known_reals=["time_idx"],
        time_varying_known_categoricals=["regressor_dateflag_day_number_in_week"],
        time_varying_unknown_reals=["target"],
        static_categoricals=["segment"],
        target_normalizer=None,
    )
    model_tft = TFTModel(
        dataset_builder=dataset_builder,
        trainer_params={"max_epochs": 1},
    )

    pipeline_tft = Pipeline(
        model=model_tft,
        transforms=[date_flags],
        horizon=HORIZON,
    )

    # Only the backtest itself is timed; forecasts and fold info are not used afterwards.
    started_at = time.perf_counter()
    metrics_tft, _forecast, _fold_info = pipeline_tft.backtest(
        ts, metrics=[SMAPE(), MAPE(), MAE()], n_folds=3, n_jobs=1
    )
    run_time = time.perf_counter() - started_at

    logger.info(f"Run time: {run_time:.3f}")
    logger.info(f"Metrics: {metrics_tft['MAE'].mean():.3f}")


# Run the benchmark only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Results:

  • Time: 167.4
  • MAE: 16.095

@codecov-commenter
Copy link

codecov-commenter commented Jul 20, 2023

Codecov Report

Merging #1322 (0f994a0) into master (75e8fc1) will increase coverage by 0.14%.
The diff coverage is 100.00%.

❗ Your organization is not using the GitHub App Integration. As a result you may experience degraded service beginning May 15th. Please install the Github App Integration for your organization. Read more.

@@            Coverage Diff             @@
##           master    #1322      +/-   ##
==========================================
+ Coverage   88.95%   89.09%   +0.14%     
==========================================
  Files         193      204      +11     
  Lines       12319    12638     +319     
==========================================
+ Hits        10958    11260     +302     
- Misses       1361     1378      +17     
Impacted Files Coverage Δ
etna/models/nn/utils.py 85.61% <100.00%> (+0.40%) ⬆️

... and 11 files with indirect coverage changes

📣 We’re building smart automated test selection to slash your CI/CD build times. Learn more

@github-actions
Copy link

@github-actions github-actions bot temporarily deployed to pull request July 20, 2023 11:25 Inactive
@alex-hse-repository alex-hse-repository merged commit cd93027 into master Jul 24, 2023
12 checks passed
This pull request was closed.
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants