From 97f31db5895838dff092e88384bc240de36f75ee Mon Sep 17 00:00:00 2001 From: Lyubov Yamshchikova <43475193+YamLyubov@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:19:52 +0300 Subject: [PATCH] Replace exogenous example (#1108) * Replace exogenous example * Remove print --- .../time_series_forecasting/exogenous.py | 154 +++++++----------- fedot/api/main.py | 2 +- fedot/core/pipelines/pipeline.py | 5 - 3 files changed, 57 insertions(+), 104 deletions(-) diff --git a/examples/advanced/time_series_forecasting/exogenous.py b/examples/advanced/time_series_forecasting/exogenous.py index 9f3a191632..a7788364a0 100644 --- a/examples/advanced/time_series_forecasting/exogenous.py +++ b/examples/advanced/time_series_forecasting/exogenous.py @@ -1,64 +1,23 @@ -import timeit +import os import warnings import numpy as np import pandas as pd -from matplotlib import pyplot as plt -from sklearn.metrics import mean_squared_error, mean_absolute_error +from fedot.api.main import Fedot from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.data.multi_modal import MultiModalData -from fedot.core.pipelines.node import PipelineNode -from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.pipeline_builder import PipelineBuilder from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import Task, TsForecastingParams, TaskTypesEnum from fedot.core.utils import fedot_project_root -from fedot.core.utils import set_random_seed warnings.filterwarnings('ignore') +np.random.seed(2020) -def make_forecast(pipeline, train: InputData, predict: InputData, - train_exog: InputData, predict_exog: InputData): - """ - Function for predicting values in a time series - - :return predicted_values: numpy array, forecast of model - """ - - # Fit it - start_time = timeit.default_timer() - - second_node_name = 'exog_ts' - - if train_exog is None: - second_node_name = 'data_source_ts/2' 
- train_exog = train - predict_exog = predict - - train_dataset = MultiModalData({ - 'data_source_ts/1': train, - second_node_name: train_exog}) - - predict_dataset = MultiModalData({ - 'data_source_ts/1': predict, - second_node_name: predict_exog}) - - pipeline.fit_from_scratch(train_dataset) - amount_of_seconds = timeit.default_timer() - start_time - - print(f'\nIt takes {amount_of_seconds:.2f} seconds to train pipeline\n') - - # Predict - predicted_values = pipeline.predict(predict_dataset) - predicted_values = predicted_values.predict - - return predicted_values - - -def run_exogenous_experiment(path_to_file, len_forecast=250, with_exog=True, - visualization=False) -> None: +def run_exogenous_experiment(path_to_file, len_forecast=250, with_exog=True, visualization=False) -> np.array: """ Function with example how time series forecasting can be made with using exogenous features @@ -68,72 +27,71 @@ def run_exogenous_experiment(path_to_file, len_forecast=250, with_exog=True, :param visualization: is it needed to make visualizations """ + # Read the file df = pd.read_csv(path_to_file) time_series = np.array(df['Level']) exog_variable = np.array(df['Neighboring level']) task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=len_forecast)) - # Source time series - train_input, predict_input = train_test_data_setup(InputData(idx=range(len(time_series)), - features=time_series, - target=time_series, - task=task, - data_type=DataTypesEnum.ts)) - - # Exogenous time series - predict_input_exog = InputData(idx=np.arange(len(exog_variable)), - features=exog_variable, target=time_series, - task=task, data_type=DataTypesEnum.ts) - - train_input_exog, predict_input_exog = train_test_data_setup(predict_input_exog) + valiadion_blocks = 2 - if with_exog is True: - # Example with exogenous time series - node_source = PipelineNode('data_source_ts/1') - node_lagged = PipelineNode('lagged', nodes_from=[node_source]) + # Target time series for lagged 
transformation + train_lagged, predict_lagged = train_test_data_setup(InputData(idx=np.arange(len(time_series)), + features=time_series, + target=time_series, + task=task, + data_type=DataTypesEnum.ts), + validation_blocks=valiadion_blocks) - node_exog = PipelineNode('exog_ts') - - node_final = PipelineNode('ridge', nodes_from=[node_lagged, node_exog]) - pipeline = Pipeline(node_final) + # Exogenous time series + train_exog, predict_exog = train_test_data_setup(InputData(idx=np.arange(len(exog_variable)), + features=exog_variable, + target=time_series, + task=task, + data_type=DataTypesEnum.ts), + validation_blocks=valiadion_blocks) + + if with_exog: + train_dataset = MultiModalData({ + 'lagged': train_lagged, + 'exog_ts': train_exog + }) + + predict_dataset = MultiModalData({ + 'lagged': predict_lagged, + 'exog_ts': predict_exog + }) + + # Create a pipeline with different data sources in the nodes + pipeline = PipelineBuilder().add_node('lagged', 0).add_node('exog_ts', 1).join_branches('ridge').build() else: - # Simple example without exogenous time series - node_source_1 = PipelineNode('data_source_ts/1') - node_source_2 = PipelineNode('data_source_ts/2') - - node_lagged_1 = PipelineNode('lagged', nodes_from=[node_source_1]) - node_lagged_2 = PipelineNode('lagged', nodes_from=[node_source_2]) + train_dataset = train_lagged + predict_dataset = predict_lagged - node_ridge_1 = PipelineNode('ridge', nodes_from=[node_lagged_1]) - node_ridge_2 = PipelineNode('ridge', nodes_from=[node_lagged_2]) - node_final = PipelineNode('ridge', nodes_from=[node_ridge_1, node_ridge_2]) - train_input_exog = None - predict_input_exog = None - pipeline = Pipeline(node_final) - - predicted = make_forecast(pipeline, train_input, predict_input, train_input_exog, predict_input_exog) - - predicted = np.ravel(np.array(predicted)) - test_data = np.ravel(predict_input.target) + # Simple example without exogenous time series + pipeline = PipelineBuilder().add_sequence('lagged', 'ridge').build() - 
print(f'Predicted values: {predicted[:5]}') - print(f'Actual values: {test_data[:5]}') + # Fit it + fedot = Fedot(problem='ts_forecasting', + task_params=task.task_params, + timeout=10, + initial_assumption=pipeline, + max_pipeline_fit_time=1, + n_jobs=-1) + fedot.fit(train_dataset) - mse_before = mean_squared_error(test_data, predicted, squared=False) - mae_before = mean_absolute_error(test_data, predicted) - print(f'RMSE - {mse_before:.4f}') - print(f'MAE - {mae_before:.4f}\n') + # Predict + predicted = fedot.predict(predict_dataset, validation_blocks=valiadion_blocks) + print(fedot.get_metrics(metric_names='mae', validation_blocks=valiadion_blocks)) if visualization: - plt.plot(range(0, len(time_series)), time_series, label='Actual time series') - plt.plot(range(len(train_input.target), len(time_series)), predicted, label='Forecast') - plt.legend() - plt.grid() - plt.show() + fedot.current_pipeline.show() + # Plot predictions and true values + fedot.plot_prediction(target='lagged') + return predicted -if __name__ == '__main__': - set_random_seed(2020) - data_path = fedot_project_root().joinpath('examples/data/ts', 'ts_sea_level.csv') +if __name__ == '__main__': + data_path = os.path.join(f'{fedot_project_root()}', 'examples/data/ts', 'ts_sea_level.csv') run_exogenous_experiment(path_to_file=data_path, len_forecast=250, with_exog=True, visualization=True) diff --git a/fedot/api/main.py b/fedot/api/main.py index 9ecdad0968..26132652c5 100644 --- a/fedot/api/main.py +++ b/fedot/api/main.py @@ -493,7 +493,7 @@ def get_metrics(self, if target is not None: if self.test_data is None: - self.test_data = InputData(idx=range(len(self.prediction.predict)), + self.test_data = InputData(idx=np.arange(len(self.prediction.predict)), features=None, target=target[:len(self.prediction.predict)], task=self.train_data.task, diff --git a/fedot/core/pipelines/pipeline.py b/fedot/core/pipelines/pipeline.py index a9a608c2c9..beba96c5ed 100644 --- a/fedot/core/pipelines/pipeline.py +++ 
b/fedot/core/pipelines/pipeline.py @@ -380,11 +380,6 @@ def _assign_data_to_nodes(self, input_data: Union[InputData, MultiModalData]) -> node.node_data = input_data[node.operation.operation_type] node.direct_set = True else: - print(f'Node info: operation={node.operation}, operation_type{node.operation.operation_type},' - f' input_data{input_data}, all_nodes={self.nodes},' - f' pipeline_nodes={[node for node in self.nodes if isinstance(node, PipelineNode)]},', - f' true_node_types={[type(node) for node in self.nodes]},', - f' primary_nodes={[node for node in self.nodes if node.is_primary]}') raise ValueError(f'No data for primary node {node}') return None return input_data