Skip to content

Commit

Permalink
Fix random FEDOT results with set seed (#1143)
Browse files Browse the repository at this point in the history
* Fix random list elements order in case of set -> list transform

* Add test of FEDOT forecast reproducibility

* Fix random generator in hyperparams

* Fix random models

* Fix some random fit operation with RandomStateHandler

* Fix RandomStateHandler in FEDOT

* Change RandomStateHandler to ImplementationRandomStateHandler

* Fix ImplementationRandomStateHandler parameters

* Fix seed for torch

* Fix constant seed in ImplementationRandomStateHandler

* Fix test and random seed in CGRU
  • Loading branch information
kasyanovse authored Aug 22, 2023
1 parent 8cb0468 commit 09da40c
Show file tree
Hide file tree
Showing 17 changed files with 136 additions and 36 deletions.
2 changes: 1 addition & 1 deletion fedot/api/api_utils/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def filter_operations_by_preset(self, data_type: Optional[DataTypesEnum] = None)
filtered_operations = set(available_operations).difference(set(excluded_tree))
available_operations = list(filtered_operations)

return available_operations
return sorted(available_operations)

@staticmethod
def new_operations_without_heavy(excluded_operations, available_operations) -> list:
Expand Down
2 changes: 1 addition & 1 deletion fedot/core/data/multi_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def from_csv_time_series(cls,
df = get_df_from_csv(file_path, delimiter, index_col, possible_idx_keywords, columns_to_use=columns_to_use)
idx = df.index.to_numpy()
if not columns_to_use:
columns_to_use = list(set(df.columns) - {index_col})
columns_to_use = sorted(list(set(df.columns) - {index_col}))

if is_predict:
raise NotImplementedError(
Expand Down
5 changes: 4 additions & 1 deletion fedot/core/operations/evaluation/custom.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import warnings
from typing import Optional

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy
from fedot.core.operations.evaluation.operation_implementations.models.custom_model import CustomModelImplementation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.utilities.random import ImplementationRandomStateHandler

warnings.filterwarnings("ignore", category=UserWarning)

Expand All @@ -24,7 +26,8 @@ def __init__(self, operation_type: Optional[str], params: Optional[OperationPara

def fit(self, train_data: InputData):
""" Fit method for custom strategy"""
self.operation_impl.fit(train_data)
with ImplementationRandomStateHandler(implementation=self.operation_impl):
self.operation_impl.fit(train_data)
return self.operation_impl

def predict(self, trained_operation, predict_data: InputData) -> OutputData:
Expand Down
12 changes: 6 additions & 6 deletions fedot/core/operations/evaluation/evaluation_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,12 @@ def fit(self, train_data: InputData):

# Multi-output task or not
is_multi_target = is_multi_output_task(train_data)
if is_model_not_support_multi and is_multi_target:
# Manually wrap the regressor into multi-output model
operation_implementation = convert_to_multivariate_model(operation_implementation,
train_data)
else:
with ImplementationRandomStateHandler(implementation=operation_implementation):
with ImplementationRandomStateHandler(implementation=operation_implementation):
if is_model_not_support_multi and is_multi_target:
# Manually wrap the regressor into multi-output model
operation_implementation = convert_to_multivariate_model(operation_implementation,
train_data)
else:
operation_implementation.fit(train_data.features, train_data.target)
return operation_implementation

Expand Down
3 changes: 1 addition & 2 deletions fedot/core/operations/evaluation/gpu/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from golem.utilities.requirements_notificator import warn_requirement

from fedot.core.operations.operation_parameters import OperationParameters
from fedot.utilities.random import ImplementationRandomStateHandler

try:
from cuml import KMeans
Expand All @@ -16,8 +15,8 @@
from typing import Optional

from fedot.core.data.data import InputData, OutputData

from fedot.core.operations.evaluation.gpu.common import CuMLEvaluationStrategy
from fedot.utilities.random import ImplementationRandomStateHandler


class CumlClusteringStrategy(CuMLEvaluationStrategy):
Expand Down
3 changes: 1 addition & 2 deletions fedot/core/operations/evaluation/gpu/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

from golem.utilities.requirements_notificator import warn_requirement

from fedot.utilities.random import ImplementationRandomStateHandler

try:
import cudf
import cuml
Expand All @@ -25,6 +23,7 @@
from fedot.core.operations.evaluation.evaluation_interfaces import SkLearnEvaluationStrategy
from fedot.core.repository.operation_types_repository import OperationTypesRepository
from fedot.core.repository.tasks import TaskTypesEnum
from fedot.utilities.random import ImplementationRandomStateHandler


class CuMLEvaluationStrategy(SkLearnEvaluationStrategy):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def _apply_label_encoder(self, categorical_column: np.array, categorical_id: int
encoder_classes = list(column_encoder.classes_)

# If the column contains categories not previously encountered
for label in list(set(categorical_column)):
for label in sorted(list(set(categorical_column))):
if label not in encoder_classes:
encoder_classes.append(label)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(self, params: OperationParameters):
self.optimizer = self.optim_dict[params.get("optimizer")]
self.criterion = self.loss_dict[params.get("loss")]()
self.scheduler = MultiStepLR(self.optimizer, milestones=[30, 80], gamma=0.5)
self.seed = None

@property
def learning_rate(self) -> float:
Expand All @@ -64,6 +65,9 @@ def fit(self, train_data: InputData):
:param train_data: data with features, target and ids to process
"""
if self.seed is not None:
torch.manual_seed(self.seed)
self.model.seed = self.seed
self.model.init_linear(train_data.task.task_params.forecast_length)
self.model = self.model.to(self.device)
data_loader = self._create_dataloader(train_data)
Expand Down Expand Up @@ -174,12 +178,18 @@ def __init__(self,
self.gru = nn.GRU(cnn2_output_size, self.hidden_size, dropout=0.1)
self.hidden_cell = None
self.linear = None
self.seed = None

def init_linear(self, forecast_length):
self.linear = nn.Linear(self.hidden_size, forecast_length)

def init_hidden(self, batch_size, device):
self.hidden_cell = torch.randn(1, batch_size, self.hidden_size).to(device)
kwargs = dict()
if self.seed is not None:
g = torch.Generator()
g.manual_seed(self.seed)
kwargs['generator'] = g
self.hidden_cell = torch.randn(1, batch_size, self.hidden_size, **kwargs).to(device)

def forward(self, x):
if self.hidden_cell is None:
Expand Down Expand Up @@ -217,13 +227,19 @@ def __init__(self,
self.lstm = nn.LSTM(cnn2_output_size, self.hidden_size, dropout=0.1)
self.hidden_cell = None
self.linear = nn.Linear(self.hidden_size * 2, 1)
self.seed = None

def init_linear(self, forecast_length):
self.linear = nn.Linear(self.hidden_size * 2, forecast_length)

def init_hidden(self, batch_size, device):
self.hidden_cell = (torch.randn(1, batch_size, self.hidden_size).to(device),
torch.randn(1, batch_size, self.hidden_size).to(device))
kwargs = dict()
if self.seed is not None:
g = torch.Generator()
g.manual_seed(self.seed)
kwargs['generator'] = g
self.hidden_cell = (torch.randn(1, batch_size, self.hidden_size, **kwargs).to(device),
torch.randn(1, batch_size, self.hidden_size, **kwargs).to(device))

def forward(self, x):
if self.hidden_cell is None:
Expand Down
12 changes: 7 additions & 5 deletions fedot/core/operations/evaluation/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from fedot.core.operations.operation_parameters import OperationParameters
from fedot.utilities.random import ImplementationRandomStateHandler

warnings.filterwarnings("ignore", category=UserWarning)

Expand All @@ -32,7 +33,8 @@ def fit(self, train_data: InputData):

features_list = self._convert_to_one_dim(train_data.features)

self.vectorizer.fit(features_list)
with ImplementationRandomStateHandler(implementation=self.vectorizer):
self.vectorizer.fit(features_list)

return self.vectorizer

Expand Down Expand Up @@ -83,8 +85,8 @@ def fit(self, train_data: InputData):
:param InputData train_data: data used for operation training
:return: trained model
"""

self.text_processor.fit(train_data)
with ImplementationRandomStateHandler(implementation=self.text_processor):
self.text_processor.fit(train_data)
return self.text_processor

def predict(self, trained_operation, predict_data: InputData) -> OutputData:
Expand Down Expand Up @@ -134,8 +136,8 @@ def fit(self, train_data: InputData):
:param train_data: data with features, target and ids to process
"""

self.vectorizer.fit(train_data)
with ImplementationRandomStateHandler(implementation=self.vectorizer):
self.vectorizer.fit(train_data)
return self.vectorizer

def predict(self, trained_operation, predict_data: InputData) -> OutputData:
Expand Down
8 changes: 6 additions & 2 deletions fedot/core/operations/evaluation/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from fedot.core.operations.evaluation.operation_implementations.models.ts_implementations.poly import \
PolyfitImplementation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.utilities.random import ImplementationRandomStateHandler

warnings.filterwarnings("ignore", category=UserWarning)

Expand Down Expand Up @@ -58,7 +59,8 @@ def fit(self, train_data: InputData):
warnings.filterwarnings("ignore", category=RuntimeWarning)
model = self.operation(self.params_for_fit)

model.fit(train_data)
with ImplementationRandomStateHandler(implementation=model):
model.fit(train_data)
return model

def predict(self, trained_operation, predict_data: InputData) -> OutputData:
Expand Down Expand Up @@ -126,8 +128,10 @@ def fit(self, train_data: InputData):
:return: trained operation (if it is needed for applying)
"""
warnings.filterwarnings("ignore", category=RuntimeWarning)

transformation_operation = self.operation(self.params_for_fit)
transformation_operation.fit(train_data)
with ImplementationRandomStateHandler(implementation=transformation_operation):
transformation_operation.fit(train_data)
return transformation_operation

def predict(self, trained_operation, predict_data: InputData) -> OutputData:
Expand Down
2 changes: 1 addition & 1 deletion fedot/core/operations/operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _eval_strategy_for_task(operation_type: str, current_task_type: TaskTypesEnu
globally_compatible_task_types = compatible_task_types(current_task_type)
globally_set = set(globally_compatible_task_types)

comp_types_acceptable_for_operation = list(set_acceptable_types.intersection(globally_set))
comp_types_acceptable_for_operation = sorted(list(set_acceptable_types.intersection(globally_set)))
if len(comp_types_acceptable_for_operation) == 0:
raise ValueError(f'Operation {operation_type} can not be used as a part of {current_task_type}.')
current_task_type = comp_types_acceptable_for_operation[0]
Expand Down
4 changes: 2 additions & 2 deletions fedot/core/pipelines/pipeline_node_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def get_node(self,
def _return_node(candidates) -> Optional[OptNode]:
if not candidates:
return None
return OptNode(content={'name': choice(candidates)})
return OptNode(content={'name': choice(sorted(candidates))})

@staticmethod
def filter_specific_candidates(candidates: list):
return list(filter(lambda x: not check_for_specific_operations(x), candidates))
return sorted(list(filter(lambda x: not check_for_specific_operations(x), candidates)))
4 changes: 3 additions & 1 deletion fedot/core/pipelines/tuning/hyperparams.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import random

import numpy as np
from golem.core.log import default_log
from golem.core.tuning.hyperopt_tuner import get_parameter_hyperopt_space
from hyperopt.pyll.stochastic import sample as hp_sample
Expand Down Expand Up @@ -84,7 +85,8 @@ def _random_change(parameter_name, **kwargs):
parameter_name=parameter_name,
label=parameter_name)
# Randomly choose new value
new_value = hp_sample(space)
rng = np.random.default_rng(random.randint(0, np.iinfo(np.int32).max))
new_value = hp_sample(space, rng=rng)
return {parameter_name: new_value}

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion fedot/core/pipelines/verification_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def has_no_data_flow_conflicts_in_ts_pipeline(pipeline: Pipeline):
""" Function checks the correctness of connection between nodes """
task = Task(TaskTypesEnum.ts_forecasting)
ts_models = get_operations_for_task(task=task, mode='model', tags=["non_lagged"])
non_ts_models = list(set(get_operations_for_task(task=task, mode='model')).difference(set(ts_models)))
non_ts_models = sorted(list(set(get_operations_for_task(task=task, mode='model')).difference(set(ts_models))))

# Preprocessing not only for time series
non_ts_data_operations = get_operations_for_task(task=task,
Expand Down
6 changes: 3 additions & 3 deletions fedot/core/repository/pipeline_operation_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def from_available_operations(self, task: Task, preset: str,
available_operations: List[str]):
""" Initialize repository from available operations, task and preset """
operations_by_task_preset = OperationsPreset(task, preset).filter_operations_by_preset()
all_operations = list(set.intersection(set(operations_by_task_preset), set(available_operations)))
all_operations = sorted(list(set.intersection(set(operations_by_task_preset), set(available_operations))))
primary_operations, secondary_operations = \
self.divide_operations(all_operations, task)
self.operations_by_keys = {'primary': primary_operations, 'secondary': secondary_operations}
Expand All @@ -40,7 +40,7 @@ def get_operations(self, is_primary: bool) -> List[str]:

def get_all_operations(self) -> List[str]:
""" Get all pipeline operations with all keys """
return list(itertools.chain(*self.operations_by_keys.values()))
return sorted(list(itertools.chain(*self.operations_by_keys.values())))

@staticmethod
def divide_operations(available_operations, task):
Expand All @@ -61,7 +61,7 @@ def divide_operations(available_operations, task):
ts_primary_operations = ts_data_operations + ts_primary_models

# Filter - remain only operations, which were in available ones
primary_operations = list(set(ts_primary_operations).intersection(available_operations))
primary_operations = sorted(list(set(ts_primary_operations).intersection(available_operations)))
secondary_operations = available_operations
else:
primary_operations = available_operations
Expand Down
5 changes: 1 addition & 4 deletions fedot/utilities/random.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
from typing import Optional, Union

from golem.core.utilities.random import RandomStateHandler

from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \
DataOperationImplementation, ModelImplementation
from fedot.core.utils import RandomStateHandler


class ImplementationRandomStateHandler(RandomStateHandler):
MODEL_FITTING_SEED = 0

def __init__(self, seed: Optional[int] = None,
implementation: Union[DataOperationImplementation, ModelImplementation] = None):
super().__init__(seed)
Expand Down
Loading

0 comments on commit 09da40c

Please sign in to comment.