Fix multiobj tuning (#1156)
* Fix tuner builder for multi objective tuning

* Correct docs

* Fix tests

* Fix PEP8
YamLyubov authored Aug 24, 2023
1 parent 97f31db commit cebd493
Showing 3 changed files with 214 additions and 65 deletions.
109 changes: 106 additions & 3 deletions docs/source/advanced/hyperparameters_tuning.rst
@@ -1,8 +1,9 @@

Tuning of Hyperparameters
=========================
To tune pipeline hyperparameters you can use GOLEM. There are two ways:

-1. Tuning of all models' hyperparameters simultaneously. Implemented via ``SimultaneousTuner`` and ``IOptTuner`` classes.
+1. Tuning of all models' hyperparameters simultaneously. Implemented via ``SimultaneousTuner``, ``OptunaTuner`` and ``IOptTuner`` classes.

2. Tuning of models' hyperparameters sequentially, node by node, optimizing the metric value for the whole pipeline, or tuning
only one node's hyperparameters. Implemented via ``SequentialTuner`` class.
@@ -16,22 +17,25 @@ using ``SimultaneousTuner`` is applied for composed pipeline and ``metric`` value
FEDOT uses tuner implementations from GOLEM, see `GOLEM documentation`_ for more information.

.. list-table:: Tuners comparison
-:widths: 10 30 30 30
+:widths: 10 30 30 30 30
:header-rows: 1

* -
- ``SimultaneousTuner``
- ``SequentialTuner``
- ``IOptTuner``
- ``OptunaTuner``
* - Based on
- Hyperopt
- Hyperopt
- iOpt
- Optuna
* - Type of tuning
- Simultaneous
- | Sequential or
| for one node only
- Simultaneous
- Simultaneous
* - | Optimized
| parameters
- | categorical
@@ -42,10 +46,14 @@ FEDOT uses tuner implementations from GOLEM, see `GOLEM documentation`_ for more
| continuous
- | discrete
| continuous
- | categorical
| discrete
| continuous
* - Algorithm type
- stochastic
- stochastic
- deterministic
- stochastic
* - | Supported
| constraints
- | timeout
@@ -58,11 +66,22 @@ FEDOT uses tuner implementations from GOLEM, see `GOLEM documentation`_ for more
| eval_time_constraint
- | iterations
| eval_time_constraint
- | timeout
| iterations
| early_stopping_rounds
| eval_time_constraint
* - | Supports initial
| point
- Yes
- No
- No
- Yes
* - | Supports multi
| objective tuning
- No
- No
- No
- Yes

Hyperopt-based tuners usually take less time per iteration, but ``IOptTuner`` is able to obtain much more stable results.
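For instance, the same builder chain works for any of the tuners above; only the class passed to ``.with_tuner()`` changes. A minimal sketch (assuming ``IOptTuner`` is importable from ``golem.core.tuning.iopt_tuner``):

.. code-block:: python

    from golem.core.tuning.iopt_tuner import IOptTuner
    from fedot.core.data.data import InputData
    from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
    from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
    from fedot.core.repository.tasks import Task, TaskTypesEnum

    task = Task(TaskTypesEnum.regression)
    train_data = InputData.from_csv('train_data.csv', task=task)
    # Swap IOptTuner for SimultaneousTuner or OptunaTuner without other changes.
    pipeline_tuner = TunerBuilder(task) \
        .with_tuner(IOptTuner) \
        .with_metric(RegressionMetricsEnum.MSE) \
        .with_iterations(100) \
        .build(train_data)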

@@ -488,7 +507,91 @@ Tuned pipeline structure:
{'depth': 2, 'length': 3, 'nodes': [knnreg, knnreg, rfr]}
knnreg - {'n_neighbors': 51}
knnreg - {'n_neighbors': 40}
-rfr - {'n_jobs': 1, 'max_features': 0.05324707031250003, 'min_samples_split': 12, 'min_samples_leaf': 11}
+rfr - {'n_jobs': 1, 'max_features': 0.05324, 'min_samples_split': 12, 'min_samples_leaf': 11}
Example for ``OptunaTuner``:

.. code-block:: python
from golem.core.tuning.optuna_tuner import OptunaTuner
from fedot.core.data.data import InputData
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
from fedot.core.repository.tasks import TaskTypesEnum, Task
task = Task(TaskTypesEnum.regression)
tuner = OptunaTuner
metric = RegressionMetricsEnum.MSE
iterations = 100
train_data = InputData.from_csv('train_data.csv', task=task)
pipeline = PipelineBuilder().add_node('knnreg', branch_idx=0).add_branch('rfr', branch_idx=1) \
.join_branches('knnreg').build()
pipeline_tuner = TunerBuilder(task) \
.with_tuner(tuner) \
.with_metric(metric) \
.with_iterations(iterations) \
.build(train_data)
tuned_pipeline = pipeline_tuner.tune(pipeline)
tuned_pipeline.print_structure()
Tuned pipeline structure:

.. code-block:: python
Pipeline structure:
{'depth': 2, 'length': 3, 'nodes': [knnreg, knnreg, rfr]}
knnreg - {'n_neighbors': 51}
knnreg - {'n_neighbors': 40}
rfr - {'n_jobs': 1, 'max_features': 0.05, 'min_samples_split': 12, 'min_samples_leaf': 11}
Multi objective tuning
^^^^^^^^^^^^^^^^^^^^^^

Multi objective tuning is available only for ``OptunaTuner``. Pass a list of metrics to ``.with_metric()``
and obtain a list of tuned pipelines representing a Pareto front after tuning.

.. code-block:: python
from typing import Iterable
from golem.core.tuning.optuna_tuner import OptunaTuner
from fedot.core.data.data import InputData
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
from fedot.core.repository.tasks import TaskTypesEnum, Task
task = Task(TaskTypesEnum.regression)
tuner = OptunaTuner
metric = [RegressionMetricsEnum.MSE, RegressionMetricsEnum.MAE]
iterations = 100
train_data = InputData.from_csv('train_data.csv', task=task)
pipeline = PipelineBuilder().add_node('knnreg', branch_idx=0).add_branch('rfr', branch_idx=1) \
.join_branches('knnreg').build()
pipeline_tuner = TunerBuilder(task) \
.with_tuner(tuner) \
.with_metric(metric) \
.with_iterations(iterations) \
.build(train_data)
pareto_front: Iterable[Pipeline] = pipeline_tuner.tune(pipeline)
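Each pipeline on the front can then be inspected or fitted as usual. A short sketch continuing the example above:

.. code-block:: python

    for pipeline in pareto_front:
        pipeline.print_structure()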
Sequential tuning
-----------------
19 changes: 13 additions & 6 deletions fedot/core/pipelines/tuning/tuner_builder.py
@@ -1,8 +1,10 @@
from datetime import timedelta
-from typing import Type, Union
+from typing import Type, Union, Iterable, Sequence

+from golem.core.tuning.optuna_tuner import OptunaTuner
from golem.core.tuning.simultaneous import SimultaneousTuner
from golem.core.tuning.tuner_interface import BaseTuner
+from golem.core.utilities.data_structures import ensure_wrapped_in_sequence

from fedot.core.constants import DEFAULT_TUNING_ITERATIONS_NUMBER
from fedot.core.data.data import InputData
@@ -23,7 +25,7 @@ def __init__(self, task: Task):
        self.cv_folds = None
        self.validation_blocks = None
        self.n_jobs = -1
-        self.metric: MetricsEnum = MetricByTask.get_default_quality_metrics(task.task_type)[0]
+        self.metric: Sequence[MetricsEnum] = MetricByTask.get_default_quality_metrics(task.task_type)
        self.iterations = DEFAULT_TUNING_ITERATIONS_NUMBER
        self.early_stopping_rounds = None
        self.timeout = timedelta(minutes=5)
@@ -53,8 +55,8 @@ def with_n_jobs(self, n_jobs: int):
        self.n_jobs = n_jobs
        return self

-    def with_metric(self, metric: MetricType):
-        self.metric = metric
+    def with_metric(self, metrics: Union[MetricType, Iterable[MetricType]]):
+        self.metric = ensure_wrapped_in_sequence(metrics)
        return self

    def with_iterations(self, iterations: int):
@@ -88,11 +90,16 @@ def with_adapter(self, adapter):
        return self

    def with_additional_params(self, **parameters):
-        self.additional_params = parameters
+        self.additional_params.update(parameters)
        return self

    def build(self, data: InputData) -> BaseTuner:
-        objective = MetricsObjective(self.metric)
+        if len(self.metric) > 1:
+            if self.tuner_class is OptunaTuner:
+                self.additional_params.update({'objectives_number': len(self.metric)})
+            else:
+                raise ValueError('Multi objective tuning applicable only for OptunaTuner.')
+        objective = MetricsObjective(self.metric, is_multi_objective=len(self.metric) > 1)
        data_splitter = DataSourceSplitter(self.cv_folds, validation_blocks=self.validation_blocks)
        data_producer = data_splitter.build(data)
        objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer,
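In effect, the new check makes multi objective tuning an OptunaTuner-only feature at build time. A hypothetical usage sketch (the data file is a placeholder, mirroring the documentation examples above):

from golem.core.tuning.optuna_tuner import OptunaTuner
from golem.core.tuning.simultaneous import SimultaneousTuner
from fedot.core.data.data import InputData
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum

task = Task(TaskTypesEnum.regression)
train_data = InputData.from_csv('train_data.csv', task=task)
metrics = [RegressionMetricsEnum.MSE, RegressionMetricsEnum.MAE]

# With OptunaTuner, build() forwards 'objectives_number' = len(metrics) to the tuner.
tuner = TunerBuilder(task).with_tuner(OptunaTuner).with_metric(metrics).build(train_data)

# With any other tuner, more than one metric now fails fast.
try:
    TunerBuilder(task).with_tuner(SimultaneousTuner).with_metric(metrics).build(train_data)
except ValueError as err:
    print(err)  # Multi objective tuning applicable only for OptunaTuner.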
