microsoft · bpkroth · Jan 16, 2024 · Jan 10, 2024 · Jan 11, 2024 · Jan 11, 2024
@@ -153,6 +153,8 @@ def name(self) -> str:
         """
         return self.__class__.__name__
 
+    # TODO: Expand these properties for multi-objective.
+
     @property
     def is_min(self) -> bool:
         """
@@ -167,6 +169,13 @@ def target(self) -> str:
         """
         return self._opt_target
 
+    @property
+    def direction(self) -> str:
+        """
+        Name of the direction in which the target metric is optimized.
+
+        Returns
+        -------
+        direction : str
+            'min' when `is_min` is true, 'max' otherwise.
+        """
+        if self.is_min:
+            return 'min'
+        return 'max'
+
     @property
     def supports_preload(self) -> bool:
         """

@@ -86,7 +86,8 @@ def _optimize(*,
             trial_id=trial_id,
             root_env_config=root_env_config,
             description=env.name,
-            opt_target=opt.target
+            opt_target=opt.target,
+            opt_direction=opt.direction,
          ) as exp:
 
         _LOG.info("Experiment: %s Env: %s Optimizer: %s", exp, env, opt)
@@ -118,9 +119,13 @@ def _optimize(*,
                 config_id = -1
 
             trial = exp.new_trial(tunables, config={
+                # Add some additional metadata to track for the trial such as the
+                # optimizer config used.
+                # TODO: Improve for supporting multi-objective
+                # (e.g., opt_target_1, opt_target_2, ...)
                 "optimizer": opt.name,
                 "opt_target": opt.target,
-                "opt_direction": "min" if opt.is_min else "max",
+                "opt_direction": opt.direction,
             })
             _run(env_context, opt_context, trial, global_config)
 

@@ -56,6 +56,19 @@ def root_env_config(self) -> Tuple[str, str, str]:
     def __repr__(self) -> str:
         return f"Experiment :: {self._exp_id}: '{self._description}'"
 
+    @property
+    @abstractmethod
+    def objectives(self) -> Dict[str, str]:
+        """
+        Retrieve the experiment's objectives data from the storage.
+
+        Returns
+        -------
+        objectives : Dict[str, str]
+            A dictionary mapping the experiment's objective names
+            (optimization_targets) to their directions (e.g., min or max).
+        """
+
     @property
     @abstractmethod
     def trials(self) -> Dict[int, TrialData]:

@@ -83,7 +83,8 @@ def experiment(self, *,
                    trial_id: int,
                    root_env_config: str,
                    description: str,
-                   opt_target: str) -> 'Storage.Experiment':
+                   opt_target: str,
+                   opt_direction: str) -> 'Storage.Experiment':
         """
         Create a new experiment in the storage.
 
@@ -103,6 +104,8 @@ def experiment(self, *,
             Human-readable description of the experiment.
         opt_target : str
             Name of metric we're optimizing for.
+        opt_direction: str
+            Direction to optimize the metric (e.g., min or max)
 
         Returns
         -------
@@ -112,15 +115,28 @@ def experiment(self, *,
         """
 
     class Experiment(metaclass=ABCMeta):
+        # pylint: disable=too-many-instance-attributes
         """
         Base interface for storing the results of the experiment.
         This class is instantiated in the `Storage.experiment()` method.
         """
 
-        def __init__(self, tunables: TunableGroups, experiment_id: str, root_env_config: str):
+        def __init__(self,
+                     *,
+                     tunables: TunableGroups,
+                     experiment_id: str,
+                     trial_id: int,
+                     root_env_config: str,
+                     description: str,
+                     opt_target: str,
+                     opt_direction: str):
+            """
+            Store the attributes common to all Experiment implementations.
+
+            Parameters
+            ----------
+            tunables : TunableGroups
+                Tunable parameters of the experiment (a copy is stored).
+            experiment_id : str
+                ID of the experiment.
+            trial_id : int
+                Initial value of the current trial ID.
+            root_env_config : str
+                Root environment config; passed to `get_git_info()` to obtain
+                the git repo, git commit, and root env config values to store.
+            description : str
+                Human-readable description of the experiment.
+            opt_target : str
+                Name of metric we're optimizing for.
+            opt_direction : str
+                Direction to optimize the metric (e.g., min or max).
+            """
             self._tunables = tunables.copy()
+            self._trial_id = trial_id
             self._experiment_id = experiment_id
             (self._git_repo, self._git_commit, self._root_env_config) = get_git_info(root_env_config)
+            self._description = description
+            self._opt_target = opt_target
+            self._opt_direction = opt_direction
 
         def __enter__(self) -> 'Storage.Experiment':
             """
@@ -172,6 +188,31 @@ def _teardown(self, is_ok: bool) -> None:
                 True if there were no exceptions during the experiment, False otherwise.
             """
 
+        @property
+        def experiment_id(self) -> str:
+            """Get the Experiment's ID."""
+            return self._experiment_id
+
+        @property
+        def trial_id(self) -> int:
+            """Get the current Trial ID (the value currently held by the Experiment)."""
+            return self._trial_id
+
+        @property
+        def description(self) -> str:
+            """Get the Experiment's human-readable description."""
+            return self._description
+
+        @property
+        def opt_target(self) -> str:
+            """Get the Experiment's optimization target (name of the metric being optimized)."""
+            return self._opt_target
+
+        @property
+        def opt_direction(self) -> str:
+            """Get the Experiment's optimization direction (e.g., min or max)"""
+            return self._opt_direction
+
         @abstractmethod
         def merge(self, experiment_ids: List[str]) -> None:
             """
@@ -249,12 +290,15 @@ class Trial(metaclass=ABCMeta):
 
         def __init__(self, *,
                      tunables: TunableGroups, experiment_id: str, trial_id: int,
-                     config_id: int, opt_target: str, config: Optional[Dict[str, Any]] = None):
+                     config_id: int, opt_target: str, opt_direction: str,
+                     config: Optional[Dict[str, Any]] = None):
+            """
+            Store the attributes common to all Trial implementations.
+
+            Parameters
+            ----------
+            tunables : TunableGroups
+                Tunable parameters of the trial (stored as is, not copied).
+            experiment_id : str
+                ID of the experiment the trial belongs to.
+            trial_id : int
+                ID of the trial.
+            config_id : int
+                ID of the trial's configuration.
+            opt_target : str
+                Name of metric we're optimizing for.
+            opt_direction : str
+                Direction to optimize the metric; must be "min" or "max".
+            config : Optional[Dict[str, Any]]
+                Additional trial metadata; defaults to an empty dict.
+
+            Raises
+            ------
+            ValueError
+                If `opt_direction` is neither "min" nor "max".
+            """
+            # Validate explicitly rather than with `assert`, which is stripped
+            # when Python runs with optimizations enabled (-O).
+            if opt_direction not in {"min", "max"}:
+                raise ValueError(f"Invalid opt_direction: {opt_direction!r} (expected 'min' or 'max')")
             self._tunables = tunables
             self._experiment_id = experiment_id
             self._trial_id = trial_id
             self._config_id = config_id
             self._opt_target = opt_target
+            self._opt_direction = opt_direction
             self._config = config or {}
 
         def __repr__(self) -> str:
@@ -274,6 +318,20 @@ def config_id(self) -> int:
             """
             return self._config_id
 
+        @property
+        def opt_target(self) -> str:
+            """
+            Get the Trial's optimization target (name of the metric being optimized).
+            """
+            return self._opt_target
+
+        @property
+        def opt_direction(self) -> str:
+            """
+            Get the Trial's optimization direction (e.g., min or max).
+            """
+            return self._opt_direction
+
         @property
         def tunables(self) -> TunableGroups:
             """

@@ -35,13 +35,18 @@ def __init__(self, *,
                  trial_id: int,
                  root_env_config: str,
                  description: str,
-                 opt_target: str):
-        super().__init__(tunables, experiment_id, root_env_config)
+                 opt_target: str,
+                 opt_direction: str):
+        super().__init__(
+            tunables=tunables,
+            experiment_id=experiment_id,
+            trial_id=trial_id,
+            root_env_config=root_env_config,
+            description=description,
+            opt_target=opt_target,
+            opt_direction=opt_direction)
         self._engine = engine
         self._schema = schema
-        self._trial_id = trial_id
-        self._description = description
-        self._opt_target = opt_target
 
     def _setup(self) -> None:
         super()._setup()
@@ -76,11 +81,21 @@ def _setup(self) -> None:
                     git_commit=self._git_commit,
                     root_env_config=self._root_env_config,
                 ))
+                # TODO: Expand for multiple objectives.
+                conn.execute(self._schema.objectives.insert().values(
+                    exp_id=self._experiment_id,
+                    optimization_target=self._opt_target,
+                    optimization_direction=self._opt_direction,
+                ))
             else:
                 if exp_info.trial_id is not None:
                     self._trial_id = exp_info.trial_id + 1
                 _LOG.info("Continue experiment: %s last trial: %s resume from: %d",
                           self._experiment_id, exp_info.trial_id, self._trial_id)
+                # TODO: Sanity check that certain critical configs (e.g.,
+                # objectives) haven't changed to be incompatible such that a new
+                # experiment should be started (possibly by prewarming with the
+                # previous one).
                 if exp_info.git_commit != self._git_commit:
                     _LOG.warning("Experiment %s git expected: %s %s",
                                  self, exp_info.git_repo, exp_info.git_commit)
@@ -131,6 +146,8 @@ def load(self, opt_target: Optional[str] = None) -> Tuple[List[dict], List[Optio
                     self._schema.trial.c.trial_id.asc(),
                 )
             )
+            # Note: this iterative approach is somewhat expensive.
+            # TODO: Look into a better bulk fetch option.
             for trial in cur_trials.fetchall():
                 tunables = self._get_params(
                     conn, self._schema.config_param, config_id=trial.config_id)
@@ -180,6 +197,7 @@ def pending_trials(self) -> Iterator[Storage.Trial]:
                     trial_id=trial.trial_id,
                     config_id=trial.config_id,
                     opt_target=self._opt_target,
+                    opt_direction=self._opt_direction,
                     config=config,
                 )
 
@@ -232,6 +250,7 @@ def new_trial(self, tunables: TunableGroups,
                     trial_id=self._trial_id,
                     config_id=config_id,
                     opt_target=self._opt_target,
+                    opt_direction=self._opt_direction,
                     config=config,
                 )
                 self._trial_id += 1

@@ -7,6 +7,8 @@
 """
 from typing import Dict
 
+import logging
+
 import pandas
 from sqlalchemy import Engine
 
@@ -16,6 +18,8 @@
 from mlos_bench.storage.base_trial_data import TrialData
 from mlos_bench.storage.sql.trial_data import TrialSqlData
 
+_LOG = logging.getLogger(__name__)
+
 
 class ExperimentSqlData(ExperimentData):
     """
@@ -34,6 +38,55 @@ def __init__(self, *, engine: Engine, schema: DbSchema, exp_id: str,
         self._engine = engine
         self._schema = schema
 
+    @property
+    def objectives(self) -> Dict[str, str]:
+        """
+        Retrieve the experiment's objectives data from the storage.
+
+        The `objectives` table is consulted first (when the schema has one).
+        The legacy per-trial `opt_target`/`opt_direction` metadata is then
+        merged in for targets that are not already known.
+
+        Returns
+        -------
+        objectives : Dict[str, str]
+            A dictionary mapping the experiment's objective names
+            (optimization_targets) to their directions (e.g., min or max).
+            NOTE: a legacy target whose trial metadata has no `opt_direction`
+            entry is recorded with a `None` direction.
+        """
+        objectives: Dict[str, str] = {}
+        # First try to lookup the objectives from the experiment metadata in the storage layer.
+        if hasattr(self._schema, "objectives"):
+            with self._engine.connect() as conn:
+                objectives_db_data = conn.execute(
+                    self._schema.objectives.select().where(
+                        self._schema.objectives.c.exp_id == self._exp_id,
+                    ).order_by(
+                        self._schema.objectives.c.optimization_target.asc(),
+                    )
+                )
+                objectives = {
+                    objective.optimization_target: objective.optimization_direction
+                    for objective in objectives_db_data.fetchall()
+                }
+        # Backwards compatibility: try and obtain the objectives from the TrialData and merge them in.
+        # NOTE: The original format of storing opt_target/opt_direction in the Trial
+        # metadata did not support multi-objectives.
+        # Nor does it make it easy to detect when a config change caused a switch in
+        # opt_direction for a given opt_target between run.py executions of an
+        # Experiment.
+        # For now, we simply issue a warning about potentially inconsistent data.
+        for trial in self.trials.values():
+            try:
+                # Keep the metadata column lookups inside the guard: they are the
+                # expressions that can raise KeyError (metadata lacking the
+                # "parameter"/"value" columns), in which case the trial is skipped.
+                trial_objs_df = trial.metadata[
+                    trial.metadata["parameter"].isin(("opt_target", "opt_direction"))
+                ][["parameter", "value"]]
+                opt_targets = trial_objs_df[trial_objs_df["parameter"] == "opt_target"]
+                assert len(opt_targets) == 1, \
+                    "Should only be a single opt_target in the metadata params."
+                opt_target = opt_targets["value"].iloc[0]
+            except KeyError:
+                continue
+            try:
+                opt_directions = trial_objs_df[trial_objs_df["parameter"] == "opt_direction"]
+                assert len(opt_directions) <= 1, \
+                    "Should only be a single opt_direction in the metadata params."
+                opt_direction = opt_directions["value"].iloc[0]
+            except (KeyError, IndexError):
+                # No opt_direction recorded for this trial.
+                opt_direction = None
+            if opt_target not in objectives:
+                objectives[opt_target] = opt_direction
+            elif opt_direction != objectives[opt_target]:
+                # Report both values so the inconsistency can be diagnosed from the log.
+                _LOG.warning("Experiment %s has multiple trial optimization directions for optimization_target %s: %s vs. %s",
+                             self, opt_target, objectives[opt_target], opt_direction)
+        return objectives
+
     @property
     def trials(self) -> Dict[int, TrialData]:
         with self._engine.connect() as conn:

@@ -62,10 +62,23 @@ def __init__(self, engine: Engine):
             Column("root_env_config", String(1024), nullable=False),
             Column("git_repo", String(1024), nullable=False),
             Column("git_commit", String(40), nullable=False),
+            Column("optimization_target", String(1024), nullable=True),
+            Column("optimization_direction", String(10), nullable=True),
 
             PrimaryKeyConstraint("exp_id"),
         )
 
+        self.objectives = Table(
+            "objectives",
+            self._meta,
+            Column("exp_id"),
+            Column("optimization_target", String(1024), nullable=False),
+            Column("optimization_direction", String(4), nullable=False),
+
+            PrimaryKeyConstraint("exp_id", "optimization_target"),
+            ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]),
+        )
+
         # A workaround for SQLAlchemy issue with autoincrement in DuckDB:
         if engine.dialect.name == "duckdb":
             seq_config_id = Sequence('seq_config_id')

@@ -62,7 +62,8 @@ def experiment(self, *,
                    trial_id: int,
                    root_env_config: str,
                    description: str,
-                   opt_target: str) -> Storage.Experiment:
+                   opt_target: str,
+                   opt_direction: str) -> Storage.Experiment:
         return Experiment(
             engine=self._engine,
             schema=self._schema,
@@ -72,6 +73,7 @@ def experiment(self, *,
             root_env_config=root_env_config,
             description=description,
             opt_target=opt_target,
+            opt_direction=opt_direction,
         )
 
     @property

@@ -29,13 +29,15 @@ class Trial(Storage.Trial):
     def __init__(self, *,
                  engine: Engine, schema: DbSchema, tunables: TunableGroups,
                  experiment_id: str, trial_id: int, config_id: int,
-                 opt_target: str, config: Optional[Dict[str, Any]] = None):
+                 opt_target: str, opt_direction: str,
+                 config: Optional[Dict[str, Any]] = None):
         super().__init__(
             tunables=tunables,
             experiment_id=experiment_id,
             trial_id=trial_id,
             config_id=config_id,
             opt_target=opt_target,
+            opt_direction=opt_direction,
             config=config,
         )
         self._engine = engine