Skip to content

Commit

Permalink
Add support for storing the optimization targets and direction of an …
Browse files Browse the repository at this point in the history
…experiment (#628)

This PR is useful for mlos-viz and dabl wrapper (#624) to be able to
automatically graph the results for a given optimization target, for
instance via something like the following:

```python
for opt_target in exp.objectives:
    dabl.plot(exp.results, opt_target)
```

Since the prior efforts on capturing this data in the Trial metadata are
somewhat problematic (they allow conflicting changes between runs of an
experiment and don't support multi-objective optimization), we extend
the storage to also record these values directly as a part of the
Experiment, which is a more appropriate location. Upon retrieval, an
attempt is also made to merge the two data sources for backwards
compatibility.

This PR does not enforce strictness on that metadata, but future
versions could, e.g., by disallowing resuming an Experiment if it looks
like the objective targets have changed. In that case, the prior Trial
results could potentially still be used to prewarm a new Experiment's
optimizer.

---------

Co-authored-by: Sergiy Matusevych <sergiy.matusevych@gmail.com>
  • Loading branch information
bpkroth and motus authored Jan 16, 2024
1 parent 42a149f commit 221cee3
Show file tree
Hide file tree
Showing 12 changed files with 274 additions and 16 deletions.
9 changes: 9 additions & 0 deletions mlos_bench/mlos_bench/optimizers/base_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ def name(self) -> str:
"""
return self.__class__.__name__

# TODO: Expand these properties for multi-objective.

@property
def is_min(self) -> bool:
"""
Expand All @@ -167,6 +169,13 @@ def target(self) -> str:
"""
return self._opt_target

@property
def direction(self) -> str:
    """
    Report which way the target metric is being optimized.

    Returns
    -------
    direction : str
        'min' when `is_min` is true, 'max' otherwise.
    """
    if self.is_min:
        return 'min'
    return 'max'

@property
def supports_preload(self) -> bool:
"""
Expand Down
9 changes: 7 additions & 2 deletions mlos_bench/mlos_bench/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ def _optimize(*,
trial_id=trial_id,
root_env_config=root_env_config,
description=env.name,
opt_target=opt.target
opt_target=opt.target,
opt_direction=opt.direction,
) as exp:

_LOG.info("Experiment: %s Env: %s Optimizer: %s", exp, env, opt)
Expand Down Expand Up @@ -118,9 +119,13 @@ def _optimize(*,
config_id = -1

trial = exp.new_trial(tunables, config={
# Add some additional metadata to track for the trial such as the
# optimizer config used.
# TODO: Improve for supporting multi-objective
# (e.g., opt_target_1, opt_target_2, ... and opt_direction_1, opt_direction_2, ...)
"optimizer": opt.name,
"opt_target": opt.target,
"opt_direction": "min" if opt.is_min else "max",
"opt_direction": opt.direction,
})
_run(env_context, opt_context, trial, global_config)

Expand Down
13 changes: 13 additions & 0 deletions mlos_bench/mlos_bench/storage/base_experiment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@ def root_env_config(self) -> Tuple[str, str, str]:
def __repr__(self) -> str:
return f"Experiment :: {self._exp_id}: '{self._description}'"

@property
@abstractmethod
def objectives(self) -> Dict[str, str]:
"""
Retrieve the experiment's objectives data from the storage.

Abstract: concrete storage backends must provide the implementation.

Returns
-------
objectives : Dict[str, str]
    A dictionary mapping each of the experiment's objective names
    (optimization targets) to its direction (e.g., min or max).
"""

@property
@abstractmethod
def trials(self) -> Dict[int, TrialData]:
Expand Down
65 changes: 62 additions & 3 deletions mlos_bench/mlos_bench/storage/base_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ def experiment(self, *,
trial_id: int,
root_env_config: str,
description: str,
opt_target: str) -> 'Storage.Experiment':
opt_target: str,
opt_direction: Optional[str]) -> 'Storage.Experiment':
"""
Create a new experiment in the storage.
Expand All @@ -103,6 +104,8 @@ def experiment(self, *,
Human-readable description of the experiment.
opt_target : str
Name of metric we're optimizing for.
opt_direction: Optional[str]
Direction to optimize the metric (e.g., min or max)
Returns
-------
Expand All @@ -112,15 +115,29 @@ def experiment(self, *,
"""

class Experiment(metaclass=ABCMeta):
# pylint: disable=too-many-instance-attributes
"""
Base interface for storing the results of the experiment.
This class is instantiated in the `Storage.experiment()` method.
"""

def __init__(self, tunables: TunableGroups, experiment_id: str, root_env_config: str):
def __init__(self,
*,
tunables: TunableGroups,
experiment_id: str,
trial_id: int,
root_env_config: str,
description: str,
opt_target: str,
opt_direction: Optional[str]):
"""
Record the experiment's identity, provenance, and optimization settings.

All arguments are keyword-only.

Parameters
----------
tunables : TunableGroups
    Tunable parameters of the experiment; a copy is stored.
experiment_id : str
    Identifier of the experiment.
trial_id : int
    Initial value of the experiment's current trial ID.
root_env_config : str
    Passed to `get_git_info()`, whose three return values are stored as
    the git repo, git commit, and root environment config.
description : str
    Human-readable description of the experiment.
opt_target : str
    Name of the metric being optimized.
opt_direction : Optional[str]
    Direction to optimize the metric: "min", "max", or None.

Raises
------
AssertionError
    If `opt_direction` is not one of None, "min", or "max"
    (only when assertions are enabled).
"""
# Store a copy rather than the caller's TunableGroups object.
self._tunables = tunables.copy()
self._trial_id = trial_id
self._experiment_id = experiment_id
# NOTE(review): presumably resolves the git repo/commit that contains the
# config file - confirm against get_git_info().
(self._git_repo, self._git_commit, self._root_env_config) = get_git_info(root_env_config)
self._description = description
self._opt_target = opt_target
# Only the two known directions (or "unspecified") are accepted.
assert opt_direction in {None, "min", "max"}
self._opt_direction = opt_direction

def __enter__(self) -> 'Storage.Experiment':
"""
Expand Down Expand Up @@ -172,6 +189,31 @@ def _teardown(self, is_ok: bool) -> None:
True if there were no exceptions during the experiment, False otherwise.
"""

@property
def experiment_id(self) -> str:
"""Get the Experiment's ID, as given to the constructor."""
return self._experiment_id

@property
def trial_id(self) -> int:
"""
Get the current Trial ID.

Initialized from the constructor's `trial_id`; implementations may
advance it (e.g., as new trials are created).
"""
return self._trial_id

@property
def description(self) -> str:
"""Get the Experiment's human-readable description."""
return self._description

@property
def opt_target(self) -> str:
"""Get the Experiment's optimization target (name of the metric being optimized)."""
return self._opt_target

@property
def opt_direction(self) -> Optional[str]:
"""Get the Experiment's optimization direction ("min", "max", or None if unspecified)."""
return self._opt_direction

@abstractmethod
def merge(self, experiment_ids: List[str]) -> None:
"""
Expand Down Expand Up @@ -249,12 +291,15 @@ class Trial(metaclass=ABCMeta):

def __init__(self, *,
tunables: TunableGroups, experiment_id: str, trial_id: int,
config_id: int, opt_target: str, config: Optional[Dict[str, Any]] = None):
config_id: int, opt_target: str, opt_direction: Optional[str],
config: Optional[Dict[str, Any]] = None):
self._tunables = tunables
self._experiment_id = experiment_id
self._trial_id = trial_id
self._config_id = config_id
self._opt_target = opt_target
assert opt_direction in {None, "min", "max"}
self._opt_direction = opt_direction
self._config = config or {}

def __repr__(self) -> str:
Expand All @@ -274,6 +319,20 @@ def config_id(self) -> int:
"""
return self._config_id

@property
def opt_target(self) -> str:
"""
Get the Trial's optimization target.

Returns
-------
opt_target : str
    The optimization target given to the Trial's constructor.
"""
return self._opt_target

@property
def opt_direction(self) -> Optional[str]:
"""
Get the Trial's optimization direction (e.g., min or max).

Returns
-------
opt_direction : Optional[str]
    "min", "max", or None when no direction was given to the constructor.
"""
return self._opt_direction

@property
def tunables(self) -> TunableGroups:
"""
Expand Down
29 changes: 24 additions & 5 deletions mlos_bench/mlos_bench/storage/sql/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,18 @@ def __init__(self, *,
trial_id: int,
root_env_config: str,
description: str,
opt_target: str):
super().__init__(tunables, experiment_id, root_env_config)
opt_target: str,
opt_direction: Optional[str]):
super().__init__(
tunables=tunables,
experiment_id=experiment_id,
trial_id=trial_id,
root_env_config=root_env_config,
description=description,
opt_target=opt_target,
opt_direction=opt_direction)
self._engine = engine
self._schema = schema
self._trial_id = trial_id
self._description = description
self._opt_target = opt_target

def _setup(self) -> None:
super()._setup()
Expand Down Expand Up @@ -76,11 +81,21 @@ def _setup(self) -> None:
git_commit=self._git_commit,
root_env_config=self._root_env_config,
))
# TODO: Expand for multiple objectives.
conn.execute(self._schema.objectives.insert().values(
exp_id=self._experiment_id,
optimization_target=self._opt_target,
optimization_direction=self._opt_direction,
))
else:
if exp_info.trial_id is not None:
self._trial_id = exp_info.trial_id + 1
_LOG.info("Continue experiment: %s last trial: %s resume from: %d",
self._experiment_id, exp_info.trial_id, self._trial_id)
# TODO: Sanity check that certain critical configs (e.g.,
# objectives) haven't changed to be incompatible such that a new
# experiment should be started (possibly by prewarming with the
# previous one).
if exp_info.git_commit != self._git_commit:
_LOG.warning("Experiment %s git expected: %s %s",
self, exp_info.git_repo, exp_info.git_commit)
Expand Down Expand Up @@ -131,6 +146,8 @@ def load(self, opt_target: Optional[str] = None) -> Tuple[List[dict], List[Optio
self._schema.trial.c.trial_id.asc(),
)
)
# Note: this iterative approach is somewhat expensive.
# TODO: Look into a better bulk fetch option.
for trial in cur_trials.fetchall():
tunables = self._get_params(
conn, self._schema.config_param, config_id=trial.config_id)
Expand Down Expand Up @@ -180,6 +197,7 @@ def pending_trials(self) -> Iterator[Storage.Trial]:
trial_id=trial.trial_id,
config_id=trial.config_id,
opt_target=self._opt_target,
opt_direction=self._opt_direction,
config=config,
)

Expand Down Expand Up @@ -232,6 +250,7 @@ def new_trial(self, tunables: TunableGroups,
trial_id=self._trial_id,
config_id=config_id,
opt_target=self._opt_target,
opt_direction=self._opt_direction,
config=config,
)
self._trial_id += 1
Expand Down
54 changes: 54 additions & 0 deletions mlos_bench/mlos_bench/storage/sql/experiment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
"""
from typing import Dict

import logging

import pandas
from sqlalchemy import Engine

Expand All @@ -16,6 +18,8 @@
from mlos_bench.storage.base_trial_data import TrialData
from mlos_bench.storage.sql.trial_data import TrialSqlData

_LOG = logging.getLogger(__name__)


class ExperimentSqlData(ExperimentData):
"""
Expand All @@ -34,6 +38,56 @@ def __init__(self, *, engine: Engine, schema: DbSchema, exp_id: str,
self._engine = engine
self._schema = schema

@property
def objectives(self) -> Dict[str, str]:
    """
    Retrieve the experiment's objectives, merging experiment- and trial-level data.

    The objectives recorded for the experiment itself are looked up first.
    For backwards compatibility, the `opt_target`/`opt_direction` entries of
    each trial's metadata are then merged in: targets not already known are
    added, and a warning is logged when a trial's direction disagrees with
    the one already recorded for the same target.

    Trials whose metadata has no `opt_target` entry are skipped.

    Returns
    -------
    objectives : Dict[str, str]
        Objective names (optimization targets) mapped to their direction
        (e.g., min or max). The direction is None for a target that was only
        found in trial metadata without an accompanying `opt_direction`.
    """
    objectives: Dict[str, str] = {}
    # First try to lookup the objectives from the experiment metadata in the storage layer.
    if hasattr(self._schema, "objectives"):
        with self._engine.connect() as conn:
            objectives_db_data = conn.execute(
                self._schema.objectives.select().where(
                    self._schema.objectives.c.exp_id == self._exp_id,
                ).order_by(
                    self._schema.objectives.c.weight.desc(),
                    self._schema.objectives.c.optimization_target.asc(),
                )
            )
            objectives = {
                objective.optimization_target: objective.optimization_direction
                for objective in objectives_db_data.fetchall()
            }
    # Backwards compatibility: try and obtain the objectives from the TrialData and merge them in.
    # NOTE: The original format of storing opt_target/opt_direction in the Trial
    # metadata did not support multi-objectives.
    # Nor does it make it easy to detect when a config change caused a switch in
    # opt_direction for a given opt_target between run.py executions of an
    # Experiment.
    # For now, we simply issue a warning about potentially inconsistent data.
    for trial in self.trials.values():
        metadata = trial.metadata
        opt_targets = metadata.loc[metadata["parameter"] == "opt_target", "value"]
        if opt_targets.empty:
            # This trial carries no objective metadata: nothing to merge.
            # (Checked explicitly so that the assertion below only guards
            # against duplicates instead of rejecting such trials.)
            continue
        assert len(opt_targets) == 1, \
            "Should only be a single opt_target in the metadata params."
        opt_target = opt_targets.iloc[0]
        opt_directions = metadata.loc[metadata["parameter"] == "opt_direction", "value"]
        assert len(opt_directions) <= 1, \
            "Should only be a single opt_direction in the metadata params."
        # A missing opt_direction is recorded as None.
        opt_direction = None if opt_directions.empty else opt_directions.iloc[0]
        if opt_target not in objectives:
            objectives[opt_target] = opt_direction
        elif opt_direction != objectives[opt_target]:
            # Report both values so the inconsistency can actually be diagnosed.
            _LOG.warning("Experiment %s has multiple trial optimization directions "
                         "for optimization_target %s: %s vs. %s",
                         self, opt_target, objectives[opt_target], opt_direction)
    return objectives

@property
def trials(self) -> Dict[int, TrialData]:
with self._engine.connect() as conn:
Expand Down
20 changes: 19 additions & 1 deletion mlos_bench/mlos_bench/storage/sql/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from sqlalchemy import (
Engine, MetaData, Dialect, create_mock_engine,
Table, Column, Sequence, Integer, String, DateTime,
Table, Column, Sequence, Integer, Float, String, DateTime,
PrimaryKeyConstraint, ForeignKeyConstraint, UniqueConstraint,
)

Expand Down Expand Up @@ -62,10 +62,28 @@ def __init__(self, engine: Engine):
Column("root_env_config", String(1024), nullable=False),
Column("git_repo", String(1024), nullable=False),
Column("git_commit", String(40), nullable=False),
Column("optimization_target", String(1024), nullable=True),
Column("optimization_direction", String(10), nullable=True),

PrimaryKeyConstraint("exp_id"),
)

self.objectives = Table(
"objectives",
self._meta,
Column("exp_id"),
Column("optimization_target", String(1024), nullable=False),
Column("optimization_direction", String(4), nullable=False),
# TODO: Note: weight is not fully supported yet as currently
# multi-objective is expected to explore each objective equally.
# Will need to adjust the insert and return values to support this
# eventually.
Column("weight", Float, nullable=True),

PrimaryKeyConstraint("exp_id", "optimization_target"),
ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]),
)

# A workaround for SQLAlchemy issue with autoincrement in DuckDB:
if engine.dialect.name == "duckdb":
seq_config_id = Sequence('seq_config_id')
Expand Down
4 changes: 3 additions & 1 deletion mlos_bench/mlos_bench/storage/sql/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def experiment(self, *,
trial_id: int,
root_env_config: str,
description: str,
opt_target: str) -> Storage.Experiment:
opt_target: str,
opt_direction: Optional[str]) -> Storage.Experiment:
return Experiment(
engine=self._engine,
schema=self._schema,
Expand All @@ -72,6 +73,7 @@ def experiment(self, *,
root_env_config=root_env_config,
description=description,
opt_target=opt_target,
opt_direction=opt_direction,
)

@property
Expand Down
Loading

0 comments on commit 221cee3

Please sign in to comment.