Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for storing the optimization targets and direction of an experiment #628

Merged
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
fdcc3e3
Add support for storing the optimization target and direction of an e…
bpkroth Jan 10, 2024
cf69674
Rename and improve tunable vs config API and documentation
bpkroth Jan 11, 2024
5cb4258
comments
bpkroth Jan 11, 2024
a0c21ca
data checking
bpkroth Jan 11, 2024
cfd3e4e
actually do the insert, and add some todo comments
bpkroth Jan 11, 2024
7536d59
consolidate logic
bpkroth Jan 11, 2024
b14e710
wip: add a fallback
bpkroth Jan 11, 2024
3498ff2
pylint
bpkroth Jan 11, 2024
4fdf957
TODOs
bpkroth Jan 11, 2024
eeea732
fixups
bpkroth Jan 11, 2024
c26ba06
Merge branch 'main' into store-and-expose-optimization-target-info
bpkroth Jan 11, 2024
6e49010
add objective info to storage schema
bpkroth Jan 10, 2024
687501f
todo comments
bpkroth Jan 11, 2024
aa9e545
stubs for tests
bpkroth Jan 11, 2024
814a0dd
fixup
bpkroth Jan 11, 2024
a2673da
move some attrs to the base class
bpkroth Jan 11, 2024
fdf64d2
fixups
bpkroth Jan 11, 2024
d77a806
basic test
bpkroth Jan 11, 2024
c15123b
add some more test handling
bpkroth Jan 11, 2024
5ffcd67
reorg
bpkroth Jan 11, 2024
d6ba89e
Update mlos_bench/mlos_bench/run.py
bpkroth Jan 16, 2024
8683aad
making opt_direction optional
bpkroth Jan 16, 2024
8d12b72
adding todo comments and stubbing out weighted multi-objective support
bpkroth Jan 16, 2024
bd5a2a6
Merge branch 'main' into store-and-expose-optimization-target-info
bpkroth Jan 16, 2024
667119a
make optimization direction non-nullable
bpkroth Jan 16, 2024
d4d8f50
Update mlos_bench/mlos_bench/storage/base_experiment_data.py
bpkroth Jan 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions mlos_bench/mlos_bench/optimizers/base_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ def name(self) -> str:
"""
return self.__class__.__name__

# TODO: Expand these properties for multi-objective.

@property
def is_min(self) -> bool:
"""
Expand All @@ -167,6 +169,13 @@ def target(self) -> str:
"""
return self._opt_target

@property
def direction(self) -> str:
    """
    Name of the direction in which the target metric is optimized.

    Returns
    -------
    direction : str
        'min' when `is_min` is true, 'max' otherwise.
    """
    if self.is_min:
        return 'min'
    return 'max'

@property
def supports_preload(self) -> bool:
"""
Expand Down
9 changes: 7 additions & 2 deletions mlos_bench/mlos_bench/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ def _optimize(*,
trial_id=trial_id,
root_env_config=root_env_config,
description=env.name,
opt_target=opt.target
opt_target=opt.target,
opt_direction=opt.direction,
) as exp:

_LOG.info("Experiment: %s Env: %s Optimizer: %s", exp, env, opt)
Expand Down Expand Up @@ -118,9 +119,13 @@ def _optimize(*,
config_id = -1

trial = exp.new_trial(tunables, config={
# Add some additional metadata to track for the trial such as the
# optimizer config used.
# TODO: Improve for supporting multi-objective
# (e.g., opt_target_1, opt_target_2, ...)
bpkroth marked this conversation as resolved.
Show resolved Hide resolved
"optimizer": opt.name,
"opt_target": opt.target,
"opt_direction": "min" if opt.is_min else "max",
"opt_direction": opt.direction,
})
_run(env_context, opt_context, trial, global_config)

Expand Down
13 changes: 13 additions & 0 deletions mlos_bench/mlos_bench/storage/base_experiment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@ def root_env_config(self) -> Tuple[str, str, str]:
def __repr__(self) -> str:
return f"Experiment :: {self._exp_id}: '{self._description}'"

@property
@abstractmethod
def objectives(self) -> Dict[str, str]:
    """
    Retrieve the experiment's objectives data from the storage.

    Returns
    -------
    objectives : Dict[str, str]
        A dictionary of the experiment's objective names (optimization_targets)
        and their directions (e.g., min or max).
    """

@property
@abstractmethod
def trials(self) -> Dict[int, TrialData]:
Expand Down
64 changes: 61 additions & 3 deletions mlos_bench/mlos_bench/storage/base_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ def experiment(self, *,
trial_id: int,
root_env_config: str,
description: str,
opt_target: str) -> 'Storage.Experiment':
opt_target: str,
opt_direction: str) -> 'Storage.Experiment':
bpkroth marked this conversation as resolved.
Show resolved Hide resolved
"""
Create a new experiment in the storage.

Expand All @@ -103,6 +104,8 @@ def experiment(self, *,
Human-readable description of the experiment.
opt_target : str
Name of metric we're optimizing for.
opt_direction: str
Direction to optimize the metric (e.g., min or max)

Returns
-------
Expand All @@ -112,15 +115,28 @@ def experiment(self, *,
"""

class Experiment(metaclass=ABCMeta):
# pylint: disable=too-many-instance-attributes
"""
Base interface for storing the results of the experiment.
This class is instantiated in the `Storage.experiment()` method.
"""

def __init__(self, tunables: TunableGroups, experiment_id: str, root_env_config: str):
def __init__(self,
*,
tunables: TunableGroups,
experiment_id: str,
trial_id: int,
root_env_config: str,
description: str,
opt_target: str,
opt_direction: str):
bpkroth marked this conversation as resolved.
Show resolved Hide resolved
self._tunables = tunables.copy()
self._trial_id = trial_id
self._experiment_id = experiment_id
(self._git_repo, self._git_commit, self._root_env_config) = get_git_info(root_env_config)
self._description = description
self._opt_target = opt_target
self._opt_direction = opt_direction

def __enter__(self) -> 'Storage.Experiment':
"""
Expand Down Expand Up @@ -172,6 +188,31 @@ def _teardown(self, is_ok: bool) -> None:
True if there were no exceptions during the experiment, False otherwise.
"""

@property
def experiment_id(self) -> str:
    """Get the Experiment's ID (the value passed to the constructor)."""
    return self._experiment_id

@property
def trial_id(self) -> int:
    """Get the current Trial ID tracked by this Experiment."""
    return self._trial_id

@property
def description(self) -> str:
    """Get the Experiment's human-readable description."""
    return self._description

@property
def opt_target(self) -> str:
    """Get the Experiment's optimization target (name of the metric being optimized)."""
    return self._opt_target

@property
def opt_direction(self) -> str:
    """Get the Experiment's optimization direction (e.g., min or max)"""
    return self._opt_direction

@abstractmethod
def merge(self, experiment_ids: List[str]) -> None:
"""
Expand Down Expand Up @@ -249,12 +290,15 @@ class Trial(metaclass=ABCMeta):

def __init__(self, *,
tunables: TunableGroups, experiment_id: str, trial_id: int,
config_id: int, opt_target: str, config: Optional[Dict[str, Any]] = None):
config_id: int, opt_target: str, opt_direction: str,
config: Optional[Dict[str, Any]] = None):
self._tunables = tunables
self._experiment_id = experiment_id
self._trial_id = trial_id
self._config_id = config_id
self._opt_target = opt_target
assert opt_direction in {"min", "max"}
self._opt_direction = opt_direction
self._config = config or {}

def __repr__(self) -> str:
Expand All @@ -274,6 +318,20 @@ def config_id(self) -> int:
"""
return self._config_id

@property
def opt_target(self) -> str:
    """
    Get the Trial's optimization target (name of the metric being optimized).
    """
    return self._opt_target

@property
def opt_direction(self) -> str:
    """
    Get the Trial's optimization direction (e.g., min or max).
    """
    # Return the direction, not the target: the two attributes are stored
    # side by side in __init__ and are easy to mix up.
    return self._opt_direction

@property
def tunables(self) -> TunableGroups:
"""
Expand Down
29 changes: 24 additions & 5 deletions mlos_bench/mlos_bench/storage/sql/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,18 @@ def __init__(self, *,
trial_id: int,
root_env_config: str,
description: str,
opt_target: str):
super().__init__(tunables, experiment_id, root_env_config)
opt_target: str,
opt_direction: str):
super().__init__(
tunables=tunables,
experiment_id=experiment_id,
trial_id=trial_id,
root_env_config=root_env_config,
description=description,
opt_target=opt_target,
opt_direction=opt_direction)
self._engine = engine
self._schema = schema
self._trial_id = trial_id
self._description = description
self._opt_target = opt_target

def _setup(self) -> None:
super()._setup()
Expand Down Expand Up @@ -76,11 +81,21 @@ def _setup(self) -> None:
git_commit=self._git_commit,
root_env_config=self._root_env_config,
))
# TODO: Expand for multiple objectives.
conn.execute(self._schema.objectives.insert().values(
exp_id=self._experiment_id,
optimization_target=self._opt_target,
optimization_direction=self._opt_direction,
))
else:
if exp_info.trial_id is not None:
self._trial_id = exp_info.trial_id + 1
_LOG.info("Continue experiment: %s last trial: %s resume from: %d",
self._experiment_id, exp_info.trial_id, self._trial_id)
# TODO: Sanity check that certain critical configs (e.g.,
# objectives) haven't changed to be incompatible such that a new
# experiment should be started (possibly by prewarming with the
# previous one).
if exp_info.git_commit != self._git_commit:
_LOG.warning("Experiment %s git expected: %s %s",
self, exp_info.git_repo, exp_info.git_commit)
Expand Down Expand Up @@ -131,6 +146,8 @@ def load(self, opt_target: Optional[str] = None) -> Tuple[List[dict], List[Optio
self._schema.trial.c.trial_id.asc(),
)
)
# Note: this iterative approach is somewhat expensive.
# TODO: Look into a better bulk fetch option.
for trial in cur_trials.fetchall():
tunables = self._get_params(
conn, self._schema.config_param, config_id=trial.config_id)
Expand Down Expand Up @@ -180,6 +197,7 @@ def pending_trials(self) -> Iterator[Storage.Trial]:
trial_id=trial.trial_id,
config_id=trial.config_id,
opt_target=self._opt_target,
opt_direction=self._opt_direction,
config=config,
)

Expand Down Expand Up @@ -232,6 +250,7 @@ def new_trial(self, tunables: TunableGroups,
trial_id=self._trial_id,
config_id=config_id,
opt_target=self._opt_target,
opt_direction=self._opt_direction,
config=config,
)
self._trial_id += 1
Expand Down
53 changes: 53 additions & 0 deletions mlos_bench/mlos_bench/storage/sql/experiment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
"""
from typing import Dict

import logging

import pandas
from sqlalchemy import Engine

Expand All @@ -16,6 +18,8 @@
from mlos_bench.storage.base_trial_data import TrialData
from mlos_bench.storage.sql.trial_data import TrialSqlData

_LOG = logging.getLogger(__name__)


class ExperimentSqlData(ExperimentData):
"""
Expand All @@ -34,6 +38,55 @@ def __init__(self, *, engine: Engine, schema: DbSchema, exp_id: str,
self._engine = engine
self._schema = schema

@property
def objectives(self) -> Dict[str, str]:
    """
    Retrieve the experiment's objectives data from the storage.

    Objectives are first read from the ``objectives`` table (when the schema
    has one). For backwards compatibility, the ``opt_target`` /
    ``opt_direction`` entries of each trial's metadata are then merged in:
    targets not yet known are added, and a warning is logged when a trial's
    direction disagrees with the one already recorded for the same target.

    Returns
    -------
    objectives : Dict[str, str]
        A dictionary of the experiment's objective names (optimization_targets)
        and their directions (e.g., min or max). The direction is None for a
        target that was only found in trial metadata without an accompanying
        ``opt_direction`` entry.
    """
    objectives: Dict[str, str] = {}
    # First try to lookup the objectives from the experiment metadata in the storage layer.
    if hasattr(self._schema, "objectives"):
        objectives_table = self._schema.objectives
        with self._engine.connect() as conn:
            objectives_db_data = conn.execute(
                objectives_table.select().where(
                    objectives_table.c.exp_id == self._exp_id,
                ).order_by(
                    objectives_table.c.optimization_target.asc(),
                )
            )
            objectives = {
                objective.optimization_target: objective.optimization_direction
                for objective in objectives_db_data.fetchall()
            }
    # Backwards compatibility: try and obtain the objectives from the TrialData and merge them in.
    # NOTE: The original format of storing opt_target/opt_direction in the Trial
    # metadata did not support multi-objectives.
    # Nor does it make it easy to detect when a config change caused a switch in
    # opt_direction for a given opt_target between run.py executions of an
    # Experiment.
    # For now, we simply issue a warning about potentially inconsistent data.
    for trial in self.trials.values():
        metadata = trial.metadata
        opt_targets = metadata.loc[metadata["parameter"] == "opt_target", "value"]
        if opt_targets.empty:
            # This trial carries no opt_target metadata at all: nothing to
            # merge, so skip it rather than failing the whole lookup.
            continue
        assert len(opt_targets) == 1, \
            "Should only be a single opt_target in the metadata params."
        opt_target = opt_targets.iloc[0]
        opt_directions = metadata.loc[metadata["parameter"] == "opt_direction", "value"]
        assert len(opt_directions) <= 1, \
            "Should only be a single opt_direction in the metadata params."
        # Older trials may have recorded a target without a direction.
        opt_direction = None if opt_directions.empty else opt_directions.iloc[0]
        if opt_target not in objectives:
            objectives[opt_target] = opt_direction
        elif opt_direction != objectives[opt_target]:
            _LOG.warning("Experiment %s has multiple trial optimization directions for optimization_target %s=%s",
                         self, opt_target, objectives[opt_target])
    return objectives

@property
def trials(self) -> Dict[int, TrialData]:
with self._engine.connect() as conn:
Expand Down
13 changes: 13 additions & 0 deletions mlos_bench/mlos_bench/storage/sql/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,23 @@ def __init__(self, engine: Engine):
Column("root_env_config", String(1024), nullable=False),
Column("git_repo", String(1024), nullable=False),
Column("git_commit", String(40), nullable=False),
Column("optimization_target", String(1024), nullable=True),
Column("optimization_direction", String(10), nullable=True),

PrimaryKeyConstraint("exp_id"),
)

self.objectives = Table(
"objectives",
self._meta,
Column("exp_id"),
Column("optimization_target", String(1024), nullable=False),
Column("optimization_direction", String(4), nullable=False),

PrimaryKeyConstraint("exp_id", "optimization_target"),
ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]),
)

# A workaround for SQLAlchemy issue with autoincrement in DuckDB:
if engine.dialect.name == "duckdb":
seq_config_id = Sequence('seq_config_id')
Expand Down
4 changes: 3 additions & 1 deletion mlos_bench/mlos_bench/storage/sql/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def experiment(self, *,
trial_id: int,
root_env_config: str,
description: str,
opt_target: str) -> Storage.Experiment:
opt_target: str,
opt_direction: str) -> Storage.Experiment:
return Experiment(
engine=self._engine,
schema=self._schema,
Expand All @@ -72,6 +73,7 @@ def experiment(self, *,
root_env_config=root_env_config,
description=description,
opt_target=opt_target,
opt_direction=opt_direction,
)

@property
Expand Down
4 changes: 3 additions & 1 deletion mlos_bench/mlos_bench/storage/sql/trial.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@ class Trial(Storage.Trial):
def __init__(self, *,
engine: Engine, schema: DbSchema, tunables: TunableGroups,
experiment_id: str, trial_id: int, config_id: int,
opt_target: str, config: Optional[Dict[str, Any]] = None):
opt_target: str, opt_direction: str,
bpkroth marked this conversation as resolved.
Show resolved Hide resolved
config: Optional[Dict[str, Any]] = None):
super().__init__(
tunables=tunables,
experiment_id=experiment_id,
trial_id=trial_id,
config_id=config_id,
opt_target=opt_target,
opt_direction=opt_direction,
config=config,
)
self._engine = engine
Expand Down
Loading
Loading