Skip to content

Commit

Permalink
Refactor mlos_bench.storage and add TunableConfigTrialGroup prope…
Browse files Browse the repository at this point in the history
…rty for `TrialData` and `ExperimentData` (#648)

Useful for grouping trials by the config they used. In use by upcoming
#633 for generating graphs with variance error bars for repeated
configs.

Also refactors a number of other things:
- Standardize on `experiment_id` instead of `exp_id`
  (but not the db schema for now)
- Standardize on `tunable_config_id` instead of `config_id` in the API
since we also call it `tunable_config` for the object fetching property
to distinguish from the `config` dict used internally.
  (but not the db schema for now)
- Rework the idea of `TunableConfigTrialGroup` as an object in addition
to an ID (further methods can be added later to move back and forth
between types when doing interactive analysis).
- Rework the idea of a `TunableConfig` as an object for fetching tunable
value assignments (similar justification - easier grouping in the future
by fetching trials across experiments based on config - eventually could
be used to house the experiment merge logic).
- Rename `results` APIs to `results_df` (similar for others that return
`pandas.DataFrame`) to match the `results_dict` APIs that return `dict`
- Refactor test fixtures to match other styles and for future use (moved
to #644).
- Expand tests

NOTE:
- We cut a new version with this commit since there are potentially
breaking API changes (e.g., `results` -> `results_df` and `exp_id` ->
`experiment_id`).

Currently builds off of #644 and splits work out of #633

---------

Co-authored-by: Sergiy Matusevych <sergiym@microsoft.com>
  • Loading branch information
bpkroth and motus authored Jan 23, 2024
1 parent c786a25 commit d8ea64f
Show file tree
Hide file tree
Showing 33 changed files with 1,001 additions and 314 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.2
current_version = 0.4.0
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
author = 'GSL'

# The full version, including alpha/beta/rc tags
release = '0.3.2'
release = '0.4.0'

try:
from setuptools_scm import get_version
Expand Down
33 changes: 29 additions & 4 deletions doc/source/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,33 @@ Optimizer Adapters

Storage
=======
Base Runtime Backends
---------------------
.. currentmodule:: mlos_bench.storage
.. autosummary::
:toctree: generated/
:template: class.rst

Storage

.. currentmodule:: mlos_bench.storage.storage_factory
.. autosummary::
:toctree: generated/
:template: function.rst

from_config

SQL DB Storage Backend
----------------------
.. currentmodule:: mlos_bench.storage.sql.storage
.. autosummary::
:toctree: generated/
:template: class.rst

SqlStorage

Analysis Client Access APIs
---------------------------
.. currentmodule:: mlos_bench.storage.base_experiment_data
.. autosummary::
:toctree: generated/
Expand All @@ -241,14 +261,19 @@ Storage

TrialData

SQL DB Storage
--------------
.. currentmodule:: mlos_bench.storage.sql.storage
.. currentmodule:: mlos_bench.storage.base_tunable_config_data
.. autosummary::
:toctree: generated/
:template: class.rst

SqlStorage
TunableConfigData

.. currentmodule:: mlos_bench.storage.base_tunable_config_trial_group_data
.. autosummary::
:toctree: generated/
:template: class.rst

TunableConfigTrialGroupData

#############################
mlos-viz API
Expand Down
2 changes: 1 addition & 1 deletion mlos_bench/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
"""

# NOTE: This should be managed by bumpversion.
_VERSION = '0.3.2'
_VERSION = '0.4.0'
1 change: 1 addition & 0 deletions mlos_bench/mlos_bench/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class Launcher:
"""

def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None):
# pylint: disable=too-many-statements
_LOG.info("Launch: %s", description)
epilog = """
Additional --key=value pairs can be specified to augment or override values listed in --globals.
Expand Down
67 changes: 51 additions & 16 deletions mlos_bench/mlos_bench/storage/base_experiment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,55 @@
# Licensed under the MIT License.
#
"""
Base interface for accessing the stored benchmark data.
Base interface for accessing the stored benchmark experiment data.
"""

from abc import ABCMeta, abstractmethod
from typing import Dict, Tuple
from typing import Dict, Tuple, TYPE_CHECKING

import pandas

from mlos_bench.storage.base_trial_data import TrialData
from mlos_bench.storage.base_tunable_config_data import TunableConfigData

if TYPE_CHECKING:
from mlos_bench.storage.base_trial_data import TrialData
from mlos_bench.storage.base_tunable_config_trial_group_data import TunableConfigTrialGroupData


class ExperimentData(metaclass=ABCMeta):
"""
Base interface for accessing the stored benchmark data.
Base interface for accessing the stored experiment benchmark data.
An experiment groups together a set of trials that are run with a given set of
scripts and mlos_bench configuration files.
"""

RESULT_COLUMN_PREFIX = "result."
CONFIG_COLUMN_PREFIX = "config."

def __init__(self, *, exp_id: str, description: str,
root_env_config: str, git_repo: str, git_commit: str):
self._exp_id = exp_id
def __init__(self, *,
experiment_id: str,
description: str,
root_env_config: str,
git_repo: str,
git_commit: str):
self._experiment_id = experiment_id
self._description = description
self._root_env_config = root_env_config
self._git_repo = git_repo
self._git_commit = git_commit

@property
def exp_id(self) -> str:
def experiment_id(self) -> str:
"""
ID of the current experiment.
ID of the experiment.
"""
return self._exp_id
return self._experiment_id

@property
def description(self) -> str:
"""
Description of the current experiment.
Description of the experiment.
"""
return self._description

Expand All @@ -57,7 +68,7 @@ def root_env_config(self) -> Tuple[str, str, str]:
return (self._root_env_config, self._git_repo, self._git_commit)

def __repr__(self) -> str:
return f"Experiment :: {self._exp_id}: '{self._description}'"
return f"Experiment :: {self._experiment_id}: '{self._description}'"

@property
@abstractmethod
Expand All @@ -74,9 +85,9 @@ def objectives(self) -> Dict[str, str]:

@property
@abstractmethod
def trials(self) -> Dict[int, TrialData]:
def trials(self) -> Dict[int, "TrialData"]:
"""
Retrieve the trials' data from the storage.
Retrieve the experiment's trials' data from the storage.
Returns
-------
Expand All @@ -86,15 +97,39 @@ def trials(self) -> Dict[int, TrialData]:

@property
@abstractmethod
def results(self) -> pandas.DataFrame:
def tunable_configs(self) -> Dict[int, TunableConfigData]:
"""
Retrieve the experiment's (tunable) configs' data from the storage.
Returns
-------
configs : Dict[int, TunableConfigData]
A dictionary of the configs' data, keyed by (tunable) config id.
"""

@property
@abstractmethod
def tunable_config_trial_groups(self) -> Dict[int, "TunableConfigTrialGroupData"]:
"""
Retrieve the Experiment's (Tunable) Config Trial Group data from the storage.
Returns
-------
trials : Dict[int, TunableConfigTrialGroupData]
A dictionary of the trial groups' data, keyed by (tunable) config id.
"""

@property
@abstractmethod
def results_df(self) -> pandas.DataFrame:
"""
Retrieve all experimental results as a single DataFrame.
Returns
-------
results : pandas.DataFrame
A DataFrame with configurations and results from all trials of the experiment.
Has columns [trial_id, config_id, ts_start, ts_end, status]
Has columns [trial_id, tunable_config_id, tunable_config_trial_group_id, ts_start, ts_end, status]
followed by tunable config parameters (prefixed with "config.") and
trial results (prefixed with "result."). The latter can be NULLs if the
trial was not successful.
Expand Down
12 changes: 6 additions & 6 deletions mlos_bench/mlos_bench/storage/base_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,19 +293,19 @@ class Trial(metaclass=ABCMeta):

def __init__(self, *,
tunables: TunableGroups, experiment_id: str, trial_id: int,
config_id: int, opt_target: str, opt_direction: Optional[str],
tunable_config_id: int, opt_target: str, opt_direction: Optional[str],
config: Optional[Dict[str, Any]] = None):
self._tunables = tunables
self._experiment_id = experiment_id
self._trial_id = trial_id
self._config_id = config_id
self._tunable_config_id = tunable_config_id
self._opt_target = opt_target
assert opt_direction in {None, "min", "max"}
self._opt_direction = opt_direction
self._config = config or {}

def __repr__(self) -> str:
return f"{self._experiment_id}:{self._trial_id}:{self._config_id}"
return f"{self._experiment_id}:{self._trial_id}:{self._tunable_config_id}"

@property
def trial_id(self) -> int:
Expand All @@ -315,11 +315,11 @@ def trial_id(self) -> int:
return self._trial_id

@property
def config_id(self) -> int:
def tunable_config_id(self) -> int:
"""
ID of the current trial configuration.
ID of the current trial (tunable) configuration.
"""
return self._config_id
return self._tunable_config_id

@property
def opt_target(self) -> str:
Expand Down
Loading

0 comments on commit d8ea64f

Please sign in to comment.