Refactor mlos_bench.storage and add TunableConfigTrialGroup prope…

…rty for `TrialData` and `ExperimentData` (#648) Useful for grouping trials by the config they used. In use by upcoming #633 for generating graphs with variance error bars for repeated configs. Also refactors a number of other things: - Standardize on `experiment_id` instead of `exp_id` (but not the db schema for now) - Standardize on `tunable_config_id` instead of `config_id` in the API since we also call it `tunable_config` for the object fetching property to distinguish from the `config` dict used internally. (but not the db schema for now) - Rework the idea of `TunableConfigTrialGroup` as an object in addition to an ID (further methods can be added later to move back and forth between types when doing interactive analysis). - Rework the idea of a `TunableConfig` as an object for fetching tunable value assignments (similar justification - easier grouping in the future by fetching trials across experiments based on config - eventually could be used to house the experiment merge logic). - Rename `results` APIs to `results_df` (similar for others that return `pandas.DataFrame`) to match the `results_dict` APIs that return `dict` - Refactor test fixtures to match other styles and for future use (moved to #644). - Expand tests NOTE: - We cut a new version with this commit since there are potentially breaking API changes (e.g., `results` -> `results_df` and `exp_id` -> `experiment_id`). Currently builds off of #644 and splits work out of #633 --------- Co-authored-by: Sergiy Matusevych <sergiym@microsoft.com>
microsoft · Jan 23, 2024 · d8ea64f · d8ea64f
1 parent c786a25
commit d8ea64f
Show file tree

Hide file tree

Showing 33 changed files with 1,001 additions and 314 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.2
+current_version = 0.4.0
 commit = True
 tag = True
 

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -36,7 +36,7 @@
 author = 'GSL'
 
 # The full version, including alpha/beta/rc tags
-release = '0.3.2'
+release = '0.4.0'
 
 try:
     from setuptools_scm import get_version

diff --git a/doc/source/overview.rst b/doc/source/overview.rst
@@ -220,13 +220,33 @@ Optimizer Adapters
 
 Storage
 =======
+Base Runtime Backends
+---------------------
 .. currentmodule:: mlos_bench.storage
 .. autosummary::
    :toctree: generated/
    :template: class.rst
 
    Storage
 
+.. currentmodule:: mlos_bench.storage.storage_factory
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   from_config
+
+SQL DB Storage Backend
+----------------------
+.. currentmodule:: mlos_bench.storage.sql.storage
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   SqlStorage
+
+Analysis Client Access APIs
+---------------------------
 .. currentmodule:: mlos_bench.storage.base_experiment_data
 .. autosummary::
    :toctree: generated/
@@ -241,14 +261,19 @@ Storage
 
    TrialData
 
-SQL DB Storage
---------------
-.. currentmodule:: mlos_bench.storage.sql.storage
+.. currentmodule:: mlos_bench.storage.base_tunable_config_data
 .. autosummary::
    :toctree: generated/
    :template: class.rst
 
-   SqlStorage
+   TunableConfigData
+
+.. currentmodule:: mlos_bench.storage.base_tunable_config_trial_group_data
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   TunableConfigTrialGroupData
 
 #############################
 mlos-viz API

diff --git a/mlos_bench/_version.py b/mlos_bench/_version.py
@@ -7,4 +7,4 @@
 """
 
 # NOTE: This should be managed by bumpversion.
-_VERSION = '0.3.2'
+_VERSION = '0.4.0'
diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py
@@ -51,6 +51,7 @@ class Launcher:
     """
 
     def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None):
+        # pylint: disable=too-many-statements
         _LOG.info("Launch: %s", description)
         epilog = """
             Additional --key=value pairs can be specified to augment or override values listed in --globals.

diff --git a/mlos_bench/mlos_bench/storage/base_experiment_data.py b/mlos_bench/mlos_bench/storage/base_experiment_data.py
@@ -3,44 +3,55 @@
 # Licensed under the MIT License.
 #
 """
-Base interface for accessing the stored benchmark data.
+Base interface for accessing the stored benchmark experiment data.
 """
 
 from abc import ABCMeta, abstractmethod
-from typing import Dict, Tuple
+from typing import Dict, Tuple, TYPE_CHECKING
 
 import pandas
 
-from mlos_bench.storage.base_trial_data import TrialData
+from mlos_bench.storage.base_tunable_config_data import TunableConfigData
+
+if TYPE_CHECKING:
+    from mlos_bench.storage.base_trial_data import TrialData
+    from mlos_bench.storage.base_tunable_config_trial_group_data import TunableConfigTrialGroupData
 
 
 class ExperimentData(metaclass=ABCMeta):
     """
-    Base interface for accessing the stored benchmark data.
+    Base interface for accessing the stored experiment benchmark data.
+
+    An experiment groups together a set of trials that are run with a given set of
+    scripts and mlos_bench configuration files.
     """
 
     RESULT_COLUMN_PREFIX = "result."
     CONFIG_COLUMN_PREFIX = "config."
 
-    def __init__(self, *, exp_id: str, description: str,
-                 root_env_config: str, git_repo: str, git_commit: str):
-        self._exp_id = exp_id
+    def __init__(self, *,
+                 experiment_id: str,
+                 description: str,
+                 root_env_config: str,
+                 git_repo: str,
+                 git_commit: str):
+        self._experiment_id = experiment_id
         self._description = description
         self._root_env_config = root_env_config
         self._git_repo = git_repo
         self._git_commit = git_commit
 
     @property
-    def exp_id(self) -> str:
+    def experiment_id(self) -> str:
         """
-        ID of the current experiment.
+        ID of the experiment.
         """
-        return self._exp_id
+        return self._experiment_id
 
     @property
     def description(self) -> str:
         """
-        Description of the current experiment.
+        Description of the experiment.
         """
         return self._description
 
@@ -57,7 +68,7 @@ def root_env_config(self) -> Tuple[str, str, str]:
         return (self._root_env_config, self._git_repo, self._git_commit)
 
     def __repr__(self) -> str:
-        return f"Experiment :: {self._exp_id}: '{self._description}'"
+        return f"Experiment :: {self._experiment_id}: '{self._description}'"
 
     @property
     @abstractmethod
@@ -74,9 +85,9 @@ def objectives(self) -> Dict[str, str]:
 
     @property
     @abstractmethod
-    def trials(self) -> Dict[int, TrialData]:
+    def trials(self) -> Dict[int, "TrialData"]:
         """
-        Retrieve the trials' data from the storage.
+        Retrieve the experiment's trials' data from the storage.
 
         Returns
         -------
@@ -86,15 +97,39 @@ def trials(self) -> Dict[int, TrialData]:
 
     @property
     @abstractmethod
-    def results(self) -> pandas.DataFrame:
+    def tunable_configs(self) -> Dict[int, TunableConfigData]:
+        """
+        Retrieve the experiment's (tunable) configs' data from the storage.
+
+        Returns
+        -------
+        configs : Dict[int, TunableConfigData]
+            A dictionary of the configs' data, keyed by (tunable) config id.
+        """
+
+    @property
+    @abstractmethod
+    def tunable_config_trial_groups(self) -> Dict[int, "TunableConfigTrialGroupData"]:
+        """
+        Retrieve the Experiment's (Tunable) Config Trial Group data from the storage.
+
+        Returns
+        -------
+        trial_groups : Dict[int, TunableConfigTrialGroupData]
+            A dictionary of the trial groups' data, keyed by (tunable) config id.
+        """
+
+    @property
+    @abstractmethod
+    def results_df(self) -> pandas.DataFrame:
         """
         Retrieve all experimental results as a single DataFrame.
 
         Returns
         -------
         results : pandas.DataFrame
             A DataFrame with configurations and results from all trials of the experiment.
-            Has columns [trial_id, config_id, ts_start, ts_end, status]
+            Has columns [trial_id, tunable_config_id, tunable_config_trial_group_id, ts_start, ts_end, status]
             followed by tunable config parameters (prefixed with "config.") and
             trial results (prefixed with "result."). The latter can be NULLs if the
             trial was not successful.

diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py
@@ -293,19 +293,19 @@ class Trial(metaclass=ABCMeta):
 
         def __init__(self, *,
                      tunables: TunableGroups, experiment_id: str, trial_id: int,
-                     config_id: int, opt_target: str, opt_direction: Optional[str],
+                     tunable_config_id: int, opt_target: str, opt_direction: Optional[str],
                      config: Optional[Dict[str, Any]] = None):
             self._tunables = tunables
             self._experiment_id = experiment_id
             self._trial_id = trial_id
-            self._config_id = config_id
+            self._tunable_config_id = tunable_config_id
             self._opt_target = opt_target
             assert opt_direction in {None, "min", "max"}
             self._opt_direction = opt_direction
             self._config = config or {}
 
         def __repr__(self) -> str:
-            return f"{self._experiment_id}:{self._trial_id}:{self._config_id}"
+            return f"{self._experiment_id}:{self._trial_id}:{self._tunable_config_id}"
 
         @property
         def trial_id(self) -> int:
@@ -315,11 +315,11 @@ def trial_id(self) -> int:
             return self._trial_id
 
         @property
-        def config_id(self) -> int:
+        def tunable_config_id(self) -> int:
             """
-            ID of the current trial configuration.
+            ID of the current trial (tunable) configuration.
             """
-            return self._config_id
+            return self._tunable_config_id
 
         @property
         def opt_target(self) -> str: