Skip to content

Commit

Permalink
Remove TunableGroups from Storage classes and add TrialData tests (#644)
Browse files Browse the repository at this point in the history
Work split out from #633

- Reorgs the unit test fixtures for reuse by mlos_viz (future PR)
- Removes TunableGroups from Storage classes. Currently unnecessary,
belongs in Experiment, and causes inconsistencies with ExperimentData
interactions.
- Adjusts the initialization code to match.

---------

Co-authored-by: Sergiy Matusevych <sergiym@microsoft.com>
  • Loading branch information
bpkroth and motus authored Jan 23, 2024
1 parent 08b3a94 commit c786a25
Show file tree
Hide file tree
Showing 27 changed files with 574 additions and 276 deletions.
39 changes: 14 additions & 25 deletions mlos_bench/mlos_bench/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
import logging
import sys

from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
from typing import Any, Dict, Iterable, List, Optional, Tuple

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.dict_templater import DictTemplater
from mlos_bench.util import BaseTypeVar, try_parse_val
from mlos_bench.util import try_parse_val

from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
Expand Down Expand Up @@ -338,7 +338,12 @@ def _load_optimizer(self, args_optimizer: Optional[str]) -> Optimizer:
config = {key: val for key, val in self.global_config.items() if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS}
return OneShotOptimizer(
self.tunables, config=config, service=self._parent_service)
optimizer = self._load(Optimizer, args_optimizer, ConfigSchema.OPTIMIZER) # type: ignore[type-abstract]
class_config = self._config_loader.load_config(args_optimizer, ConfigSchema.OPTIMIZER)
assert isinstance(class_config, Dict)
optimizer = self._config_loader.build_optimizer(tunables=self.tunables,
service=self._parent_service,
config=class_config,
global_config=self.global_config)
return optimizer

def _load_storage(self, args_storage: Optional[str]) -> Storage:
Expand All @@ -350,31 +355,15 @@ def _load_storage(self, args_storage: Optional[str]) -> Storage:
if args_storage is None:
# pylint: disable=import-outside-toplevel
from mlos_bench.storage.sql.storage import SqlStorage
return SqlStorage(self.tunables, service=self._parent_service,
return SqlStorage(service=self._parent_service,
config={
"drivername": "sqlite",
"database": ":memory:",
"lazy_schema_create": True,
})
storage = self._load(Storage, args_storage, ConfigSchema.STORAGE) # type: ignore[type-abstract]
return storage

def _load(self, cls: Type[BaseTypeVar], json_file_name: str, schema_type: Optional[ConfigSchema]) -> BaseTypeVar:
"""
Create a new instance of class `cls` from JSON configuration.
Note: For abstract types, mypy will complain at the call site.
Use "# type: ignore[type-abstract]" to suppress the warning.
See Also: https://github.com/python/mypy/issues/4717
"""
class_config = self._config_loader.load_config(json_file_name, schema_type)
class_config = self._config_loader.load_config(args_storage, ConfigSchema.STORAGE)
assert isinstance(class_config, Dict)
ret = self._config_loader.build_generic(
base_cls=cls,
tunables=self.tunables,
service=self._parent_service,
config=class_config,
global_config=self.global_config
)
assert isinstance(ret, cls)
return ret
storage = self._config_loader.build_storage(service=self._parent_service,
config=class_config,
global_config=self.global_config)
return storage
1 change: 1 addition & 0 deletions mlos_bench/mlos_bench/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def _optimize(*,
trial_id=trial_id,
root_env_config=root_env_config,
description=env.name,
tunables=env.tunable_params,
opt_target=opt.target,
opt_direction=opt.direction,
) as exp:
Expand Down
61 changes: 46 additions & 15 deletions mlos_bench/mlos_bench/services/config_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,28 @@
import json # For logging only
import logging

from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, TYPE_CHECKING

import json5 # To read configs with comments and other JSON5 syntax features
from jsonschema import ValidationError, SchemaError

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.base_environment import Environment
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.services.base_service import Service
from mlos_bench.services.types.config_loader_type import SupportsConfigLoading
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.util import instantiate_from_config, merge_parameters, path_join, preprocess_dynamic_configs, BaseTypeVar
from mlos_bench.util import instantiate_from_config, merge_parameters, path_join, preprocess_dynamic_configs

if sys.version_info < (3, 10):
from importlib_resources import files
else:
from importlib.resources import files

if TYPE_CHECKING:
from mlos_bench.storage.base_storage import Storage


_LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -228,23 +232,20 @@ def prepare_class_load(self, config: Dict[str, Any],

return (class_name, class_config)

def build_generic(self, *,
base_cls: Type[BaseTypeVar],
tunables: TunableGroups,
service: Service,
config: Dict[str, Any],
global_config: Optional[Dict[str, Any]] = None) -> BaseTypeVar:
def build_optimizer(self, *,
tunables: TunableGroups,
service: Service,
config: Dict[str, Any],
global_config: Optional[Dict[str, Any]] = None) -> Optimizer:
"""
Generic instantiation of mlos_bench objects like Storage and Optimizer
Instantiation of mlos_bench Optimizer
that depend on Service and TunableGroups.
A class *MUST* have a constructor that takes four named arguments:
(tunables, config, global_config, service)
Parameters
----------
base_cls : ClassType
A base class of the object to instantiate.
tunables : TunableGroups
Tunable parameters of the environment. We need them to validate the
configurations of merged-in experiments and restored/pending trials.
Expand All @@ -257,19 +258,49 @@ def build_generic(self, *,
Returns
-------
inst : Any
A new instance of the `cls` class.
inst : Optimizer
A new instance of the `Optimizer` class.
"""
tunables_path = config.get("include_tunables")
if tunables_path is not None:
tunables = self._load_tunables(tunables_path, tunables)
(class_name, class_config) = self.prepare_class_load(config, global_config)
inst = instantiate_from_config(base_cls, class_name,
inst = instantiate_from_config(Optimizer, class_name, # type: ignore[type-abstract]
tunables=tunables,
config=class_config,
global_config=global_config,
service=service)
_LOG.info("Created: %s %s", base_cls.__name__, inst)
_LOG.info("Created: Optimizer %s", inst)
return inst

def build_storage(self, *,
service: Service,
config: Dict[str, Any],
global_config: Optional[Dict[str, Any]] = None) -> "Storage":
"""
Instantiation of mlos_bench Storage objects.
Parameters
----------
service: Service
An optional service object (e.g., providing methods to load config files, etc.)
config : dict
Configuration of the class to instantiate, as loaded from JSON.
global_config : dict
Global configuration parameters (optional).
Returns
-------
inst : Storage
A new instance of the Storage class.
"""
(class_name, class_config) = self.prepare_class_load(config, global_config)
from mlos_bench.storage.base_storage import Storage # pylint: disable=import-outside-toplevel
inst = instantiate_from_config(Storage, class_name, # type: ignore[type-abstract]
config=class_config,
global_config=global_config,
service=service)
_LOG.info("Created: Storage %s", inst)
return inst

def build_environment(self, # pylint: disable=too-many-arguments
Expand Down
2 changes: 1 addition & 1 deletion mlos_bench/mlos_bench/storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@

__all__ = [
'Storage',
'from_config'
'from_config',
]
5 changes: 3 additions & 2 deletions mlos_bench/mlos_bench/storage/base_experiment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def results(self) -> pandas.DataFrame:
results : pandas.DataFrame
A DataFrame with configurations and results from all trials of the experiment.
Has columns [trial_id, config_id, ts_start, ts_end, status]
followed by tunable config parameters and trial results. The latter can be NULLs
if the trial was not successful.
followed by tunable config parameters (prefixed with "config.") and
trial results (prefixed with "result."). The latter can be NULLs if the
trial was not successful.
"""
16 changes: 9 additions & 7 deletions mlos_bench/mlos_bench/storage/base_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ class Storage(metaclass=ABCMeta):
"""

def __init__(self,
tunables: TunableGroups,
config: Dict[str, Any],
global_config: Optional[dict] = None,
service: Optional[Service] = None):
Expand All @@ -39,15 +38,11 @@ def __init__(self,
Parameters
----------
tunables : TunableGroups
Tunable parameters of the environment. We need them to validate the
configurations of merged-in experiments and restored/pending trials.
config : dict
Free-format key/value pairs of configuration parameters.
"""
_LOG.debug("Storage config: %s", config)
self._validate_json_config(config)
self._tunables = tunables.copy()
self._service = service
self._config = config.copy()
self._global_config = global_config or {}
Expand Down Expand Up @@ -83,6 +78,7 @@ def experiment(self, *,
trial_id: int,
root_env_config: str,
description: str,
tunables: TunableGroups,
opt_target: str,
opt_direction: Optional[str]) -> 'Storage.Experiment':
"""
Expand All @@ -102,6 +98,7 @@ def experiment(self, *,
A path to the root JSON configuration file of the benchmarking environment.
description : str
Human-readable description of the experiment.
tunables : TunableGroups
opt_target : str
Name of metric we're optimizing for.
opt_direction: Optional[str]
Expand Down Expand Up @@ -204,6 +201,11 @@ def description(self) -> str:
"""Get the Experiment's description"""
return self._description

@property
def tunables(self) -> TunableGroups:
"""Get the Experiment's tunables"""
return self._tunables

@property
def opt_target(self) -> str:
"""Get the Experiment's optimization target"""
Expand Down Expand Up @@ -271,7 +273,7 @@ def new_trial(self, tunables: TunableGroups,
Parameters
----------
tunables : TunableGroups
Tunable parameters of the experiment.
Tunable parameters to use for the trial.
config : dict
Key/value pairs of additional non-tunable parameters of the trial.
Expand Down Expand Up @@ -303,7 +305,7 @@ def __init__(self, *,
self._config = config or {}

def __repr__(self) -> str:
return f"{self._experiment_id}:{self._trial_id}"
return f"{self._experiment_id}:{self._trial_id}:{self._config_id}"

@property
def trial_id(self) -> int:
Expand Down
Loading

0 comments on commit c786a25

Please sign in to comment.