Use number of bins instead of quantization interval in mlos_bench tunables (#835)

Closes #803 

> A future PR will rename the config schema property to reduce confusion about the change in semantics; deferring the rename also keeps this PR smaller.

---------

Co-authored-by: Brian Kroth <bpkroth@users.noreply.github.com>
Co-authored-by: Brian Kroth <bpkroth@microsoft.com>
3 people authored Aug 16, 2024
1 parent f3eb624 commit 2e4cfa2
Showing 12 changed files with 179 additions and 79 deletions.
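Note on the semantics: quantization previously specified the interval (step size) between allowed values; it now specifies the total number of evenly spaced bins across the range. For a [0, 1] float tunable, the old "quantization": 0.25 and the new "quantization": 5 describe the same five-point grid. A small illustration (not part of the diff):

import numpy as np

old_interval = 0.25  # old semantics: step size between quantized values
new_bins = 5         # new semantics: total number of quantized values
grid = np.linspace(0, 1, num=new_bins, endpoint=True)
assert np.allclose(grid, np.arange(0, 1 + old_interval, old_interval))
print(grid)  # [0.   0.25 0.5  0.75 1.  ]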
@@ -125,7 +125,8 @@
},
"quantization": {
"description": "The number of buckets to quantize the range into.",
"$comment": "type left unspecified here"
"type": "integer",
"exclusiveMinimum": 1
},
"log_scale": {
"description": "Whether to use log instead of linear scale for the range search.",
@@ -217,9 +218,7 @@
"$ref": "#/$defs/tunable_param_distribution"
},
"quantization": {
"$ref": "#/$defs/quantization",
"type": "integer",
"exclusiveMinimum": 1
"$ref": "#/$defs/quantization"
},
"log": {
"$ref": "#/$defs/log_scale"
@@ -267,9 +266,7 @@
"$ref": "#/$defs/tunable_param_distribution"
},
"quantization": {
"$ref": "#/$defs/quantization",
"type": "number",
"exclusiveMinimum": 0
"$ref": "#/$defs/quantization"
},
"log": {
"$ref": "#/$defs/log_scale"
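The schema change above pins the type down: quantization must now be an integer strictly greater than 1 (at least two bins) for both int and float tunables, instead of the previously unspecified numeric interval. A minimal sketch of what the tightened fragment accepts and rejects, using the jsonschema package on just this definition (not the project's full schema file):

import jsonschema

quantization_schema = {"type": "integer", "exclusiveMinimum": 1}

jsonschema.validate(11, quantization_schema)  # OK: at least two bins
for bad in (1, 0.25):  # a single bin and old-style float intervals are rejected
    try:
        jsonschema.validate(bad, quantization_schema)
    except jsonschema.ValidationError as exc:
        print(f"{bad!r}: {exc.message}")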
56 changes: 40 additions & 16 deletions mlos_bench/mlos_bench/optimizers/convert_configspace.py
@@ -11,21 +11,17 @@

from ConfigSpace import (
Beta,
- BetaFloatHyperparameter,
- BetaIntegerHyperparameter,
CategoricalHyperparameter,
Configuration,
ConfigurationSpace,
EqualsCondition,
Float,
Integer,
Normal,
- NormalFloatHyperparameter,
- NormalIntegerHyperparameter,
Uniform,
- UniformFloatHyperparameter,
- UniformIntegerHyperparameter,
)
+ from ConfigSpace.functional import quantize
+ from ConfigSpace.hyperparameters import NumericalHyperparameter
from ConfigSpace.types import NotSet

from mlos_bench.tunables.tunable import Tunable, TunableValue
@@ -53,6 +49,37 @@ def _normalize_weights(weights: List[float]) -> List[float]:
return [w / total for w in weights]


+ def _monkey_patch_quantization(hp: NumericalHyperparameter, quantization_bins: int) -> None:
+     """
+     Monkey-patch quantization into the Hyperparameter.
+
+     Parameters
+     ----------
+     hp : NumericalHyperparameter
+         ConfigSpace hyperparameter to patch.
+     quantization_bins : int
+         Number of bins to quantize the hyperparameter into.
+     """
+     if quantization_bins <= 1:
+         raise ValueError(f"{quantization_bins=} :: must be greater than 1.")
+
+     # Temporary workaround to dropped quantization support in ConfigSpace 1.0
+     # See Also: https://github.com/automl/ConfigSpace/issues/390
+     if not hasattr(hp, "sample_value_mlos_orig"):
+         setattr(hp, "sample_value_mlos_orig", hp.sample_value)
+
+     assert hasattr(hp, "sample_value_mlos_orig")
+     setattr(
+         hp,
+         "sample_value",
+         lambda size=None, **kwargs: quantize(
+             hp.sample_value_mlos_orig(size, **kwargs),
+             bounds=(hp.lower, hp.upper),
+             bins=quantization_bins,
+         ).astype(type(hp.default_value)),
+     )


def _tunable_to_configspace(
tunable: Tunable,
group_name: Optional[str] = None,
@@ -77,6 +104,7 @@ def _tunable_to_configspace(
cs : ConfigurationSpace
A ConfigurationSpace object that corresponds to the Tunable.
"""
+ # pylint: disable=too-complex
meta: Dict[Hashable, TunableValue] = {"cost": cost}
if group_name is not None:
meta["group"] = group_name
@@ -110,20 +138,12 @@
elif tunable.distribution is not None:
raise TypeError(f"Invalid Distribution Type: {tunable.distribution}")

- range_hp: Union[
-     BetaFloatHyperparameter,
-     BetaIntegerHyperparameter,
-     NormalFloatHyperparameter,
-     NormalIntegerHyperparameter,
-     UniformFloatHyperparameter,
-     UniformIntegerHyperparameter,
- ]
+ range_hp: NumericalHyperparameter
if tunable.type == "int":
range_hp = Integer(
name=tunable.name,
bounds=(int(tunable.range[0]), int(tunable.range[1])),
log=bool(tunable.is_log),
- # TODO: Restore quantization support (#803).
distribution=distribution,
default=(
int(tunable.default)
@@ -137,7 +157,6 @@
name=tunable.name,
bounds=tunable.range,
log=bool(tunable.is_log),
- # TODO: Restore quantization support (#803).
distribution=distribution,
default=(
float(tunable.default)
@@ -149,6 +168,11 @@
else:
raise TypeError(f"Invalid Parameter Type: {tunable.type}")

+ if tunable.quantization:
+     # Temporary workaround to dropped quantization support in ConfigSpace 1.0
+     # See Also: https://github.com/automl/ConfigSpace/issues/390
+     _monkey_patch_quantization(range_hp, tunable.quantization)

if not tunable.special:
return ConfigurationSpace({tunable.name: range_hp})

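ConfigSpace.functional.quantize, which the patched sample_value above delegates to, snaps raw samples onto bins evenly spaced points spanning the hyperparameter's bounds; the unit tests added below assert exactly that grid. A rough standalone sketch of the snapping in plain numpy (an illustration of the assumed behavior, not the library's implementation):

import numpy as np

def quantize_sketch(values, bounds, bins):
    # Snap each value to the nearest of `bins` evenly spaced points (inclusive of both bounds).
    grid = np.linspace(bounds[0], bounds[1], num=bins, endpoint=True)
    values = np.atleast_1d(np.asarray(values, dtype=float))
    nearest = np.abs(values[:, np.newaxis] - grid).argmin(axis=1)
    return grid[nearest]

print(quantize_sketch([3, 42, 98], bounds=(0, 100), bins=11))  # [  0.  40. 100.]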
6 changes: 3 additions & 3 deletions mlos_bench/mlos_bench/optimizers/grid_search_optimizer.py
@@ -47,7 +47,7 @@ def __init__(
self._suggested_configs: Set[Tuple[TunableValue, ...]] = set()

def _sanity_check(self) -> None:
- size = np.prod([tunable.cardinality for (tunable, _group) in self._tunables])
+ size = np.prod([tunable.cardinality or np.inf for (tunable, _group) in self._tunables])
if size == np.inf:
raise ValueError(
f"Unquantized tunables are not supported for grid search: {self._tunables}"
@@ -79,9 +79,9 @@ def _get_grid(self) -> Tuple[Tuple[str, ...], Dict[Tuple[TunableValue, ...], Non
for config in generate_grid(
self.config_space,
{
- tunable.name: int(tunable.cardinality)
+ tunable.name: tunable.cardinality or 0  # mypy wants an int
for (tunable, _group) in self._tunables
- if tunable.quantization or tunable.type == "int"
+ if tunable.is_numerical and tunable.cardinality
},
)
]
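Tunable.cardinality now returns None for unquantized numerical tunables, so the sanity check above substitutes np.inf, making the grid size infinite (and thus rejected) as soon as any continuous tunable is present. The guard in miniature (with a hypothetical grid_size helper):

import numpy as np

def grid_size(cardinalities):
    # None marks an unquantized (continuous) tunable, i.e. an infinite grid.
    return np.prod([c or np.inf for c in cardinalities])

assert grid_size([5, 11]) == 55
assert grid_size([5, None]) == np.inf  # would raise in _sanity_check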
@@ -6,7 +6,7 @@
"type": "float",
"default": 10,
"range": [1, 500],
"quantization": 0 // <-- should be greater than 0
"quantization": 1 // <-- should be greater than 1
}
}
}
@@ -7,7 +7,7 @@
"description": "Int",
"type": "int",
"default": 10,
"range": [1, 500],
"range": [0, 500],
"meta": {"suffix": "MB"},
"special": [-1],
"special_weights": [0.1],
@@ -26,7 +26,7 @@
"description": "Int",
"type": "int",
"default": 10,
"range": [1, 500],
"range": [0, 500],
"meta": {"suffix": "MB"},
"special": [-1],
"special_weights": [0.1],
@@ -48,7 +48,7 @@
"meta": {"scale": 1000, "prefix": "/proc/var/random/", "base": 2.71828},
"range": [1.1, 111.1],
"special": [-1.1],
"quantization": 10,
"quantization": 11,
"distribution": {
"type": "uniform"
},
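For reference, "quantization": 11 on the [1.1, 111.1] range above now yields 11 evenly spaced values with a step of 11.0 (assuming an inclusive linspace-style grid, as the tests below do):

import numpy as np

print(np.linspace(1.1, 111.1, num=11, endpoint=True))
# [  1.1  12.1  23.1  34.1  45.1  56.1  67.1  78.1  89.1 100.1 111.1]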
@@ -9,6 +9,7 @@
import random
from typing import Dict, List

+ import numpy as np
import pytest

from mlos_bench.environments.status import Status
@@ -40,7 +41,7 @@ def grid_search_tunables_config() -> dict:
"type": "float",
"range": [0, 1],
"default": 0.5,
"quantization": 0.25,
"quantization": 5,
},
},
},
@@ -99,7 +100,9 @@ def test_grid_search_grid(
) -> None:
"""Make sure that grid search optimizer initializes and works correctly."""
# Check the size.
- expected_grid_size = math.prod(tunable.cardinality for tunable, _group in grid_search_tunables)
+ expected_grid_size = math.prod(
+     tunable.cardinality or np.inf for tunable, _group in grid_search_tunables
+ )
assert expected_grid_size > len(grid_search_tunables)
assert len(grid_search_tunables_grid) == expected_grid_size
# Check for specific example configs inclusion.
1 change: 1 addition & 0 deletions mlos_bench/mlos_bench/tests/tunable_groups_fixtures.py
@@ -62,6 +62,7 @@
"type": "int",
"default": 2000000,
"range": [0, 1000000000],
"quantization": 11,
"log": false
}
}
31 changes: 21 additions & 10 deletions mlos_bench/mlos_bench/tests/tunables/test_tunables_size_props.py
@@ -4,7 +4,6 @@
#
"""Unit tests for checking tunable size properties."""

- import numpy as np
import pytest

from mlos_bench.tunables.tunable import Tunable
@@ -23,9 +22,9 @@ def test_tunable_int_size_props() -> None:
"default": 3,
},
)
- assert tunable.span == 4
- assert tunable.cardinality == 5
expected = [1, 2, 3, 4, 5]
+ assert tunable.span == 4
+ assert tunable.cardinality == len(expected)
assert list(tunable.quantized_values or []) == expected
assert list(tunable.values or []) == expected

@@ -41,7 +40,7 @@ def test_tunable_float_size_props() -> None:
},
)
assert tunable.span == 3.5
- assert tunable.cardinality == np.inf
+ assert tunable.cardinality is None
assert tunable.quantized_values is None
assert tunable.values is None

@@ -68,11 +67,17 @@ def test_tunable_quantized_int_size_props() -> None:
"""Test quantized tunable int size properties."""
tunable = Tunable(
name="test",
config={"type": "int", "range": [100, 1000], "default": 100, "quantization": 100},
config={
"type": "int",
"range": [100, 1000],
"default": 100,
"quantization": 10,
},
)
- assert tunable.span == 900
- assert tunable.cardinality == 10
expected = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
+ assert tunable.span == 900
+ assert tunable.cardinality == len(expected)
+ assert tunable.quantization == len(expected)
assert list(tunable.quantized_values or []) == expected
assert list(tunable.values or []) == expected

@@ -81,10 +86,16 @@ def test_tunable_quantized_float_size_props() -> None:
"""Test quantized tunable float size properties."""
tunable = Tunable(
name="test",
config={"type": "float", "range": [0, 1], "default": 0, "quantization": 0.1},
config={
"type": "float",
"range": [0, 1],
"default": 0,
"quantization": 11,
},
)
- assert tunable.span == 1
- assert tunable.cardinality == 11
expected = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+ assert tunable.span == 1
+ assert tunable.cardinality == len(expected)
+ assert tunable.quantization == len(expected)
assert pytest.approx(list(tunable.quantized_values or []), 0.0001) == expected
assert pytest.approx(list(tunable.values or []), 0.0001) == expected
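These tests pin down how the size properties interact under the bin-count semantics: span stays high - low, while cardinality is the number of quantized values, or None for a continuous range. A compact sketch with a hypothetical size_props helper (float ranges only):

from typing import Optional
import numpy as np

def size_props(low: float, high: float, quantization: Optional[int]):
    # Return (span, cardinality, quantized_values) for a float range.
    span = high - low
    if quantization is None:
        return span, None, None  # continuous: no finite grid
    values = np.linspace(low, high, num=quantization, endpoint=True)
    return span, len(values), values

span, cardinality, values = size_props(0, 1, 11)
assert span == 1 and cardinality == 11
assert np.allclose(values, [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])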
@@ -234,13 +234,15 @@ def test_numerical_quantization(tunable_type: TunableValueTypeName) -> None:
{{
"type": "{tunable_type}",
"range": [0, 100],
"quantization": 10,
"quantization": 11,
"default": 0
}}
"""
config = json.loads(json_config)
tunable = Tunable(name="test", config=config)
- assert tunable.quantization == 10
+ expected = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
+ assert tunable.quantization == len(expected)
+ assert pytest.approx(list(tunable.quantized_values or []), 1e-8) == expected
assert not tunable.is_log


@@ -0,0 +1,67 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Unit tests for ConfigSpace quantization monkey patching."""

import numpy as np
from ConfigSpace import UniformFloatHyperparameter, UniformIntegerHyperparameter
from numpy.random import RandomState

from mlos_bench.optimizers.convert_configspace import _monkey_patch_quantization
from mlos_bench.tests import SEED


def test_configspace_quant_int() -> None:
    """Check the quantization of an integer hyperparameter."""
    quantized_values = set(range(0, 101, 10))
    hp = UniformIntegerHyperparameter("hp", lower=0, upper=100, log=False)

    # Before patching: expect that at least one value is not quantized.
    assert not set(hp.sample_value(100)).issubset(quantized_values)

    _monkey_patch_quantization(hp, 11)
    # After patching: *all* values must belong to the set of quantized values.
    assert hp.sample_value() in quantized_values  # check scalar type
    assert set(hp.sample_value(100)).issubset(quantized_values)  # batch version


def test_configspace_quant_float() -> None:
    """Check the quantization of a float hyperparameter."""
    quantized_values = set(np.linspace(0, 1, num=5, endpoint=True))
    hp = UniformFloatHyperparameter("hp", lower=0, upper=1, log=False)

    # Before patching: expect that at least one value is not quantized.
    assert not set(hp.sample_value(100)).issubset(quantized_values)

    # 5 is a nice number of bins to avoid floating point errors.
    _monkey_patch_quantization(hp, 5)
    # After patching: *all* values must belong to the set of quantized values.
    assert hp.sample_value() in quantized_values  # check scalar type
    assert set(hp.sample_value(100)).issubset(quantized_values)  # batch version


def test_configspace_quant_repatch() -> None:
    """Repatch the same hyperparameter with a different number of bins."""
    quantized_values = set(range(0, 101, 10))
    hp = UniformIntegerHyperparameter("hp", lower=0, upper=100, log=False)

    # Before patching: expect that at least one value is not quantized.
    assert not set(hp.sample_value(100)).issubset(quantized_values)

    _monkey_patch_quantization(hp, 11)
    # After patching: *all* values must belong to the set of quantized values.
    samples = hp.sample_value(100, seed=RandomState(SEED))
    assert set(samples).issubset(quantized_values)

    # Patch the same hyperparameter again and check that the results are the same.
    _monkey_patch_quantization(hp, 11)
    # After patching: *all* values must belong to the set of quantized values.
    assert all(samples == hp.sample_value(100, seed=RandomState(SEED)))

    # Repatch with the higher number of bins and make sure we get new values.
    _monkey_patch_quantization(hp, 21)
    samples_set = set(hp.sample_value(100, seed=RandomState(SEED)))
    quantized_values_new = set(range(5, 96, 10))
    assert samples_set.issubset(set(range(0, 101, 5)))
    assert len(samples_set - quantized_values_new) < len(samples_set)
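The repatch test works because the first patch stashes the pristine sampler under sample_value_mlos_orig and every later patch rewraps that original rather than the already-quantized wrapper, so repatching replaces the quantization instead of compounding it. The pattern in miniature (hypothetical names, same idea as _monkey_patch_quantization):

class Sampler:
    def sample_value(self):
        return 0.123456  # stand-in for a raw, unquantized sample

def patch(obj, transform):
    # Stash the original sampler once; always wrap the original, never the wrapper.
    if not hasattr(obj, "sample_value_orig"):
        obj.sample_value_orig = obj.sample_value
    obj.sample_value = lambda: transform(obj.sample_value_orig())

sampler = Sampler()
patch(sampler, lambda v: round(v, 1))
patch(sampler, lambda v: round(v, 2))  # repatch replaces, not compounds
assert sampler.sample_value() == 0.12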
