Use number of bins instead of quantization interval in mlos_bench tunables (#835)

Closes #803 

> A future PR will rename the config schema property to reduce confusion about the change in semantics; deferring the rename also keeps this PR smaller.

---------

Co-authored-by: Brian Kroth <bpkroth@users.noreply.github.com>
Co-authored-by: Brian Kroth <bpkroth@microsoft.com>
3 people authored Aug 16, 2024
1 parent f3eb624 commit 2e4cfa2
Showing 12 changed files with 179 additions and 79 deletions.
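Note on the semantics: quantization previously specified the interval (step size) between allowed values; it now specifies the total number of evenly spaced bins across the range. For a [0, 1] float tunable, the old "quantization": 0.25 and the new "quantization": 5 describe the same five-point grid. A small illustration (not part of the diff):

import numpy as np

old_interval = 0.25  # old semantics: step size between quantized values
new_bins = 5         # new semantics: total number of quantized values
grid = np.linspace(0, 1, num=new_bins, endpoint=True)
assert np.allclose(grid, np.arange(0, 1 + old_interval, old_interval))
print(grid)  # [0.   0.25 0.5  0.75 1.  ]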
@@ -125,7 +125,8 @@
},
"quantization": {
"description": "The number of buckets to quantize the range into.",
"$comment": "type left unspecified here"
"type": "integer",
"exclusiveMinimum": 1
},
"log_scale": {
"description": "Whether to use log instead of linear scale for the range search.",
@@ -217,9 +218,7 @@
"$ref": "#/$defs/tunable_param_distribution"
},
"quantization": {
"$ref": "#/$defs/quantization",
"type": "integer",
"exclusiveMinimum": 1
"$ref": "#/$defs/quantization"
},
"log": {
"$ref": "#/$defs/log_scale"
@@ -267,9 +266,7 @@
"$ref": "#/$defs/tunable_param_distribution"
},
"quantization": {
"$ref": "#/$defs/quantization",
"type": "number",
"exclusiveMinimum": 0
"$ref": "#/$defs/quantization"
},
"log": {
"$ref": "#/$defs/log_scale"
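The schema change above pins the type down: quantization must now be an integer strictly greater than 1 (at least two bins) for both int and float tunables, instead of the previously unspecified numeric interval. A minimal sketch of what the tightened fragment accepts and rejects, using the jsonschema package on just this definition (not the project's full schema file):

import jsonschema

quantization_schema = {"type": "integer", "exclusiveMinimum": 1}

jsonschema.validate(11, quantization_schema)  # OK: at least two bins
for bad in (1, 0.25):  # a single bin and old-style float intervals are rejected
    try:
        jsonschema.validate(bad, quantization_schema)
    except jsonschema.ValidationError as exc:
        print(f"{bad!r}: {exc.message}")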
56 changes: 40 additions & 16 deletions mlos_bench/mlos_bench/optimizers/convert_configspace.py
@@ -11,21 +11,17 @@

from ConfigSpace import (
Beta,
- BetaFloatHyperparameter,
- BetaIntegerHyperparameter,
CategoricalHyperparameter,
Configuration,
ConfigurationSpace,
EqualsCondition,
Float,
Integer,
Normal,
- NormalFloatHyperparameter,
- NormalIntegerHyperparameter,
Uniform,
- UniformFloatHyperparameter,
- UniformIntegerHyperparameter,
)
+ from ConfigSpace.functional import quantize
+ from ConfigSpace.hyperparameters import NumericalHyperparameter
from ConfigSpace.types import NotSet

from mlos_bench.tunables.tunable import Tunable, TunableValue
@@ -53,6 +49,37 @@ def _normalize_weights(weights: List[float]) -> List[float]:
return [w / total for w in weights]


+ def _monkey_patch_quantization(hp: NumericalHyperparameter, quantization_bins: int) -> None:
+     """
+     Monkey-patch quantization into the Hyperparameter.
+
+     Parameters
+     ----------
+     hp : NumericalHyperparameter
+         ConfigSpace hyperparameter to patch.
+     quantization_bins : int
+         Number of bins to quantize the hyperparameter into.
+     """
+     if quantization_bins <= 1:
+         raise ValueError(f"{quantization_bins=} :: must be greater than 1.")
+
+     # Temporary workaround to dropped quantization support in ConfigSpace 1.0
+     # See Also: https://github.com/automl/ConfigSpace/issues/390
+     if not hasattr(hp, "sample_value_mlos_orig"):
+         setattr(hp, "sample_value_mlos_orig", hp.sample_value)
+
+     assert hasattr(hp, "sample_value_mlos_orig")
+     setattr(
+         hp,
+         "sample_value",
+         lambda size=None, **kwargs: quantize(
+             hp.sample_value_mlos_orig(size, **kwargs),
+             bounds=(hp.lower, hp.upper),
+             bins=quantization_bins,
+         ).astype(type(hp.default_value)),
+     )


def _tunable_to_configspace(
tunable: Tunable,
group_name: Optional[str] = None,
@@ -77,6 +104,7 @@ def _tunable_to_configspace(
cs : ConfigurationSpace
A ConfigurationSpace object that corresponds to the Tunable.
"""
+ # pylint: disable=too-complex
meta: Dict[Hashable, TunableValue] = {"cost": cost}
if group_name is not None:
meta["group"] = group_name
@@ -110,20 +138,12 @@
elif tunable.distribution is not None:
raise TypeError(f"Invalid Distribution Type: {tunable.distribution}")

- range_hp: Union[
-     BetaFloatHyperparameter,
-     BetaIntegerHyperparameter,
-     NormalFloatHyperparameter,
-     NormalIntegerHyperparameter,
-     UniformFloatHyperparameter,
-     UniformIntegerHyperparameter,
- ]
+ range_hp: NumericalHyperparameter
if tunable.type == "int":
range_hp = Integer(
name=tunable.name,
bounds=(int(tunable.range[0]), int(tunable.range[1])),
log=bool(tunable.is_log),
- # TODO: Restore quantization support (#803).
distribution=distribution,
default=(
int(tunable.default)
@@ -137,7 +157,6 @@
name=tunable.name,
bounds=tunable.range,
log=bool(tunable.is_log),
- # TODO: Restore quantization support (#803).
distribution=distribution,
default=(
float(tunable.default)
@@ -149,6 +168,11 @@
else:
raise TypeError(f"Invalid Parameter Type: {tunable.type}")

+ if tunable.quantization:
+     # Temporary workaround to dropped quantization support in ConfigSpace 1.0
+     # See Also: https://github.com/automl/ConfigSpace/issues/390
+     _monkey_patch_quantization(range_hp, tunable.quantization)

if not tunable.special:
return ConfigurationSpace({tunable.name: range_hp})

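ConfigSpace.functional.quantize, which the patched sample_value above delegates to, snaps raw samples onto bins evenly spaced points spanning the hyperparameter's bounds; the unit tests added below assert exactly that grid. A rough standalone sketch of the snapping in plain numpy (an illustration of the assumed behavior, not the library's implementation):

import numpy as np

def quantize_sketch(values, bounds, bins):
    # Snap each value to the nearest of `bins` evenly spaced points (inclusive of both bounds).
    grid = np.linspace(bounds[0], bounds[1], num=bins, endpoint=True)
    values = np.atleast_1d(np.asarray(values, dtype=float))
    nearest = np.abs(values[:, np.newaxis] - grid).argmin(axis=1)
    return grid[nearest]

print(quantize_sketch([3, 42, 98], bounds=(0, 100), bins=11))  # [  0.  40. 100.]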
6 changes: 3 additions & 3 deletions mlos_bench/mlos_bench/optimizers/grid_search_optimizer.py
@@ -47,7 +47,7 @@ def __init__(
self._suggested_configs: Set[Tuple[TunableValue, ...]] = set()

def _sanity_check(self) -> None:
- size = np.prod([tunable.cardinality for (tunable, _group) in self._tunables])
+ size = np.prod([tunable.cardinality or np.inf for (tunable, _group) in self._tunables])
if size == np.inf:
raise ValueError(
f"Unquantized tunables are not supported for grid search: {self._tunables}"
@@ -79,9 +79,9 @@ def _get_grid(self) -> Tuple[Tuple[str, ...], Dict[Tuple[TunableValue, ...], Non
for config in generate_grid(
self.config_space,
{
- tunable.name: int(tunable.cardinality)
+ tunable.name: tunable.cardinality or 0  # mypy wants an int
for (tunable, _group) in self._tunables
- if tunable.quantization or tunable.type == "int"
+ if tunable.is_numerical and tunable.cardinality
},
)
]
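Tunable.cardinality now returns None for unquantized numerical tunables, so the sanity check above substitutes np.inf, making the grid size infinite (and thus rejected) as soon as any continuous tunable is present. The guard in miniature (with a hypothetical grid_size helper):

import numpy as np

def grid_size(cardinalities):
    # None marks an unquantized (continuous) tunable, i.e. an infinite grid.
    return np.prod([c or np.inf for c in cardinalities])

assert grid_size([5, 11]) == 55
assert grid_size([5, None]) == np.inf  # would raise in _sanity_check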
@@ -6,7 +6,7 @@
"type": "float",
"default": 10,
"range": [1, 500],
"quantization": 0 // <-- should be greater than 0
"quantization": 1 // <-- should be greater than 1
}
}
}
@@ -7,7 +7,7 @@
"description": "Int",
"type": "int",
"default": 10,
"range": [1, 500],
"range": [0, 500],
"meta": {"suffix": "MB"},
"special": [-1],
"special_weights": [0.1],
@@ -26,7 +26,7 @@
"description": "Int",
"type": "int",
"default": 10,
"range": [1, 500],
"range": [0, 500],
"meta": {"suffix": "MB"},
"special": [-1],
"special_weights": [0.1],
@@ -48,7 +48,7 @@
"meta": {"scale": 1000, "prefix": "/proc/var/random/", "base": 2.71828},
"range": [1.1, 111.1],
"special": [-1.1],
"quantization": 10,
"quantization": 11,
"distribution": {
"type": "uniform"
},
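For reference, "quantization": 11 on the [1.1, 111.1] range above now yields 11 evenly spaced values with a step of 11.0 (assuming an inclusive linspace-style grid, as the tests below do):

import numpy as np

print(np.linspace(1.1, 111.1, num=11, endpoint=True))
# [  1.1  12.1  23.1  34.1  45.1  56.1  67.1  78.1  89.1 100.1 111.1]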
@@ -9,6 +9,7 @@
import random
from typing import Dict, List

+ import numpy as np
import pytest

from mlos_bench.environments.status import Status
@@ -40,7 +41,7 @@ def grid_search_tunables_config() -> dict:
"type": "float",
"range": [0, 1],
"default": 0.5,
"quantization": 0.25,
"quantization": 5,
},
},
},
@@ -99,7 +100,9 @@ def test_grid_search_grid(
) -> None:
"""Make sure that grid search optimizer initializes and works correctly."""
# Check the size.
- expected_grid_size = math.prod(tunable.cardinality for tunable, _group in grid_search_tunables)
+ expected_grid_size = math.prod(
+     tunable.cardinality or np.inf for tunable, _group in grid_search_tunables
+ )
assert expected_grid_size > len(grid_search_tunables)
assert len(grid_search_tunables_grid) == expected_grid_size
# Check for specific example configs inclusion.
1 change: 1 addition & 0 deletions mlos_bench/mlos_bench/tests/tunable_groups_fixtures.py
@@ -62,6 +62,7 @@
"type": "int",
"default": 2000000,
"range": [0, 1000000000],
"quantization": 11,
"log": false
}
}
31 changes: 21 additions & 10 deletions mlos_bench/mlos_bench/tests/tunables/test_tunables_size_props.py
@@ -4,7 +4,6 @@
#
"""Unit tests for checking tunable size properties."""

- import numpy as np
import pytest

from mlos_bench.tunables.tunable import Tunable
@@ -23,9 +22,9 @@ def test_tunable_int_size_props() -> None:
"default": 3,
},
)
- assert tunable.span == 4
- assert tunable.cardinality == 5
expected = [1, 2, 3, 4, 5]
+ assert tunable.span == 4
+ assert tunable.cardinality == len(expected)
assert list(tunable.quantized_values or []) == expected
assert list(tunable.values or []) == expected

@@ -41,7 +40,7 @@ def test_tunable_float_size_props() -> None:
},
)
assert tunable.span == 3.5
- assert tunable.cardinality == np.inf
+ assert tunable.cardinality is None
assert tunable.quantized_values is None
assert tunable.values is None

@@ -68,11 +67,17 @@ def test_tunable_quantized_int_size_props() -> None:
"""Test quantized tunable int size properties."""
tunable = Tunable(
name="test",
config={"type": "int", "range": [100, 1000], "default": 100, "quantization": 100},
config={
"type": "int",
"range": [100, 1000],
"default": 100,
"quantization": 10,
},
)
- assert tunable.span == 900
- assert tunable.cardinality == 10
expected = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
+ assert tunable.span == 900
+ assert tunable.cardinality == len(expected)
+ assert tunable.quantization == len(expected)
assert list(tunable.quantized_values or []) == expected
assert list(tunable.values or []) == expected

@@ -81,10 +86,16 @@ def test_tunable_quantized_float_size_props() -> None:
"""Test quantized tunable float size properties."""
tunable = Tunable(
name="test",
config={"type": "float", "range": [0, 1], "default": 0, "quantization": 0.1},
config={
"type": "float",
"range": [0, 1],
"default": 0,
"quantization": 11,
},
)
- assert tunable.span == 1
- assert tunable.cardinality == 11
expected = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+ assert tunable.span == 1
+ assert tunable.cardinality == len(expected)
+ assert tunable.quantization == len(expected)
assert pytest.approx(list(tunable.quantized_values or []), 0.0001) == expected
assert pytest.approx(list(tunable.values or []), 0.0001) == expected
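These tests pin down how the size properties interact under the bin-count semantics: span stays high - low, while cardinality is the number of quantized values, or None for a continuous range. A compact sketch with a hypothetical size_props helper (float ranges only):

from typing import Optional
import numpy as np

def size_props(low: float, high: float, quantization: Optional[int]):
    # Return (span, cardinality, quantized_values) for a float range.
    span = high - low
    if quantization is None:
        return span, None, None  # continuous: no finite grid
    values = np.linspace(low, high, num=quantization, endpoint=True)
    return span, len(values), values

span, cardinality, values = size_props(0, 1, 11)
assert span == 1 and cardinality == 11
assert np.allclose(values, [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])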
@@ -234,13 +234,15 @@ def test_numerical_quantization(tunable_type: TunableValueTypeName) -> None:
{{
"type": "{tunable_type}",
"range": [0, 100],
"quantization": 10,
"quantization": 11,
"default": 0
}}
"""
config = json.loads(json_config)
tunable = Tunable(name="test", config=config)
- assert tunable.quantization == 10
+ expected = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
+ assert tunable.quantization == len(expected)
+ assert pytest.approx(list(tunable.quantized_values or []), 1e-8) == expected
assert not tunable.is_log


@@ -0,0 +1,67 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Unit tests for ConfigSpace quantization monkey patching."""

import numpy as np
from ConfigSpace import UniformFloatHyperparameter, UniformIntegerHyperparameter
from numpy.random import RandomState

from mlos_bench.optimizers.convert_configspace import _monkey_patch_quantization
from mlos_bench.tests import SEED


def test_configspace_quant_int() -> None:
    """Check the quantization of an integer hyperparameter."""
    quantized_values = set(range(0, 101, 10))
    hp = UniformIntegerHyperparameter("hp", lower=0, upper=100, log=False)

    # Before patching: expect that at least one value is not quantized.
    assert not set(hp.sample_value(100)).issubset(quantized_values)

    _monkey_patch_quantization(hp, 11)
    # After patching: *all* values must belong to the set of quantized values.
    assert hp.sample_value() in quantized_values  # check scalar type
    assert set(hp.sample_value(100)).issubset(quantized_values)  # batch version


def test_configspace_quant_float() -> None:
    """Check the quantization of a float hyperparameter."""
    quantized_values = set(np.linspace(0, 1, num=5, endpoint=True))
    hp = UniformFloatHyperparameter("hp", lower=0, upper=1, log=False)

    # Before patching: expect that at least one value is not quantized.
    assert not set(hp.sample_value(100)).issubset(quantized_values)

    # 5 is a nice number of bins to avoid floating point errors.
    _monkey_patch_quantization(hp, 5)
    # After patching: *all* values must belong to the set of quantized values.
    assert hp.sample_value() in quantized_values  # check scalar type
    assert set(hp.sample_value(100)).issubset(quantized_values)  # batch version


def test_configspace_quant_repatch() -> None:
    """Repatch the same hyperparameter with a different number of bins."""
    quantized_values = set(range(0, 101, 10))
    hp = UniformIntegerHyperparameter("hp", lower=0, upper=100, log=False)

    # Before patching: expect that at least one value is not quantized.
    assert not set(hp.sample_value(100)).issubset(quantized_values)

    _monkey_patch_quantization(hp, 11)
    # After patching: *all* values must belong to the set of quantized values.
    samples = hp.sample_value(100, seed=RandomState(SEED))
    assert set(samples).issubset(quantized_values)

    # Patch the same hyperparameter again and check that the results are the same.
    _monkey_patch_quantization(hp, 11)
    # After patching: *all* values must belong to the set of quantized values.
    assert all(samples == hp.sample_value(100, seed=RandomState(SEED)))

    # Repatch with the higher number of bins and make sure we get new values.
    _monkey_patch_quantization(hp, 21)
    samples_set = set(hp.sample_value(100, seed=RandomState(SEED)))
    quantized_values_new = set(range(5, 96, 10))
    assert samples_set.issubset(set(range(0, 101, 5)))
    assert len(samples_set - quantized_values_new) < len(samples_set)
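The repatch test works because the first patch stashes the pristine sampler under sample_value_mlos_orig and every later patch rewraps that original rather than the already-quantized wrapper, so repatching replaces the quantization instead of compounding it. The pattern in miniature (hypothetical names, same idea as _monkey_patch_quantization):

class Sampler:
    def sample_value(self):
        return 0.123456  # stand-in for a raw, unquantized sample

def patch(obj, transform):
    # Stash the original sampler once; always wrap the original, never the wrapper.
    if not hasattr(obj, "sample_value_orig"):
        obj.sample_value_orig = obj.sample_value
    obj.sample_value = lambda: transform(obj.sample_value_orig())

sampler = Sampler()
patch(sampler, lambda v: round(v, 1))
patch(sampler, lambda v: round(v, 2))  # repatch replaces, not compounds
assert sampler.sample_value() == 0.12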
