Solved issue #139: Reimplemented Latin Hypercube Sampling with SciPy
fjwillemsen committed Sep 19, 2023
1 parent 258e003 commit 9a7b47f
Showing 2 changed files with 49 additions and 27 deletions.
56 changes: 31 additions & 25 deletions kernel_tuner/strategies/bayes_opt.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 from scipy.stats import norm
+from scipy.stats.qmc import LatinHypercube
 
 # BO imports
 from kernel_tuner.searchspace import Searchspace
@@ -164,8 +165,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         "multi-advanced",
     ),
     samplingmethod=(
-        "Method used for initial sampling the parameter space, only random is supported as LHS is deprecated",
-        "random",
+        "Method used for initial sampling the parameter space, either random or Latin Hypercube Sampling (LHS)",
+        "lhs",
     ),
     popsize=("Number of initial samples", 20),
 )
@@ -187,8 +188,7 @@ def __init__(
         # supported hyperparameter values
         self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
         self.supported_methods = supported_methods
-        self.supported_sampling_methods = ["random"]
-        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
+        self.supported_sampling_methods = ["random", "lhs"]
 
         def get_hyperparam(name: str, default, supported_values=list()):
             value = tuning_options.strategy_options.get(name, default)
@@ -210,9 +210,8 @@ def get_hyperparam(name: str, default, supported_values=list()):
         self.num_initial_samples = get_hyperparam("popsize", 20)
         if self.num_initial_samples < 0:
             raise ValueError(f"Number of initial samples (popsize) must be >= 0 (given: {self.num_initial_samples})")
-        self.sampling_method = get_hyperparam("samplingmethod", "random", self.supported_sampling_methods)
-        self.sampling_crit = get_hyperparam("samplingcriterion", "maximin", self.supported_sampling_criterion)
-        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
+        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
+        # note: more parameters are available for LHS if required: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.qmc.LatinHypercube.html
 
         # set acquisition function hyperparameter defaults where missing
         if "explorationfactor" not in acq_params:
@@ -478,26 +477,32 @@ def draw_random_sample(self) -> Tuple[list, int]:
 
     def draw_latin_hypercube_samples(self, num_samples: int) -> list:
         """Draws an LHS-distributed sample from the search space."""
-        from skopt.sampler import Lhs
-
+        # setup, removes params with single value because they are not in the normalized searchspace
         if self.searchspace_size < num_samples:
             raise ValueError("Can't sample more than the size of the search space")
-        if self.sampling_crit is None:
-            lhs = Lhs(lhs_type="centered", criterion=None)
-        else:
-            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
-        param_configs = lhs.generate(self.dimensions(), num_samples)
+        values_per_parameter = list(param for param in self.dimensions() if len(param) > 1)
+        num_dimensions = len(values_per_parameter)
+
+        # draw Latin Hypercube samples
+        sampler = LatinHypercube(d=num_dimensions)
+        lower_bounds = [0 for _ in range(num_dimensions)]
+        upper_bounds = [len(param) for param in values_per_parameter]
+        samples = sampler.integers(l_bounds=lower_bounds, u_bounds=upper_bounds, n=num_samples)
+        param_configs = list(tuple(values_per_parameter[p_i][v_i] for p_i, v_i in enumerate(s)) for s in samples)
+
+        # only return valid samples
         indices = list()
         normalized_param_configs = list()
-        for i in range(len(param_configs) - 1):
+        for param_config in param_configs:
+            normalized_param_config = self.normalize_param_config(param_config)
             try:
-                param_config = self.normalize_param_config(param_configs[i])
-                index = self.find_param_config_index(param_config)
+                index = self.find_param_config_index(normalized_param_config)
                 indices.append(index)
-                normalized_param_configs.append(param_config)
+                normalized_param_configs.append(normalized_param_config)
             except ValueError:
-                """Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
-                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped.
+                """With search space restrictions, the search space may not be a cartesian product of parameter values.
+                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace.
+                These configurations are skipped and replaced with a randomly drawn configuration.
                 """
                 continue
         return list(zip(normalized_param_configs, indices))
@@ -507,10 +512,7 @@ def initial_sample(self):
         if self.num_initial_samples <= 0:
             raise ValueError("At least one initial sample is required")
         if self.sampling_method == "lhs":
-            raise ImportError(
-                "LHS is no longer available as skopt (scikit-optimize) is no longer maintained, change to random"
-            )
-            # samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
+            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
         elif self.sampling_method == "random":
             samples = list()
         else:
@@ -576,7 +578,11 @@ def __optimize(self, max_fevals):
             self.fit_observations_to_model()
 
     def __optimize_multi(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
+        """Optimize with a portfolio of multiple acquisition functions.
+
+        Predictions are always only taken once.
+        Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average.
+        """
         if self.opt_direction != "min":
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
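Note on the replacement API: the new draw_latin_hypercube_samples boils down to drawing integer index samples with scipy.stats.qmc.LatinHypercube (via the QMCEngine integers method, available in recent SciPy) and mapping each row of indices back to parameter values. A minimal standalone sketch of that idea, reusing the tune_params from the test file below; the printed configurations are illustrative, since sampling is randomized:

    from scipy.stats.qmc import LatinHypercube

    tune_params = {"x": [1, 2], "y": [4.1, 5, 6.9], "z": [7]}

    # single-value parameters carry no information, so drop them (as the commit does)
    values_per_parameter = [values for values in tune_params.values() if len(values) > 1]
    num_dimensions = len(values_per_parameter)

    # draw integer index samples; u_bounds is exclusive by default, so len(values) is correct
    sampler = LatinHypercube(d=num_dimensions)
    samples = sampler.integers(
        l_bounds=[0] * num_dimensions,
        u_bounds=[len(values) for values in values_per_parameter],
        n=3,
    )

    # map each row of indices back to actual parameter values
    param_configs = [tuple(values_per_parameter[d][i] for d, i in enumerate(row)) for row in samples]
    print(param_configs)  # e.g. [(2, 4.1), (1, 6.9), (1, 5)]

Any configuration that a restricted search space rejects is then filtered out via find_param_config_index, as in the loop above.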
20 changes: 18 additions & 2 deletions test/strategies/test_bayesian_optimization.py
@@ -3,6 +3,7 @@
 from random import uniform as randfloat
 
 import numpy as np
+from pytest import raises
 
 from kernel_tuner.interface import Options
 from kernel_tuner.searchspace import Searchspace
@@ -11,8 +12,8 @@
 from kernel_tuner.strategies.common import CostFunc
 
 tune_params = dict()
-tune_params["x"] = [1, 2, 3]
-tune_params["y"] = [4, 5, 6]
+tune_params["x"] = [1, 2]
+tune_params["y"] = [4.1, 5, 6.9]
 tune_params["z"] = [7]
 
 strategy_options = dict(popsize=0, max_fevals=10)
@@ -75,6 +76,21 @@ def test_bo_initialization():
     assert len(BO.observations) == len(pruned_parameter_space)
     assert BO.current_optimum == np.PINF
 
+def test_bo_initial_sample_lhs():
+    sample = BO.draw_latin_hypercube_samples(num_samples=1)
+    print(sample)
+    assert isinstance(sample, list)
+    assert len(sample) == 1
+    assert isinstance(sample[0], tuple)
+    assert len(sample[0]) == 2
+    assert isinstance(sample[0][0], tuple)
+    assert isinstance(sample[0][1], int)
+    assert len(sample[0][0]) == 2  # tune_params["z"] is dropped because it only has a single value
+    assert isinstance(sample[0][0][0], float)
+    samples = BO.draw_latin_hypercube_samples(num_samples=3)
+    assert len(samples) == 3
+    with raises(ValueError):
+        samples = BO.draw_latin_hypercube_samples(num_samples=30)
 
 def test_bo_is_better_than():
     BO.opt_direction = 'max'
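The new test verifies the structure of the returned samples rather than the Latin Hypercube property itself. What sets LHS apart from plain random sampling is stratification: with n samples, each dimension is split into n equal bins and every bin receives exactly one sample. A quick sketch against SciPy directly, not part of the commit, to illustrate:

    import numpy as np
    from scipy.stats.qmc import LatinHypercube

    n, d = 8, 2
    samples = LatinHypercube(d=d).random(n=n)  # floats in [0, 1)
    bins = np.floor(samples * n).astype(int)   # per-dimension bin index of each sample
    for dim in range(d):
        # exactly one sample lands in each of the n bins along every dimension
        assert sorted(bins[:, dim]) == list(range(n))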
