Solved issue #139: Reimplemented Latin Hypercube Sampling with SciPy
fjwillemsen committed Sep 19, 2023
1 parent 258e003 commit 9a7b47f
Showing 2 changed files with 49 additions and 27 deletions.
56 changes: 31 additions & 25 deletions kernel_tuner/strategies/bayes_opt.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 from scipy.stats import norm
+from scipy.stats.qmc import LatinHypercube
 
 # BO imports
 from kernel_tuner.searchspace import Searchspace
@@ -164,8 +165,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         "multi-advanced",
     ),
     samplingmethod=(
-        "Method used for initial sampling the parameter space, only random is supported as LHS is deprecated",
-        "random",
+        "Method used for initial sampling the parameter space, either random or Latin Hypercube Sampling (LHS)",
+        "lhs",
     ),
     popsize=("Number of initial samples", 20),
 )
@@ -187,8 +188,7 @@ def __init__(
         # supported hyperparameter values
         self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
         self.supported_methods = supported_methods
-        self.supported_sampling_methods = ["random"]
-        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
+        self.supported_sampling_methods = ["random", "lhs"]
 
         def get_hyperparam(name: str, default, supported_values=list()):
             value = tuning_options.strategy_options.get(name, default)
@@ -210,9 +210,8 @@ def get_hyperparam(name: str, default, supported_values=list()):
         self.num_initial_samples = get_hyperparam("popsize", 20)
         if self.num_initial_samples < 0:
             raise ValueError(f"Number of initial samples (popsize) must be >= 0 (given: {self.num_initial_samples})")
-        self.sampling_method = get_hyperparam("samplingmethod", "random", self.supported_sampling_methods)
-        self.sampling_crit = get_hyperparam("samplingcriterion", "maximin", self.supported_sampling_criterion)
-        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
+        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
+        # note: more parameters are available for LHS if required: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.qmc.LatinHypercube.html
 
         # set acquisition function hyperparameter defaults where missing
         if "explorationfactor" not in acq_params:
@@ -478,26 +477,32 @@ def draw_random_sample(self) -> Tuple[list, int]:
 
     def draw_latin_hypercube_samples(self, num_samples: int) -> list:
         """Draws an LHS-distributed sample from the search space."""
-        from skopt.sampler import Lhs
-
+        # setup, removes params with single value because they are not in the normalized searchspace
         if self.searchspace_size < num_samples:
             raise ValueError("Can't sample more than the size of the search space")
-        if self.sampling_crit is None:
-            lhs = Lhs(lhs_type="centered", criterion=None)
-        else:
-            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
-        param_configs = lhs.generate(self.dimensions(), num_samples)
+        values_per_parameter = list(param for param in self.dimensions() if len(param) > 1)
+        num_dimensions = len(values_per_parameter)
+
+        # draw Latin Hypercube samples
+        sampler = LatinHypercube(d=num_dimensions)
+        lower_bounds = [0 for _ in range(num_dimensions)]
+        upper_bounds = [len(param) for param in values_per_parameter]
+        samples = sampler.integers(l_bounds=lower_bounds, u_bounds=upper_bounds, n=num_samples)
+        param_configs = list(tuple(values_per_parameter[p_i][v_i] for p_i, v_i in enumerate(s)) for s in samples)
+
+        # only return valid samples
         indices = list()
         normalized_param_configs = list()
-        for i in range(len(param_configs) - 1):
+        for param_config in param_configs:
+            normalized_param_config = self.normalize_param_config(param_config)
             try:
-                param_config = self.normalize_param_config(param_configs[i])
-                index = self.find_param_config_index(param_config)
+                index = self.find_param_config_index(normalized_param_config)
                 indices.append(index)
-                normalized_param_configs.append(param_config)
+                normalized_param_configs.append(normalized_param_config)
             except ValueError:
-                """Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
-                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped.
+                """With search space restrictions, the search space may not be a cartesian product of parameter values.
+                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace.
+                These configurations are skipped and replaced with a randomly drawn configuration.
                 """
                 continue
         return list(zip(normalized_param_configs, indices))
@@ -507,10 +512,7 @@ def initial_sample(self):
         if self.num_initial_samples <= 0:
             raise ValueError("At least one initial sample is required")
         if self.sampling_method == "lhs":
-            raise ImportError(
-                "LHS is no longer available as skopt (scikit-optimize) is no longer maintained, change to random"
-            )
-            # samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
+            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
         elif self.sampling_method == "random":
             samples = list()
         else:
@@ -576,7 +578,11 @@ def __optimize(self, max_fevals):
             self.fit_observations_to_model()
 
     def __optimize_multi(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
+        """Optimize with a portfolio of multiple acquisition functions.
+
+        Predictions are always only taken once.
+        Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average.
+        """
         if self.opt_direction != "min":
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
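Note on the replacement API: the new draw_latin_hypercube_samples boils down to drawing integer index samples with scipy.stats.qmc.LatinHypercube (via the QMCEngine integers method, available in recent SciPy) and mapping each row of indices back to parameter values. A minimal standalone sketch of that idea, reusing the tune_params from the test file below; the printed configurations are illustrative, since sampling is randomized:

    from scipy.stats.qmc import LatinHypercube

    tune_params = {"x": [1, 2], "y": [4.1, 5, 6.9], "z": [7]}

    # single-value parameters carry no information, so drop them (as the commit does)
    values_per_parameter = [values for values in tune_params.values() if len(values) > 1]
    num_dimensions = len(values_per_parameter)

    # draw integer index samples; u_bounds is exclusive by default, so len(values) is correct
    sampler = LatinHypercube(d=num_dimensions)
    samples = sampler.integers(
        l_bounds=[0] * num_dimensions,
        u_bounds=[len(values) for values in values_per_parameter],
        n=3,
    )

    # map each row of indices back to actual parameter values
    param_configs = [tuple(values_per_parameter[d][i] for d, i in enumerate(row)) for row in samples]
    print(param_configs)  # e.g. [(2, 4.1), (1, 6.9), (1, 5)]

Any configuration that a restricted search space rejects is then filtered out via find_param_config_index, as in the loop above.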
20 changes: 18 additions & 2 deletions test/strategies/test_bayesian_optimization.py
@@ -3,6 +3,7 @@
 from random import uniform as randfloat
 
 import numpy as np
+from pytest import raises
 
 from kernel_tuner.interface import Options
 from kernel_tuner.searchspace import Searchspace
@@ -11,8 +12,8 @@
 from kernel_tuner.strategies.common import CostFunc
 
 tune_params = dict()
-tune_params["x"] = [1, 2, 3]
-tune_params["y"] = [4, 5, 6]
+tune_params["x"] = [1, 2]
+tune_params["y"] = [4.1, 5, 6.9]
 tune_params["z"] = [7]
 
 strategy_options = dict(popsize=0, max_fevals=10)
@@ -75,6 +76,21 @@ def test_bo_initialization():
     assert len(BO.observations) == len(pruned_parameter_space)
     assert BO.current_optimum == np.PINF
 
+def test_bo_initial_sample_lhs():
+    sample = BO.draw_latin_hypercube_samples(num_samples=1)
+    print(sample)
+    assert isinstance(sample, list)
+    assert len(sample) == 1
+    assert isinstance(sample[0], tuple)
+    assert len(sample[0]) == 2
+    assert isinstance(sample[0][0], tuple)
+    assert isinstance(sample[0][1], int)
+    assert len(sample[0][0]) == 2  # tune_params["z"] is dropped because it only has a single value
+    assert isinstance(sample[0][0][0], float)
+    samples = BO.draw_latin_hypercube_samples(num_samples=3)
+    assert len(samples) == 3
+    with raises(ValueError):
+        samples = BO.draw_latin_hypercube_samples(num_samples=30)
 
 def test_bo_is_better_than():
     BO.opt_direction = 'max'
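The new test verifies the structure of the returned samples rather than the Latin Hypercube property itself. What sets LHS apart from plain random sampling is stratification: with n samples, each dimension is split into n equal bins and every bin receives exactly one sample. A quick sketch against SciPy directly, not part of the commit, to illustrate:

    import numpy as np
    from scipy.stats.qmc import LatinHypercube

    n, d = 8, 2
    samples = LatinHypercube(d=d).random(n=n)  # floats in [0, 1)
    bins = np.floor(samples * n).astype(int)   # per-dimension bin index of each sample
    for dim in range(d):
        # exactly one sample lands in each of the n bins along every dimension
        assert sorted(bins[:, dim]) == list(range(n))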
