Test for Bayesian Optimization Algo #406

Merged
14 commits merged on Mar 11, 2019
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,6 +1,9 @@
# python ignore files
__pycache__/
.idea/
.coverage
.pytest_cache
*.egg-info

# Project specific ignore files
*.swp
36 changes: 36 additions & 0 deletions pkg/suggestion/bayesianoptimization/src/acquisition_func.py
@@ -0,0 +1,36 @@
""" module for acquisition function"""
import numpy as np
from scipy.stats import norm


class AcquisitionFunc:
"""
Class for acquisition function with options for expected improvement,
probability of improvement, or lower confident bound.
"""

def __init__(self, model, current_optimal, mode="ei", trade_off=0.01):
"""
:param mode: pi: probability of improvement, ei: expected improvement, lcb: lower confident bound
:param trade_off: a parameter to control the trade off between exploiting and exploring
:param model_type: gp: gaussian process, rf: random forest
"""
self.model = model
self.current_optimal = current_optimal
self.mode = mode
self.trade_off = trade_off

def compute(self, X_test):
y_mean, y_std, y_variance = self.model.predict(X_test)

z = (y_mean - self.current_optimal - self.trade_off) / y_std

if self.mode == "ei":
if y_std.any() < 0.000001:
return 0, y_mean, y_variance
result = y_std * (z * norm.cdf(z) + norm.pdf(z))
elif self.mode == "pi":
result = norm.cdf(z)
else:
result = - (y_mean - self.trade_off * y_std)
return np.squeeze(result), np.squeeze(y_mean), np.squeeze(y_variance)
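
For illustration, a minimal usage sketch of this class (hypothetical data; import paths follow this PR's layout):

import numpy as np
from pkg.suggestion.bayesianoptimization.src.model.gp import GaussianProcessModel
from pkg.suggestion.bayesianoptimization.src.acquisition_func import AcquisitionFunc

# toy observations: 20 points in a 2-D search space
X_train = np.random.rand(20, 2)
y_train = np.random.rand(20)

model = GaussianProcessModel()  # defaults: Matern kernel, nu=1.5
model.fit(X_train, y_train)

# score 5 candidate points by expected improvement over the incumbent best
acq = AcquisitionFunc(model, current_optimal=y_train.max(), mode="ei", trade_off=0.01)
ei, y_mean, y_variance = acq.compute(np.random.rand(5, 2))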

This file was deleted.

35 changes: 13 additions & 22 deletions pkg/suggestion/bayesianoptimization/src/algorithm_manager.py
@@ -1,39 +1,31 @@
""" module for algorithm manager """

import numpy as np

from pkg.api.python import api_pb2
import logging
from logging import getLogger, StreamHandler, INFO, DEBUG

from .utils import get_logger


def deal_with_discrete(feasible_values, current_value):
""" function to embed the current values to the feasible discrete space"""
diff = np.subtract(feasible_values, current_value)
diff = np.absolute(diff)
return feasible_values[np.argmin(diff)]


def deal_with_categorical(feasible_values, one_hot_values):
""" function to do the one hot encoding of the categorical values """
#index = np.argmax(one_hot_values)
index = one_hot_values.argmax()
index = np.argmax(one_hot_values)
#index = one_hot_values.argmax()
jdplatt marked this conversation as resolved.
Show resolved Hide resolved
return feasible_values[int(index)]
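
As a quick, hypothetical illustration of the two helpers above (values are made up):

import numpy as np
from pkg.suggestion.bayesianoptimization.src.algorithm_manager import (
    deal_with_discrete, deal_with_categorical)

feasible = np.array([1, 2, 4, 8])
print(deal_with_discrete(feasible, 5.2))  # -> 4, the closest feasible value

values = ["sgd", "adam", "ftrl"]
print(deal_with_categorical(values, np.array([0.1, 0.8, 0.1])))  # -> "adam", the one-hot argmax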


class AlgorithmManager:
""" class for the algorithm manager
provide some helper functions
"""
def __init__(self, study_id, study_config, X_train, y_train, logger=None):
if logger == None:
self.logger = getLogger(__name__)
FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s'
logging.basicConfig(format=FORMAT)
handler = StreamHandler()
handler.setLevel(DEBUG)
self.logger.setLevel(DEBUG)
self.logger.addHandler(handler)
self.logger.propagate = False
else:
self.logger = logger
self.logger = logger if (logger is not None) else get_logger()
self._study_id = study_id
self._study_config = study_config
self._goal = self._study_config.optimization_type
@@ -82,7 +74,7 @@ def lower_bound(self):

    @property
    def upper_bound(self):
        """ return the ipper bound of all the parameters """
        """ return the upper bound of all the parameters """
        return self._upperbound

    @property
@@ -118,10 +110,10 @@ def y_train(self):
    def _parse_config(self):
        """ extract info from the study configuration """
        for i, param in enumerate(self._study_config.parameter_configs.configs):
            self._name_id[param.name]=i
            self._name_id[param.name] = i
            self._types.append(param.parameter_type)
            self._names.append(param.name)
            if param.parameter_type == api_pb2.DOUBLE or param.parameter_type == api_pb2.INT:
            if param.parameter_type in [api_pb2.DOUBLE, api_pb2.INT]:
                self._dim = self._dim + 1
                self._lowerbound.append(float(param.feasible.min))
                self._upperbound.append(float(param.feasible.max))
@@ -158,15 +150,15 @@ def _mapping_params(self, parameters_list):
            for p in parameters:
                self.logger.debug("mapping: %r", p, extra={"StudyID": self._study_id})
                map_id = self._name_id[p.name]
                if self._types[map_id] == api_pb2.DOUBLE or self._types[map_id] == api_pb2.INT or self._types[map_id] == api_pb2.DISCRETE:
                if self._types[map_id] in [api_pb2.DOUBLE, api_pb2.INT, api_pb2.DISCRETE]:
                    maplist[map_id] = float(p.value)
                elif self._types[map_id] == api_pb2.CATEGORICAL:
                    for ci in self._categorical_info:
                        if ci["name"] == p.name:
                            maplist[map_id] = np.zeros(ci["number"])
                            for i, v in enumerate(ci["values"]):
                                if v == p.value:
                                    maplist[map_id][i]=1
                                    maplist[map_id][i] = 1
                                    break
            self.logger.debug("mapped: %r", maplist, extra={"StudyID": self._study_id})
            ret.append(np.hstack(maplist))
@@ -234,4 +226,3 @@ def convert_to_dict(self, x_next):
            })
            result.append(tmp)
        return result

@@ -2,7 +2,7 @@
import numpy as np
from sklearn.preprocessing import MinMaxScaler

from pkg.suggestion.bayesianoptimization.src.global_optimizer.global_optimizer import GlobalOptimizer
from .global_optimizer import GlobalOptimizer


class BOAlgorithm:
@@ -54,7 +54,7 @@ def get_suggestion(self, request_num):
        x_next_list = []
        if self.X_train is None and self.y_train is None and self.current_optimal is None:
            # randomly pick a point as the first trial
            for i in range(request_num):
            for _ in range(request_num):
                x_next_list.append(np.random.uniform(self.lowerbound, self.upperbound, size=(1, self.dim)))
        else:
            _, x_next_list_que = self.optimizer.direct(request_num)
@@ -2,12 +2,13 @@
DIRECT algorithm is used in this case
"""
import copy

import numpy as np
from collections import deque

from pkg.suggestion.bayesianoptimization.src.acquisition_func.acquisition_func import AcquisitionFunc
import logging
from logging import getLogger, StreamHandler, INFO, DEBUG
from .acquisition_func import AcquisitionFunc
from .model.gp import GaussianProcessModel
from .model.rf import RandomForestModel
from .utils import get_logger


class RectPack:
@@ -74,37 +75,31 @@ class GlobalOptimizer:

    def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, mode, trade_off, length_scale,
                 noise, nu, kernel_type, n_estimators, max_features, model_type, logger=None):
        if logger == None:
            self.logger = getLogger(__name__)
            FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s'
            logging.basicConfig(format=FORMAT)
            handler = StreamHandler()
            handler.setLevel(INFO)
            self.logger.setLevel(INFO)
            self.logger.addHandler(handler)
            self.logger.propagate = False
        else:
            self.logger = logger

        self.logger = logger if (logger is not None) else get_logger()
        self.N = N
        self.l = l
        self.u = u
        self.scaler = scaler
        self.buckets = []
        self.dim = None
        if model_type == "gp":
            model = GaussianProcessModel(
                length_scale=length_scale,
                noise=noise,
                nu=nu,
                kernel_type=kernel_type,
            )
        else:
            model = RandomForestModel(
                n_estimators=n_estimators,
                max_features=max_features,
            )
        model.fit(X_train, y_train)
        self.aq_func = AcquisitionFunc(
            X_train=X_train,
            y_train=y_train,
            model=model,
            current_optimal=current_optimal,
            mode=mode,
            trade_off=trade_off,
            length_scale=length_scale,
            noise=noise,
            nu=nu,
            kernel_type=kernel_type,
            n_estimators=n_estimators,
            max_features=max_features,
            model_type=model_type,
        )

    def potential_opt(self, f_min):
@@ -174,7 +169,7 @@ def direct(self, request_num):
        x_next = first_rect.center
        ei_min.append(f_min)

        for t in range(self.N):
        for _ in range(self.N):
            opt_set = self.potential_opt(f_min)

            # for bucket in self.buckets:
@@ -215,7 +210,7 @@ def sample_buckets(self, request_num):
            fc_sum -= a.fc
            bucket_index.append([-a.fc, a.center])
        bucket_index = sorted(bucket_index, key=lambda x: x[0])
        for i in range(request_num):
        for _ in range(request_num):
            sample = np.random.rand()
            stick = 0.0
            for b in bucket_index:
22 changes: 13 additions & 9 deletions pkg/suggestion/bayesianoptimization/src/model/gp.py
@@ -5,7 +5,8 @@

class GaussianProcessModel:
""" use the gaussian process as a prior """
def __init__(self, length_scale, noise, nu, kernel_type):
def __init__(self, length_scale=0.5, noise=0.00005,
nu=1.5, kernel_type="matern"):
"""
:param length_scale: the larger the length_scale is, the smoother the gaussian prior is. If a float,
an isotropic kernel is used. If an array, an anisotropic kernel is used where each dimension of it defines
@@ -15,20 +16,23 @@ def __init__(self, length_scale, noise, nu, kernel_type):
        approximate function is.
        :param kernel_type: "rbf": squared exponential kernel, "matern": Matern kernel.
        """

        length_scale = length_scale or 0.5
        noise = noise or 0.00005
        nu = nu or 1.5
        kernel_type = kernel_type or "matern"

        if kernel_type == "rbf":
            kernel = RBF(length_scale=length_scale)
        else:
        elif kernel_type == "matern":
            kernel = Matern(length_scale=length_scale, nu=nu)

        else:
            raise Exception("kernel_type must be 'rbf' or 'matern'")
        self.gp = GaussianProcessRegressor(
            kernel=kernel,
            alpha=noise,
            random_state=0,
            optimizer=None,
        )

    def fit(self, X_train, y_train):
        self.gp.fit(X_train, y_train)

    def predict(self, X_test):
        y_mean, y_std = self.gp.predict(X_test, return_std=True)
        y_variance = y_std ** 2
        return y_mean, y_std, y_variance
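
A short, hedged sketch of this surrogate on a toy 1-D problem (data is hypothetical):

import numpy as np
from pkg.suggestion.bayesianoptimization.src.model.gp import GaussianProcessModel

X_train = np.linspace(0, 1, 10).reshape(-1, 1)
y_train = np.sin(4 * X_train).ravel()

gp = GaussianProcessModel(kernel_type="rbf", length_scale=0.3)
gp.fit(X_train, y_train)

y_mean, y_std, y_variance = gp.predict(np.array([[0.25], [0.75]]))  # y_variance == y_std ** 2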
19 changes: 16 additions & 3 deletions pkg/suggestion/bayesianoptimization/src/model/rf.py
@@ -1,11 +1,24 @@
import numpy as np
import forestci as fci
from sklearn.ensemble import RandomForestRegressor


class RandomForestModel:
    def __init__(self, n_estimators, max_features):
        n_estimators = n_estimators or 50
        max_features = max_features or "auto"

    def __init__(self, n_estimators=50, max_features="auto"):
        self.rf = RandomForestRegressor(
            n_estimators=n_estimators,
            max_features=max_features,
        )
        self.X_train = None

    def fit(self, X_train, y_train):
        print(X_train.shape, y_train.shape)
        self.X_train = X_train
        self.rf.fit(X_train, y_train)

    def predict(self, X_test):
        y_mean = self.rf.predict(X_test)
        y_variance = fci.random_forest_error(self.rf, self.X_train, X_test)
        y_std = np.sqrt(y_variance)
        return y_mean, y_std, y_variance
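
A hedged usage sketch (hypothetical data); forestci estimates the predictive variance from the training set, which is why fit keeps a reference to X_train:

import numpy as np
from pkg.suggestion.bayesianoptimization.src.model.rf import RandomForestModel

X_train = np.random.rand(50, 3)
y_train = np.random.rand(50)

rf = RandomForestModel()  # defaults: 50 trees, max_features="auto"
rf.fit(X_train, y_train)

y_mean, y_std, y_variance = rf.predict(np.random.rand(5, 3))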
17 changes: 17 additions & 0 deletions pkg/suggestion/bayesianoptimization/src/utils.py
@@ -0,0 +1,17 @@
import os
import logging
from logging import getLogger, StreamHandler


FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s'
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")


def get_logger(name=__name__):
    logger = getLogger(name)
    logging.basicConfig(format=FORMAT)
    handler = StreamHandler()
    logger.setLevel(LOG_LEVEL)
    logger.addHandler(handler)
    logger.propagate = False
    return logger
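
A hedged usage sketch; note that the format string above expects a lowercase "studyid" key supplied through the extra dict:

from pkg.suggestion.bayesianoptimization.src.utils import get_logger

logger = get_logger(__name__)
logger.info("starting suggestion loop", extra={"studyid": "abc123"})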