From c87d5838b4142fc458f030c1d58be52a554354ac Mon Sep 17 00:00:00 2001 From: jdplatt Date: Mon, 11 Mar 2019 15:54:38 -0400 Subject: [PATCH] Test for Bayesian Optimization Algo (#406) * added tests for acquisition function and models * added tests for global_optimizer * added tests for boa * minor linting * tests for algorithm manager * added discrete parameter to study config * covered all parameter types * moved python script to testing folder * added python tests to unit tests * remembered to uncomment existing tests * fixed path to test script * moved python tests to separate job in workflow * added run command to test script --- .gitignore | 3 + .../src/acquisition_func => }/__init__.py | 0 .../src/acquisition_func.py | 36 +++++++++++ .../src/acquisition_func/acquisition_func.py | 61 ------------------- .../src/algorithm_manager.py | 35 ++++------- .../src/bayesian_optimization_algorithm.py | 4 +- .../global_optimizer.py | 49 +++++++-------- .../bayesianoptimization/src/model/gp.py | 22 ++++--- .../bayesianoptimization/src/model/rf.py | 19 +++++- .../bayesianoptimization/src/utils.py | 17 ++++++ pkg/suggestion/test_requirements.txt | 5 ++ .../global_optimizer => tests}/__init__.py | 0 pkg/suggestion/tests/conftest.py | 49 +++++++++++++++ pkg/suggestion/tests/parameter_values.yaml | 20 ++++++ pkg/suggestion/tests/study_config.yaml | 26 ++++++++ .../tests/test_acquisition_function.py | 23 +++++++ .../tests/test_algorithm_manager.py | 53 ++++++++++++++++ pkg/suggestion/tests/test_boa.py | 23 +++++++ pkg/suggestion/tests/test_global_optimizer.py | 36 +++++++++++ pkg/suggestion/tests/test_models.py | 23 +++++++ pkg/suggestion/tests/test_requirements.txt | 0 setup.py | 4 ++ test/scripts/python-tests.sh | 23 +++++++ test/scripts/run-tests.sh | 3 +- test/workflows/components/workflows.libsonnet | 10 +++ 25 files changed, 419 insertions(+), 125 deletions(-) rename pkg/suggestion/{bayesianoptimization/src/acquisition_func => }/__init__.py (100%) create mode 100644 pkg/suggestion/bayesianoptimization/src/acquisition_func.py delete mode 100644 pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py rename pkg/suggestion/bayesianoptimization/src/{global_optimizer => }/global_optimizer.py (90%) create mode 100644 pkg/suggestion/bayesianoptimization/src/utils.py create mode 100644 pkg/suggestion/test_requirements.txt rename pkg/suggestion/{bayesianoptimization/src/global_optimizer => tests}/__init__.py (100%) create mode 100644 pkg/suggestion/tests/conftest.py create mode 100644 pkg/suggestion/tests/parameter_values.yaml create mode 100644 pkg/suggestion/tests/study_config.yaml create mode 100644 pkg/suggestion/tests/test_acquisition_function.py create mode 100644 pkg/suggestion/tests/test_algorithm_manager.py create mode 100644 pkg/suggestion/tests/test_boa.py create mode 100644 pkg/suggestion/tests/test_global_optimizer.py create mode 100644 pkg/suggestion/tests/test_models.py create mode 100644 pkg/suggestion/tests/test_requirements.txt create mode 100644 setup.py create mode 100755 test/scripts/python-tests.sh diff --git a/.gitignore b/.gitignore index e26c1d4356b..9ae1bd8ae1a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # python ignore files __pycache__/ .idea/ +.coverage +.pytest_cache +*.egg-info # Project specific ignore files *.swp diff --git a/pkg/suggestion/bayesianoptimization/src/acquisition_func/__init__.py b/pkg/suggestion/__init__.py similarity index 100% rename from pkg/suggestion/bayesianoptimization/src/acquisition_func/__init__.py rename to pkg/suggestion/__init__.py diff --git a/pkg/suggestion/bayesianoptimization/src/acquisition_func.py b/pkg/suggestion/bayesianoptimization/src/acquisition_func.py new file mode 100644 index 00000000000..9e061c6bd76 --- /dev/null +++ b/pkg/suggestion/bayesianoptimization/src/acquisition_func.py @@ -0,0 +1,36 @@ +""" module for acquisition function""" +import numpy as np +from scipy.stats import norm + + +class AcquisitionFunc: + """ + Class for acquisition function with options for expected improvement, + probability of improvement, or lower confident bound. + """ + + def __init__(self, model, current_optimal, mode="ei", trade_off=0.01): + """ + :param mode: pi: probability of improvement, ei: expected improvement, lcb: lower confident bound + :param trade_off: a parameter to control the trade off between exploiting and exploring + :param model_type: gp: gaussian process, rf: random forest + """ + self.model = model + self.current_optimal = current_optimal + self.mode = mode + self.trade_off = trade_off + + def compute(self, X_test): + y_mean, y_std, y_variance = self.model.predict(X_test) + + z = (y_mean - self.current_optimal - self.trade_off) / y_std + + if self.mode == "ei": + if y_std.any() < 0.000001: + return 0, y_mean, y_variance + result = y_std * (z * norm.cdf(z) + norm.pdf(z)) + elif self.mode == "pi": + result = norm.cdf(z) + else: + result = - (y_mean - self.trade_off * y_std) + return np.squeeze(result), np.squeeze(y_mean), np.squeeze(y_variance) diff --git a/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py b/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py deleted file mode 100644 index b980f2658f7..00000000000 --- a/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py +++ /dev/null @@ -1,61 +0,0 @@ -""" module for acquisition function""" -import numpy as np -from scipy.stats import norm -import forestci as fci - -from pkg.suggestion.bayesianoptimization.src.model.gp import GaussianProcessModel -from pkg.suggestion.bayesianoptimization.src.model.rf import RandomForestModel - - -class AcquisitionFunc: - """ class for acquisition function - expected improvement in this case - """ - def __init__(self, X_train, y_train, current_optimal, mode, trade_off, length_scale, - noise, nu, kernel_type, n_estimators, max_features, model_type): - """ - :param mode: pi: probability of improvement, ei: expected improvement, lcb: lower confident bound - :param trade_off: a parameter to control the trade off between exploiting and exploring - :param model_type: gp: gaussian process, rf: random forest - """ - self.X_train = X_train - self.y_train = y_train - self.current_optimal = current_optimal - self.mode = mode or "ei" - self.trade_off = trade_off or 0.01 - self.model_type = model_type or "gp" - if self.model_type == "gp": - self.model = GaussianProcessModel( - length_scale=length_scale, - noise=noise, - nu=nu, - kernel_type=kernel_type, - ) - else: - self.model = RandomForestModel( - n_estimators=n_estimators, - max_features=max_features, - ) - - def compute(self, X_test): - if self.model_type == "gp": - self.model.gp.fit(self.X_train, self.y_train) - y_mean, y_std = self.model.gp.predict(X_test, return_std=True) - y_variance = y_std ** 2 - else: - self.model.rf.fit(self.y_train, self.y_train) - y_mean = self.model.rf.predict(X_test) - y_variance = fci.random_forest_error(self.model.rf, self.X_train, X_test) - y_std = np.sqrt(y_variance) - - z = (y_mean - self.current_optimal - self.trade_off) / y_std - - if self.mode == "ei": - if y_std < 0.000001: - return 0, y_mean, y_variance - result = y_std * (z * norm.cdf(z) + norm.pdf(z)) - elif self.mode == "pi": - result = norm.cdf(z) - else: - result = - (y_mean - self.trade_off * y_std) - return np.squeeze(result), np.squeeze(y_mean), np.squeeze(y_variance) diff --git a/pkg/suggestion/bayesianoptimization/src/algorithm_manager.py b/pkg/suggestion/bayesianoptimization/src/algorithm_manager.py index 0898acf4c6c..3ff5115b7b6 100644 --- a/pkg/suggestion/bayesianoptimization/src/algorithm_manager.py +++ b/pkg/suggestion/bayesianoptimization/src/algorithm_manager.py @@ -1,10 +1,10 @@ """ module for algorithm manager """ - import numpy as np from pkg.api.python import api_pb2 -import logging -from logging import getLogger, StreamHandler, INFO, DEBUG + +from .utils import get_logger + def deal_with_discrete(feasible_values, current_value): """ function to embed the current values to the feasible discrete space""" @@ -12,28 +12,20 @@ def deal_with_discrete(feasible_values, current_value): diff = np.absolute(diff) return feasible_values[np.argmin(diff)] + def deal_with_categorical(feasible_values, one_hot_values): """ function to do the one hot encoding of the categorical values """ - #index = np.argmax(one_hot_values) - index = one_hot_values.argmax() + index = np.argmax(one_hot_values) + #index = one_hot_values.argmax() return feasible_values[int(index)] + class AlgorithmManager: """ class for the algorithm manager provide some helper functions """ def __init__(self, study_id, study_config, X_train, y_train, logger=None): - if logger == None: - self.logger = getLogger(__name__) - FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s' - logging.basicConfig(format=FORMAT) - handler = StreamHandler() - handler.setLevel(DEBUG) - self.logger.setLevel(DEBUG) - self.logger.addHandler(handler) - self.logger.propagate = False - else: - self.logger = logger + self.logger = logger if (logger is not None) else get_logger() self._study_id = study_id self._study_config = study_config self._goal = self._study_config.optimization_type @@ -82,7 +74,7 @@ def lower_bound(self): @property def upper_bound(self): - """ return the ipper bound of all the parameters """ + """ return the upper bound of all the parameters """ return self._upperbound @property @@ -118,10 +110,10 @@ def y_train(self): def _parse_config(self): """ extract info from the study configuration """ for i, param in enumerate(self._study_config.parameter_configs.configs): - self._name_id[param.name]=i + self._name_id[param.name] = i self._types.append(param.parameter_type) self._names.append(param.name) - if param.parameter_type == api_pb2.DOUBLE or param.parameter_type == api_pb2.INT: + if param.parameter_type in [api_pb2.DOUBLE, api_pb2.INT]: self._dim = self._dim + 1 self._lowerbound.append(float(param.feasible.min)) self._upperbound.append(float(param.feasible.max)) @@ -158,7 +150,7 @@ def _mapping_params(self, parameters_list): for p in parameters: self.logger.debug("mapping: %r", p, extra={"StudyID": self._study_id}) map_id = self._name_id[p.name] - if self._types[map_id] == api_pb2.DOUBLE or self._types[map_id] == api_pb2.INT or self._types[map_id] == api_pb2.DISCRETE: + if self._types[map_id] in [api_pb2.DOUBLE, api_pb2.INT, api_pb2.DISCRETE]: maplist[map_id] = float(p.value) elif self._types[map_id] == api_pb2.CATEGORICAL: for ci in self._categorical_info: @@ -166,7 +158,7 @@ def _mapping_params(self, parameters_list): maplist[map_id] = np.zeros(ci["number"]) for i, v in enumerate(ci["values"]): if v == p.value: - maplist[map_id][i]=1 + maplist[map_id][i] = 1 break self.logger.debug("mapped: %r", maplist, extra={"StudyID": self._study_id}) ret.append(np.hstack(maplist)) @@ -234,4 +226,3 @@ def convert_to_dict(self, x_next): }) result.append(tmp) return result - diff --git a/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py b/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py index 2709cc402e7..6b2e57c3f9d 100644 --- a/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py +++ b/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py @@ -2,7 +2,7 @@ import numpy as np from sklearn.preprocessing import MinMaxScaler -from pkg.suggestion.bayesianoptimization.src.global_optimizer.global_optimizer import GlobalOptimizer +from .global_optimizer import GlobalOptimizer class BOAlgorithm: @@ -54,7 +54,7 @@ def get_suggestion(self, request_num): x_next_list = [] if self.X_train is None and self.y_train is None and self.current_optimal is None: # randomly pick a point as the first trial - for i in range(request_num): + for _ in range(request_num): x_next_list.append(np.random.uniform(self.lowerbound, self.upperbound, size=(1, self.dim))) else: _, x_next_list_que = self.optimizer.direct(request_num) diff --git a/pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py b/pkg/suggestion/bayesianoptimization/src/global_optimizer.py similarity index 90% rename from pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py rename to pkg/suggestion/bayesianoptimization/src/global_optimizer.py index 3f4333e0ae7..7a307c86af9 100644 --- a/pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py +++ b/pkg/suggestion/bayesianoptimization/src/global_optimizer.py @@ -2,12 +2,13 @@ DIRECT algorithm is used in this case """ import copy + import numpy as np -from collections import deque -from pkg.suggestion.bayesianoptimization.src.acquisition_func.acquisition_func import AcquisitionFunc -import logging -from logging import getLogger, StreamHandler, INFO, DEBUG +from .acquisition_func import AcquisitionFunc +from .model.gp import GaussianProcessModel +from .model.rf import RandomForestModel +from .utils import get_logger class RectPack: @@ -74,37 +75,31 @@ class GlobalOptimizer: def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, mode, trade_off, length_scale, noise, nu, kernel_type, n_estimators, max_features, model_type, logger=None): - if logger == None: - self.logger = getLogger(__name__) - FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s' - logging.basicConfig(format=FORMAT) - handler = StreamHandler() - handler.setLevel(INFO) - self.logger.setLevel(INFO) - self.logger.addHandler(handler) - self.logger.propagate = False - else: - self.logger = logger - + self.logger = logger if (logger is not None) else get_logger() self.N = N self.l = l self.u = u self.scaler = scaler self.buckets = [] self.dim = None + if model_type == "gp": + model = GaussianProcessModel( + length_scale=length_scale, + noise=noise, + nu=nu, + kernel_type=kernel_type, + ) + else: + model = RandomForestModel( + n_estimators=n_estimators, + max_features=max_features, + ) + model.fit(X_train, y_train) self.aq_func = AcquisitionFunc( - X_train=X_train, - y_train=y_train, + model=model, current_optimal=current_optimal, mode=mode, trade_off=trade_off, - length_scale=length_scale, - noise=noise, - nu=nu, - kernel_type=kernel_type, - n_estimators=n_estimators, - max_features=max_features, - model_type=model_type, ) def potential_opt(self, f_min): @@ -174,7 +169,7 @@ def direct(self, request_num): x_next = first_rect.center ei_min.append(f_min) - for t in range(self.N): + for _ in range(self.N): opt_set = self.potential_opt(f_min) # for bucket in self.buckets: @@ -215,7 +210,7 @@ def sample_buckets(self, request_num): fc_sum -= a.fc bucket_index.append([-a.fc, a.center]) bucket_index = sorted(bucket_index, key=lambda x: x[0]) - for i in range(request_num): + for _ in range(request_num): sample = np.random.rand() stick = 0.0 for b in bucket_index: diff --git a/pkg/suggestion/bayesianoptimization/src/model/gp.py b/pkg/suggestion/bayesianoptimization/src/model/gp.py index 9f8a750a5ab..446238c0669 100644 --- a/pkg/suggestion/bayesianoptimization/src/model/gp.py +++ b/pkg/suggestion/bayesianoptimization/src/model/gp.py @@ -5,7 +5,8 @@ class GaussianProcessModel: """ use the gaussian process as a prior """ - def __init__(self, length_scale, noise, nu, kernel_type): + def __init__(self, length_scale=0.5, noise=0.00005, + nu=1.5, kernel_type="matern"): """ :param length_scale: the larger the length_scale is, the smoother the gaussian prior is. If a float, an isotropic kernel is used. If an array, an anisotropic kernel is used where each dimension of it defines @@ -15,20 +16,23 @@ def __init__(self, length_scale, noise, nu, kernel_type): approximate function is. :param kernel_type: "rbf": squared exponential kernel, "matern": Matern kernel. """ - - length_scale = length_scale or 0.5 - noise = noise or 0.00005 - nu = nu or 1.5 - kernel_type = kernel_type or "matern" - if kernel_type == "rbf": kernel = RBF(length_scale=length_scale) - else: + elif kernel_type == "matern": kernel = Matern(length_scale=length_scale, nu=nu) - + else: + raise Exception("kernel_type must be 'rbf' or 'matern'") self.gp = GaussianProcessRegressor( kernel=kernel, alpha=noise, random_state=0, optimizer=None, ) + + def fit(self, X_train, y_train): + self.gp.fit(X_train, y_train) + + def predict(self, X_test): + y_mean, y_std = self.gp.predict(X_test, return_std=True) + y_variance = y_std ** 2 + return y_mean, y_std, y_variance diff --git a/pkg/suggestion/bayesianoptimization/src/model/rf.py b/pkg/suggestion/bayesianoptimization/src/model/rf.py index 59818902c66..8778b921e78 100644 --- a/pkg/suggestion/bayesianoptimization/src/model/rf.py +++ b/pkg/suggestion/bayesianoptimization/src/model/rf.py @@ -1,11 +1,24 @@ +import numpy as np +import forestci as fci from sklearn.ensemble import RandomForestRegressor class RandomForestModel: - def __init__(self, n_estimators, max_features): - n_estimators = n_estimators or 50 - max_features = max_features or "auto" + + def __init__(self, n_estimators=50, max_features="auto"): self.rf = RandomForestRegressor( n_estimators=n_estimators, max_features=max_features, ) + self.X_train = None + + def fit(self, X_train, y_train): + print(X_train.shape, y_train.shape) + self.X_train = X_train + self.rf.fit(X_train, y_train) + + def predict(self, X_test): + y_mean = self.rf.predict(X_test) + y_variance = fci.random_forest_error(self.rf, self.X_train, X_test) + y_std = np.sqrt(y_variance) + return y_mean, y_std, y_variance diff --git a/pkg/suggestion/bayesianoptimization/src/utils.py b/pkg/suggestion/bayesianoptimization/src/utils.py new file mode 100644 index 00000000000..7fafc3af390 --- /dev/null +++ b/pkg/suggestion/bayesianoptimization/src/utils.py @@ -0,0 +1,17 @@ +import os +import logging +from logging import getLogger, StreamHandler + + +FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s' +LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO") + + +def get_logger(name=__name__): + logger = getLogger(name) + logging.basicConfig(format=FORMAT) + handler = StreamHandler() + logger.setLevel(LOG_LEVEL) + logger.addHandler(handler) + logger.propagate = False + return logger diff --git a/pkg/suggestion/test_requirements.txt b/pkg/suggestion/test_requirements.txt new file mode 100644 index 00000000000..69022052405 --- /dev/null +++ b/pkg/suggestion/test_requirements.txt @@ -0,0 +1,5 @@ +pylint +pytest +pytest-cov +pyyaml +python-box diff --git a/pkg/suggestion/bayesianoptimization/src/global_optimizer/__init__.py b/pkg/suggestion/tests/__init__.py similarity index 100% rename from pkg/suggestion/bayesianoptimization/src/global_optimizer/__init__.py rename to pkg/suggestion/tests/__init__.py diff --git a/pkg/suggestion/tests/conftest.py b/pkg/suggestion/tests/conftest.py new file mode 100644 index 00000000000..2f957f9b572 --- /dev/null +++ b/pkg/suggestion/tests/conftest.py @@ -0,0 +1,49 @@ +# pylint: disable=missing-docstring +import pytest +import numpy as np + + +def booth_function(X): + f = (X[:, 0] + 2 * X[:, 1] - 7) ** 2 + (2 * X[:, 0] + X[:, 1] - 5) ** 2 + return f + + +@pytest.fixture +def lower_bounds(): + return [-5, -5] + + +@pytest.fixture +def upper_bounds(): + return [5, 5] + + +@pytest.fixture +def dim(): + return 2 + + +@pytest.fixture +def request_num(): + return 5 + + +@pytest.fixture +def X_train(lower_bounds, upper_bounds): + x_range = np.arange(lower_bounds[0], upper_bounds[0] + 1) + y_range = np.arange(lower_bounds[1], upper_bounds[1] + 1) + X_train = np.array([(x, y) for x in x_range for y in y_range]) + return X_train + + +@pytest.fixture +def X_test(): + x_range, y_range = np.arange(-1.5, 2.5), np.arange(-1.5, 2.5) + X_test = np.array([(x, y) for x in x_range for y in y_range]) + return X_test + + +@pytest.fixture +def y_train(X_train): + y_train = -booth_function(X_train) + return y_train diff --git a/pkg/suggestion/tests/parameter_values.yaml b/pkg/suggestion/tests/parameter_values.yaml new file mode 100644 index 00000000000..d9cef801e80 --- /dev/null +++ b/pkg/suggestion/tests/parameter_values.yaml @@ -0,0 +1,20 @@ +parameters: + - - name: "x" + value: 1.0 + - name: "y" + value: 1 + - name: "fake_discrete" + value: 2 + - name: "fake_categorical" + value: "true" + - - name: "x" + value: 1.0 + - name: "y" + value: 1 + - name: "fake_discrete" + value: 3 + - name: "fake_categorical" + value: "false" +metrics: + - 1.0 + - 1.0 diff --git a/pkg/suggestion/tests/study_config.yaml b/pkg/suggestion/tests/study_config.yaml new file mode 100644 index 00000000000..6644951d50b --- /dev/null +++ b/pkg/suggestion/tests/study_config.yaml @@ -0,0 +1,26 @@ +optimization_type: 2 # Code for Maximize +parameter_configs: + configs: + - parameter_type: 1 # Code for DOUBLE + name: "x" + feasible: + min: -5.0 + max: 5.0 + - parameter_type: 2 # Code for INT + name: "y" + feasible: + min: -5 + max: 5 + - parameter_type: 3 # Code for DISCRETE + name: "fake_discrete" + feasible: + list: + - 2 + - 3 + - 5 + - parameter_type: 4 # Code for CATEGORICAL + name: "fake_categorical" + feasible: + list: + - "true" + - "false" diff --git a/pkg/suggestion/tests/test_acquisition_function.py b/pkg/suggestion/tests/test_acquisition_function.py new file mode 100644 index 00000000000..e6a7d58732b --- /dev/null +++ b/pkg/suggestion/tests/test_acquisition_function.py @@ -0,0 +1,23 @@ +import pytest + +from ..bayesianoptimization.src.acquisition_func import AcquisitionFunc +from ..bayesianoptimization.src.model.gp import GaussianProcessModel + + +@pytest.fixture +def model(X_train, y_train): + model = GaussianProcessModel() + model.fit(X_train, y_train) + return model + + +@pytest.mark.parametrize("aq_mode", ["ei", "pi", "lcb"]) +def test_ei(aq_mode, model, X_test): + aq = AcquisitionFunc(model, + current_optimal=1.0, + mode=aq_mode, + trade_off=0.01) + results, y_mean, y_variance = aq.compute(X_test) + assert results.shape == (16,) + assert y_mean.shape == (16,) + assert y_variance.shape == (16,) diff --git a/pkg/suggestion/tests/test_algorithm_manager.py b/pkg/suggestion/tests/test_algorithm_manager.py new file mode 100644 index 00000000000..b5ba3bc190c --- /dev/null +++ b/pkg/suggestion/tests/test_algorithm_manager.py @@ -0,0 +1,53 @@ +import os + +import yaml +import pytest +import numpy as np +from box import Box + +from pkg.api.python import api_pb2 +from ..bayesianoptimization.src.algorithm_manager import AlgorithmManager + + +TEST_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture +def study_config(): + with open(os.path.join(TEST_DIR, "study_config.yaml"), "r") as f: + contents = yaml.safe_load(f) + return Box(contents) + + +@pytest.fixture +def observations(): + with open(os.path.join(TEST_DIR, "parameter_values.yaml"), "r") as f: + contents = yaml.safe_load(f) + return Box(contents) + + +def test_algorithm_manager(study_config, observations): + study_id = "test_id" + x_next = [1.0, 1, 5, "true"] + manager = AlgorithmManager(study_id, study_config, + observations.parameters, observations.metrics) + assert manager.study_id == study_id + assert manager.study_config == study_config + assert manager.goal == api_pb2.MAXIMIZE + assert manager.types == [api_pb2.DOUBLE, api_pb2.INT, api_pb2.DISCRETE, api_pb2.CATEGORICAL] + assert manager.names == ["x", "y", "fake_discrete", "fake_categorical"] + assert manager.dim == 5 + assert manager.lower_bound == [-5.0, -5, 2, 0, 0] + assert manager.upper_bound == [5.0, 5, 5, 1, 1] + assert manager.discrete_info == [{"name": "fake_discrete", "values": [2, 3, 5]}] + assert manager.categorical_info == \ + [{"name": "fake_categorical", "values": ["true", "false"], "number": 2}] + assert np.allclose(manager.X_train, np.array([[1.0, 1, 2, 1, 0], [1.0, 1, 3, 0, 1]])) + assert np.allclose(manager.y_train, np.array([1.0, 1.0])) + parsed_x_next = manager.parse_x_next(x_next) + x_next_dict = manager.convert_to_dict(parsed_x_next) + assert x_next_dict == \ + [{"name": "x", "value": 1.0, "type": api_pb2.DOUBLE}, + {"name": "y", "value": 1, "type": api_pb2.INT}, + {"name": "fake_discrete", "value": 5, "type": api_pb2.DISCRETE}, + {"name": "fake_categorical", "value": "true", "type": api_pb2.CATEGORICAL}] diff --git a/pkg/suggestion/tests/test_boa.py b/pkg/suggestion/tests/test_boa.py new file mode 100644 index 00000000000..dcb19f2ee30 --- /dev/null +++ b/pkg/suggestion/tests/test_boa.py @@ -0,0 +1,23 @@ +import numpy as np + +from ..bayesianoptimization.src.bayesian_optimization_algorithm import BOAlgorithm + + +def test_boa(dim, request_num, lower_bounds, upper_bounds, X_train, y_train): + boa = BOAlgorithm(dim=dim, + N=200, + lowerbound=np.array(lower_bounds, dtype=np.float64), + upperbound=np.array(upper_bounds, dtype=np.float64), + X_train=X_train, + y_train=y_train, + mode="ei", + trade_off=0.01, + length_scale=0.5, + noise=0.00005, + nu=1.5, + kernel_type="matern", + n_estimators=None, + max_features=None, + model_type="gp") + response = boa.get_suggestion(request_num) + assert len(response) == request_num diff --git a/pkg/suggestion/tests/test_global_optimizer.py b/pkg/suggestion/tests/test_global_optimizer.py new file mode 100644 index 00000000000..fb5aee010c0 --- /dev/null +++ b/pkg/suggestion/tests/test_global_optimizer.py @@ -0,0 +1,36 @@ +import pytest +import numpy as np +from sklearn.preprocessing import MinMaxScaler + +from ..bayesianoptimization.src.global_optimizer import GlobalOptimizer + + +@pytest.fixture +def scaler(lower_bounds, upper_bounds, dim): + scaler = MinMaxScaler() + lower_bounds = np.array(lower_bounds, dtype=np.float64).reshape(1, dim) + upper_bounds = np.array(upper_bounds, dtype=np.float64).reshape(1, dim) + scaler.fit(np.append(lower_bounds, upper_bounds, axis=0)) + return scaler + + +def test_global_optimizer(dim, request_num, scaler, X_train, y_train): + optimizer = GlobalOptimizer(N=200, + l=np.zeros((1, dim)), + u=np.ones((1, dim)), + scaler=scaler, + X_train=X_train, + y_train=y_train, + current_optimal=1.0, + mode="ei", + trade_off=0.01, + length_scale=0.5, + noise=0.00005, + nu=1.5, + kernel_type="matern", + n_estimators=None, + max_features=None, + model_type="gp") + f_min, x_next_candidate = optimizer.direct(request_num) + assert isinstance(f_min, float) + assert np.array(x_next_candidate).shape == (request_num, 1, dim) diff --git a/pkg/suggestion/tests/test_models.py b/pkg/suggestion/tests/test_models.py new file mode 100644 index 00000000000..b76242f800b --- /dev/null +++ b/pkg/suggestion/tests/test_models.py @@ -0,0 +1,23 @@ +import pytest + +from ..bayesianoptimization.src.model.rf import RandomForestModel +from ..bayesianoptimization.src.model.gp import GaussianProcessModel + + +MODELS = [RandomForestModel(n_estimators=5), + GaussianProcessModel(), + GaussianProcessModel(kernel_type="rbf")] + + +@pytest.mark.parametrize("model", MODELS) +def test_fit_predict(model, X_train, y_train, X_test): + model.fit(X_train, y_train) + y_mean, y_std, y_variance = model.predict(X_test) + assert y_mean.shape == (16,) + assert y_std.shape == (16,) + assert y_variance.shape == (16,) + + +def test_gp_kernel_type_exception(): + with pytest.raises(Exception): + _ = GaussianProcessModel(kernel_type="different_kernel") diff --git a/pkg/suggestion/tests/test_requirements.txt b/pkg/suggestion/tests/test_requirements.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/setup.py b/setup.py new file mode 100644 index 00000000000..074045aa294 --- /dev/null +++ b/setup.py @@ -0,0 +1,4 @@ +from setuptools import setup + +setup(name="pkg", + packages=["pkg"]) diff --git a/test/scripts/python-tests.sh b/test/scripts/python-tests.sh new file mode 100755 index 00000000000..00111b07a30 --- /dev/null +++ b/test/scripts/python-tests.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Copyright 2018 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This shell script is used to run the python tests in the argo workflow + +pip install -r cmd/suggestion/bayesianoptimization/requirements.txt +pip install -r pkg/suggestion/test_requirements.txt +python setup.py develop +pylint pkg/suggestion/bayesianoptimization/src --disable=fixme --exit-zero --reports=y +pytest pkg/suggestion/tests --verbose --cov=pkg/suggestion/bayesianoptimization/src --cov-report term-missing diff --git a/test/scripts/run-tests.sh b/test/scripts/run-tests.sh index ee157186f05..22f4b90c8d8 100755 --- a/test/scripts/run-tests.sh +++ b/test/scripts/run-tests.sh @@ -111,7 +111,8 @@ TIMEOUT=120 until curl localhost:6789 || [ $TIMEOUT -eq 0 ]; do sleep 5 TIMEOUT=$(( TIMEOUT - 1 )) -done +done + cp -r test ${GO_DIR}/test cd ${GO_DIR}/test/e2e kubectl apply -f valid-studyjob.yaml diff --git a/test/workflows/components/workflows.libsonnet b/test/workflows/components/workflows.libsonnet index 2c8956b6ede..e5593228218 100644 --- a/test/workflows/components/workflows.libsonnet +++ b/test/workflows/components/workflows.libsonnet @@ -58,6 +58,7 @@ local testWorkerImage = "gcr.io/kubeflow-ci/test-worker"; local golangImage = "golang:1.9.4-stretch"; // TODO(jose5918) Build our own helm image + local pythonImage = "python:3.6-jessie"; local helmImage = "volumecontroller/golang:1.9.2"; // The name of the NFS volume claim to use for test files. // local nfsVolumeClaim = "kubeflow-testing"; @@ -271,6 +272,12 @@ template: "unit-test", }, ], + [ + { + name: "python-tests", + template: "python-tests", + }, + ], [ { name: "run-tests", @@ -316,6 +323,9 @@ $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("setup-cluster",testWorkerImage, [ "test/scripts/create-cluster.sh", ]), // setup cluster + $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("python-tests", pythonImage, [ + "test/scripts/python-tests.sh", + ]), // run python tests $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("run-tests", helmImage, [ "test/scripts/run-tests.sh", ]), // run tests