From c87d5838b4142fc458f030c1d58be52a554354ac Mon Sep 17 00:00:00 2001
From: jdplatt <john.daniel.platt@gmail.com>
Date: Mon, 11 Mar 2019 15:54:38 -0400
Subject: [PATCH] Test for Bayesian Optimization Algo (#406)

* added tests for acquisition function and models

* added tests for global_optimizer

* added tests for boa

* minor linting

* tests for algorithm manager

* added discrete parameter to study config

* covered all parameter types

* moved python script to testing folder

* added python tests to unit tests

* remembered to uncomment existing tests

* fixed path to test script

* moved python tests to separate job in workflow

* added run command to test script
---
 .gitignore                                    |  3 +
 .../src/acquisition_func => }/__init__.py     |  0
 .../src/acquisition_func.py                   | 36 +++++++++++
 .../src/acquisition_func/acquisition_func.py  | 61 -------------------
 .../src/algorithm_manager.py                  | 35 ++++-------
 .../src/bayesian_optimization_algorithm.py    |  4 +-
 .../global_optimizer.py                       | 49 +++++++--------
 .../bayesianoptimization/src/model/gp.py      | 22 ++++---
 .../bayesianoptimization/src/model/rf.py      | 19 +++++-
 .../bayesianoptimization/src/utils.py         | 17 ++++++
 pkg/suggestion/test_requirements.txt          |  5 ++
 .../global_optimizer => tests}/__init__.py    |  0
 pkg/suggestion/tests/conftest.py              | 49 +++++++++++++++
 pkg/suggestion/tests/parameter_values.yaml    | 20 ++++++
 pkg/suggestion/tests/study_config.yaml        | 26 ++++++++
 .../tests/test_acquisition_function.py        | 23 +++++++
 .../tests/test_algorithm_manager.py           | 53 ++++++++++++++++
 pkg/suggestion/tests/test_boa.py              | 23 +++++++
 pkg/suggestion/tests/test_global_optimizer.py | 36 +++++++++++
 pkg/suggestion/tests/test_models.py           | 23 +++++++
 pkg/suggestion/tests/test_requirements.txt    |  0
 setup.py                                      |  4 ++
 test/scripts/python-tests.sh                  | 23 +++++++
 test/scripts/run-tests.sh                     |  3 +-
 test/workflows/components/workflows.libsonnet | 10 +++
 25 files changed, 419 insertions(+), 125 deletions(-)
 rename pkg/suggestion/{bayesianoptimization/src/acquisition_func => }/__init__.py (100%)
 create mode 100644 pkg/suggestion/bayesianoptimization/src/acquisition_func.py
 delete mode 100644 pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py
 rename pkg/suggestion/bayesianoptimization/src/{global_optimizer => }/global_optimizer.py (90%)
 create mode 100644 pkg/suggestion/bayesianoptimization/src/utils.py
 create mode 100644 pkg/suggestion/test_requirements.txt
 rename pkg/suggestion/{bayesianoptimization/src/global_optimizer => tests}/__init__.py (100%)
 create mode 100644 pkg/suggestion/tests/conftest.py
 create mode 100644 pkg/suggestion/tests/parameter_values.yaml
 create mode 100644 pkg/suggestion/tests/study_config.yaml
 create mode 100644 pkg/suggestion/tests/test_acquisition_function.py
 create mode 100644 pkg/suggestion/tests/test_algorithm_manager.py
 create mode 100644 pkg/suggestion/tests/test_boa.py
 create mode 100644 pkg/suggestion/tests/test_global_optimizer.py
 create mode 100644 pkg/suggestion/tests/test_models.py
 create mode 100644 pkg/suggestion/tests/test_requirements.txt
 create mode 100644 setup.py
 create mode 100755 test/scripts/python-tests.sh

diff --git a/.gitignore b/.gitignore
index e26c1d4356b..9ae1bd8ae1a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,9 @@
 # python ignore files
 __pycache__/
 .idea/
+.coverage
+.pytest_cache
+*.egg-info
 
 # Project specific ignore files
 *.swp
diff --git a/pkg/suggestion/bayesianoptimization/src/acquisition_func/__init__.py b/pkg/suggestion/__init__.py
similarity index 100%
rename from pkg/suggestion/bayesianoptimization/src/acquisition_func/__init__.py
rename to pkg/suggestion/__init__.py
diff --git a/pkg/suggestion/bayesianoptimization/src/acquisition_func.py b/pkg/suggestion/bayesianoptimization/src/acquisition_func.py
new file mode 100644
index 00000000000..9e061c6bd76
--- /dev/null
+++ b/pkg/suggestion/bayesianoptimization/src/acquisition_func.py
@@ -0,0 +1,36 @@
+""" module for acquisition function"""
+import numpy as np
+from scipy.stats import norm
+
+
+class AcquisitionFunc:
+    """
+    Acquisition function evaluated on a fitted surrogate model: expected
+    improvement, probability of improvement, or lower confidence bound.
+    """
+
+    def __init__(self, model, current_optimal, mode="ei", trade_off=0.01):
+        """
+        :param model: fitted model whose predict(X) returns (mean, std, variance)
+        :param current_optimal: objective value that candidate points must improve on
+        :param mode: "ei" (also when None) or "pi"; any other value selects the lower confidence bound
+        :param trade_off: a parameter to control the trade off between exploiting and exploring
+        """
+        self.model = model
+        self.current_optimal = current_optimal
+        self.mode = mode or "ei"  # callers forward None for "not configured"
+        self.trade_off = 0.01 if trade_off is None else trade_off
+
+    def compute(self, X_test):
+        """ return (acquisition values, predicted mean, predicted variance) for X_test """
+        y_mean, y_std, y_variance = self.model.predict(X_test)
+        z = (y_mean - self.current_optimal - self.trade_off) / y_std
+
+        if self.mode == "ei":
+            # per-point guard: no expected improvement where the predictive std vanishes
+            result = np.where(y_std < 0.000001, 0.0, y_std * (z * norm.cdf(z) + norm.pdf(z)))
+        elif self.mode == "pi":
+            result = norm.cdf(z)
+        else:
+            result = - (y_mean - self.trade_off * y_std)
+        return np.squeeze(result), np.squeeze(y_mean), np.squeeze(y_variance)
diff --git a/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py b/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py
deleted file mode 100644
index b980f2658f7..00000000000
--- a/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py
+++ /dev/null
@@ -1,61 +0,0 @@
-""" module for acquisition function"""
-import numpy as np
-from scipy.stats import norm
-import forestci as fci
-
-from pkg.suggestion.bayesianoptimization.src.model.gp import GaussianProcessModel
-from pkg.suggestion.bayesianoptimization.src.model.rf import RandomForestModel
-
-
-class AcquisitionFunc:
-    """ class for acquisition function
-    expected improvement in this case
-    """
-    def __init__(self, X_train, y_train, current_optimal, mode, trade_off, length_scale,
-                 noise, nu, kernel_type, n_estimators, max_features, model_type):
-        """
-        :param mode: pi: probability of improvement, ei: expected improvement, lcb: lower confident bound
-        :param trade_off: a parameter to control the trade off between exploiting and exploring
-        :param model_type: gp: gaussian process, rf: random forest
-        """
-        self.X_train = X_train
-        self.y_train = y_train
-        self.current_optimal = current_optimal
-        self.mode = mode or "ei"
-        self.trade_off = trade_off or 0.01
-        self.model_type = model_type or "gp"
-        if self.model_type == "gp":
-            self.model = GaussianProcessModel(
-                length_scale=length_scale,
-                noise=noise,
-                nu=nu,
-                kernel_type=kernel_type,
-            )
-        else:
-            self.model = RandomForestModel(
-                n_estimators=n_estimators,
-                max_features=max_features,
-            )
-
-    def compute(self, X_test):
-        if self.model_type == "gp":
-            self.model.gp.fit(self.X_train, self.y_train)
-            y_mean, y_std = self.model.gp.predict(X_test, return_std=True)
-            y_variance = y_std ** 2
-        else:
-            self.model.rf.fit(self.y_train, self.y_train)
-            y_mean = self.model.rf.predict(X_test)
-            y_variance = fci.random_forest_error(self.model.rf, self.X_train, X_test)
-            y_std = np.sqrt(y_variance)
-
-        z = (y_mean - self.current_optimal - self.trade_off) / y_std
-
-        if self.mode == "ei":
-            if y_std < 0.000001:
-                return 0, y_mean, y_variance
-            result = y_std * (z * norm.cdf(z) + norm.pdf(z))
-        elif self.mode == "pi":
-            result = norm.cdf(z)
-        else:
-            result = - (y_mean - self.trade_off * y_std)
-        return np.squeeze(result), np.squeeze(y_mean), np.squeeze(y_variance)
diff --git a/pkg/suggestion/bayesianoptimization/src/algorithm_manager.py b/pkg/suggestion/bayesianoptimization/src/algorithm_manager.py
index 0898acf4c6c..3ff5115b7b6 100644
--- a/pkg/suggestion/bayesianoptimization/src/algorithm_manager.py
+++ b/pkg/suggestion/bayesianoptimization/src/algorithm_manager.py
@@ -1,10 +1,10 @@
 """ module for algorithm manager """
-
 import numpy as np
 
 from pkg.api.python import api_pb2
-import logging
-from logging import getLogger, StreamHandler, INFO, DEBUG
+
+from .utils import get_logger
+
 
 def deal_with_discrete(feasible_values, current_value):
     """ function to embed the current values to the feasible discrete space"""
@@ -12,28 +12,20 @@ def deal_with_discrete(feasible_values, current_value):
     diff = np.absolute(diff)
     return feasible_values[np.argmin(diff)]
 
+
 def deal_with_categorical(feasible_values, one_hot_values):
     """ function to do the one hot encoding of the categorical values """
-    #index = np.argmax(one_hot_values)
-    index = one_hot_values.argmax()
+    index = np.argmax(one_hot_values)
+    #index = one_hot_values.argmax()
     return feasible_values[int(index)]
 
+
 class AlgorithmManager:
     """ class for the algorithm manager
     provide some helper functions
     """
     def __init__(self, study_id, study_config, X_train, y_train, logger=None):
-        if logger == None:
-            self.logger = getLogger(__name__)
-            FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s'
-            logging.basicConfig(format=FORMAT)
-            handler = StreamHandler()
-            handler.setLevel(DEBUG)
-            self.logger.setLevel(DEBUG)
-            self.logger.addHandler(handler)
-            self.logger.propagate = False
-        else:
-            self.logger = logger
+        self.logger = logger if (logger is not None) else get_logger()
         self._study_id = study_id
         self._study_config = study_config
         self._goal = self._study_config.optimization_type
@@ -82,7 +74,7 @@ def lower_bound(self):
 
     @property
     def upper_bound(self):
-        """ return the ipper bound of all the parameters """
+        """ return the upper bound of all the parameters """
         return self._upperbound
 
     @property
@@ -118,10 +110,10 @@ def y_train(self):
     def _parse_config(self):
         """ extract info from the study configuration """
         for i, param in enumerate(self._study_config.parameter_configs.configs):
-            self._name_id[param.name]=i
+            self._name_id[param.name] = i
             self._types.append(param.parameter_type)
             self._names.append(param.name)
-            if param.parameter_type == api_pb2.DOUBLE or param.parameter_type == api_pb2.INT:
+            if param.parameter_type in [api_pb2.DOUBLE, api_pb2.INT]:
                 self._dim = self._dim + 1
                 self._lowerbound.append(float(param.feasible.min))
                 self._upperbound.append(float(param.feasible.max))
@@ -158,7 +150,7 @@ def _mapping_params(self, parameters_list):
             for p in parameters:
                 self.logger.debug("mapping: %r", p, extra={"StudyID": self._study_id})
                 map_id = self._name_id[p.name]
-                if self._types[map_id] == api_pb2.DOUBLE or self._types[map_id] == api_pb2.INT or self._types[map_id] == api_pb2.DISCRETE:
+                if self._types[map_id] in [api_pb2.DOUBLE, api_pb2.INT, api_pb2.DISCRETE]:
                     maplist[map_id] = float(p.value)
                 elif self._types[map_id] == api_pb2.CATEGORICAL:
                     for ci in self._categorical_info:
@@ -166,7 +158,7 @@ def _mapping_params(self, parameters_list):
                             maplist[map_id] = np.zeros(ci["number"])
                             for i, v in enumerate(ci["values"]):
                                 if v == p.value:
-                                    maplist[map_id][i]=1
+                                    maplist[map_id][i] = 1
                                     break
             self.logger.debug("mapped: %r", maplist, extra={"StudyID": self._study_id})
             ret.append(np.hstack(maplist))
@@ -234,4 +226,3 @@ def convert_to_dict(self, x_next):
             })
             result.append(tmp)
         return result
-
diff --git a/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py b/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py
index 2709cc402e7..6b2e57c3f9d 100644
--- a/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py
+++ b/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py
@@ -2,7 +2,7 @@
 import numpy as np
 from sklearn.preprocessing import MinMaxScaler
 
-from pkg.suggestion.bayesianoptimization.src.global_optimizer.global_optimizer import GlobalOptimizer
+from .global_optimizer import GlobalOptimizer
 
 
 class BOAlgorithm:
@@ -54,7 +54,7 @@ def get_suggestion(self, request_num):
         x_next_list = []
         if self.X_train is None and self.y_train is None and self.current_optimal is None:
             # randomly pick a point as the first trial
-            for i in range(request_num):
+            for _ in range(request_num):
                 x_next_list.append(np.random.uniform(self.lowerbound, self.upperbound, size=(1, self.dim)))
         else:
             _, x_next_list_que = self.optimizer.direct(request_num)
diff --git a/pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py b/pkg/suggestion/bayesianoptimization/src/global_optimizer.py
similarity index 90%
rename from pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py
rename to pkg/suggestion/bayesianoptimization/src/global_optimizer.py
index 3f4333e0ae7..7a307c86af9 100644
--- a/pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py
+++ b/pkg/suggestion/bayesianoptimization/src/global_optimizer.py
@@ -2,12 +2,13 @@
 DIRECT algorithm is used in this case
 """
 import copy
+
 import numpy as np
-from collections import deque
 
-from pkg.suggestion.bayesianoptimization.src.acquisition_func.acquisition_func import AcquisitionFunc
-import logging
-from logging import getLogger, StreamHandler, INFO, DEBUG
+from .acquisition_func import AcquisitionFunc
+from .model.gp import GaussianProcessModel
+from .model.rf import RandomForestModel
+from .utils import get_logger
 
 
 class RectPack:
@@ -74,37 +75,31 @@ class GlobalOptimizer:
 
     def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, mode, trade_off, length_scale,
                  noise, nu, kernel_type, n_estimators, max_features, model_type, logger=None):
-        if logger == None:
-            self.logger = getLogger(__name__)
-            FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s'
-            logging.basicConfig(format=FORMAT)
-            handler = StreamHandler()
-            handler.setLevel(INFO)
-            self.logger.setLevel(INFO)
-            self.logger.addHandler(handler)
-            self.logger.propagate = False
-        else:
-            self.logger = logger
-
+        self.logger = logger if (logger is not None) else get_logger()
         self.N = N
         self.l = l
         self.u = u
         self.scaler = scaler
         self.buckets = []
         self.dim = None
+        if model_type == "gp":
+            model = GaussianProcessModel(
+                length_scale=length_scale,
+                noise=noise,
+                nu=nu,
+                kernel_type=kernel_type,
+            )
+        else:
+            model = RandomForestModel(
+                n_estimators=n_estimators,
+                max_features=max_features,
+            )
+        model.fit(X_train, y_train)
         self.aq_func = AcquisitionFunc(
-            X_train=X_train,
-            y_train=y_train,
+            model=model,
             current_optimal=current_optimal,
             mode=mode,
             trade_off=trade_off,
-            length_scale=length_scale,
-            noise=noise,
-            nu=nu,
-            kernel_type=kernel_type,
-            n_estimators=n_estimators,
-            max_features=max_features,
-            model_type=model_type,
         )
 
     def potential_opt(self, f_min):
@@ -174,7 +169,7 @@ def direct(self, request_num):
         x_next = first_rect.center
         ei_min.append(f_min)
 
-        for t in range(self.N):
+        for _ in range(self.N):
             opt_set = self.potential_opt(f_min)
 
             # for bucket in self.buckets:
@@ -215,7 +210,7 @@ def sample_buckets(self, request_num):
                 fc_sum -= a.fc
                 bucket_index.append([-a.fc, a.center])
         bucket_index = sorted(bucket_index, key=lambda x: x[0])
-        for i in range(request_num):
+        for _ in range(request_num):
             sample = np.random.rand()
             stick = 0.0
             for b in bucket_index:
diff --git a/pkg/suggestion/bayesianoptimization/src/model/gp.py b/pkg/suggestion/bayesianoptimization/src/model/gp.py
index 9f8a750a5ab..446238c0669 100644
--- a/pkg/suggestion/bayesianoptimization/src/model/gp.py
+++ b/pkg/suggestion/bayesianoptimization/src/model/gp.py
@@ -5,7 +5,8 @@
 
 class GaussianProcessModel:
     """ use the gaussian process as a prior """
-    def __init__(self, length_scale, noise, nu, kernel_type):
+    def __init__(self, length_scale=0.5, noise=0.00005,
+                 nu=1.5, kernel_type="matern"):
         """
         :param length_scale: the larger the length_scale is, the smoother the gaussian prior is. If a float,
         an isotropic kernel is used. If an array, an anisotropic kernel is used where each dimension of it defines
@@ -15,20 +16,23 @@ def __init__(self, length_scale, noise, nu, kernel_type):
         approximate function is.
         :param kernel_type: "rbf": squared exponential kernel, "matern": Matern kernel.
         """
-
-        length_scale = length_scale or 0.5
-        noise = noise or 0.00005
-        nu = nu or 1.5
-        kernel_type = kernel_type or "matern"
-
         if kernel_type == "rbf":
             kernel = RBF(length_scale=length_scale)
-        else:
+        elif kernel_type == "matern":
             kernel = Matern(length_scale=length_scale, nu=nu)
-
+        else:
+            raise Exception("kernel_type must be 'rbf' or 'matern'")
         self.gp = GaussianProcessRegressor(
             kernel=kernel,
             alpha=noise,
             random_state=0,
             optimizer=None,
         )
+
+    def fit(self, X_train, y_train):
+        self.gp.fit(X_train, y_train)
+
+    def predict(self, X_test):
+        """ return (mean, std, variance) predicted by the regressor at X_test """
+        mean, std = self.gp.predict(X_test, return_std=True)
+        return mean, std, std ** 2
diff --git a/pkg/suggestion/bayesianoptimization/src/model/rf.py b/pkg/suggestion/bayesianoptimization/src/model/rf.py
index 59818902c66..8778b921e78 100644
--- a/pkg/suggestion/bayesianoptimization/src/model/rf.py
+++ b/pkg/suggestion/bayesianoptimization/src/model/rf.py
@@ -1,11 +1,24 @@
+import numpy as np
+import forestci as fci
 from sklearn.ensemble import RandomForestRegressor
 
 
 class RandomForestModel:
-    def __init__(self, n_estimators, max_features):
-        n_estimators = n_estimators or 50
-        max_features = max_features or "auto"
+    def __init__(self, n_estimators=50, max_features="auto"):
+        n_estimators, max_features = n_estimators or 50, max_features or "auto"  # None -> default
         self.rf = RandomForestRegressor(
             n_estimators=n_estimators,
             max_features=max_features,
         )
+        self.X_train = None
+
+    def fit(self, X_train, y_train):
+        """ fit the forest and keep X_train, which predict needs for the variance """
+        self.X_train = X_train
+        self.rf.fit(X_train, y_train)
+
+    def predict(self, X_test):
+        """ return (mean, std, variance); the variance comes from forestci """
+        y_mean = self.rf.predict(X_test)
+        y_variance = fci.random_forest_error(self.rf, self.X_train, X_test)
+        return y_mean, np.sqrt(y_variance), y_variance
diff --git a/pkg/suggestion/bayesianoptimization/src/utils.py b/pkg/suggestion/bayesianoptimization/src/utils.py
new file mode 100644
index 00000000000..7fafc3af390
--- /dev/null
+++ b/pkg/suggestion/bayesianoptimization/src/utils.py
@@ -0,0 +1,17 @@
+import os
+import logging
+from logging import getLogger, StreamHandler
+
+
+FORMAT = '%(asctime)-15s StudyID %(studyid)s %(message)s'
+LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
+
+
+def get_logger(name=__name__):
+    logger = getLogger(name)
+    logging.basicConfig(format=FORMAT)
+    logger.setLevel(LOG_LEVEL)
+    logger.propagate = False  # records stay off the root handler configured above
+    if not logger.handlers:  # same logger object per name: attach the handler once
+        logger.addHandler(StreamHandler())
+    return logger
diff --git a/pkg/suggestion/test_requirements.txt b/pkg/suggestion/test_requirements.txt
new file mode 100644
index 00000000000..69022052405
--- /dev/null
+++ b/pkg/suggestion/test_requirements.txt
@@ -0,0 +1,5 @@
+pylint
+pytest
+pytest-cov
+pyyaml
+python-box
diff --git a/pkg/suggestion/bayesianoptimization/src/global_optimizer/__init__.py b/pkg/suggestion/tests/__init__.py
similarity index 100%
rename from pkg/suggestion/bayesianoptimization/src/global_optimizer/__init__.py
rename to pkg/suggestion/tests/__init__.py
diff --git a/pkg/suggestion/tests/conftest.py b/pkg/suggestion/tests/conftest.py
new file mode 100644
index 00000000000..2f957f9b572
--- /dev/null
+++ b/pkg/suggestion/tests/conftest.py
@@ -0,0 +1,49 @@
+# pylint: disable=missing-docstring
+import pytest
+import numpy as np
+
+
+def booth_function(X):
+    x, y = X[:, 0], X[:, 1]  # one row per point, columns are the two coordinates
+    return (x + 2 * y - 7) ** 2 + (2 * x + y - 5) ** 2
+
+
+@pytest.fixture
+def lower_bounds():
+    return [-5, -5]
+
+
+@pytest.fixture
+def upper_bounds():
+    return [5, 5]
+
+
+@pytest.fixture
+def dim():
+    return 2
+
+
+@pytest.fixture
+def request_num():
+    return 5
+
+
+@pytest.fixture
+def X_train(lower_bounds, upper_bounds):
+    xs = np.arange(lower_bounds[0], upper_bounds[0] + 1)
+    ys = np.arange(lower_bounds[1], upper_bounds[1] + 1)
+    # every integer point of the box, upper bounds included
+    return np.array([(x, y) for x in xs for y in ys])
+
+
+@pytest.fixture
+def X_test():
+    # 4 x 4 grid of half-integer points from -1.5 to 1.5 on each axis
+    ticks = np.arange(-1.5, 2.5)
+    return np.array([(x, y) for x in ticks for y in ticks])
+
+
+@pytest.fixture
+def y_train(X_train):
+    # negated Booth values: the function's minimum becomes a maximum
+    return -booth_function(X_train)
diff --git a/pkg/suggestion/tests/parameter_values.yaml b/pkg/suggestion/tests/parameter_values.yaml
new file mode 100644
index 00000000000..d9cef801e80
--- /dev/null
+++ b/pkg/suggestion/tests/parameter_values.yaml
@@ -0,0 +1,20 @@
+parameters:
+  - - name: "x"
+      value: 1.0
+    - name: "y"
+      value: 1
+    - name: "fake_discrete"
+      value: 2
+    - name: "fake_categorical"
+      value: "true"
+  - - name: "x"
+      value: 1.0
+    - name: "y"
+      value: 1
+    - name: "fake_discrete"
+      value: 3
+    - name: "fake_categorical"
+      value: "false"
+metrics:
+  - 1.0
+  - 1.0
diff --git a/pkg/suggestion/tests/study_config.yaml b/pkg/suggestion/tests/study_config.yaml
new file mode 100644
index 00000000000..6644951d50b
--- /dev/null
+++ b/pkg/suggestion/tests/study_config.yaml
@@ -0,0 +1,26 @@
+optimization_type: 2 # Code for Maximize
+parameter_configs:
+  configs:
+    - parameter_type: 1 # Code for DOUBLE
+      name: "x"
+      feasible:
+        min: -5.0
+        max: 5.0
+    - parameter_type: 2 # Code for INT
+      name: "y"
+      feasible:
+        min: -5
+        max: 5
+    - parameter_type: 3 # Code for DISCRETE
+      name: "fake_discrete"
+      feasible:
+        list:
+          - 2
+          - 3
+          - 5
+    - parameter_type: 4 # Code for CATEGORICAL
+      name: "fake_categorical"
+      feasible:
+        list:
+          - "true"
+          - "false"
diff --git a/pkg/suggestion/tests/test_acquisition_function.py b/pkg/suggestion/tests/test_acquisition_function.py
new file mode 100644
index 00000000000..e6a7d58732b
--- /dev/null
+++ b/pkg/suggestion/tests/test_acquisition_function.py
@@ -0,0 +1,23 @@
+import pytest
+
+from ..bayesianoptimization.src.acquisition_func import AcquisitionFunc
+from ..bayesianoptimization.src.model.gp import GaussianProcessModel
+
+
+@pytest.fixture
+def model(X_train, y_train):
+    model = GaussianProcessModel()
+    model.fit(X_train, y_train)
+    return model
+
+
+@pytest.mark.parametrize("aq_mode", ["ei", "pi", "lcb"])
+def test_ei(aq_mode, model, X_test):
+    aq = AcquisitionFunc(model,
+                         current_optimal=1.0,
+                         mode=aq_mode,
+                         trade_off=0.01)
+    results, y_mean, y_variance = aq.compute(X_test)
+    assert results.shape == (16,)
+    assert y_mean.shape == (16,)
+    assert y_variance.shape == (16,)
diff --git a/pkg/suggestion/tests/test_algorithm_manager.py b/pkg/suggestion/tests/test_algorithm_manager.py
new file mode 100644
index 00000000000..b5ba3bc190c
--- /dev/null
+++ b/pkg/suggestion/tests/test_algorithm_manager.py
@@ -0,0 +1,53 @@
+import os
+
+import yaml
+import pytest
+import numpy as np
+from box import Box
+
+from pkg.api.python import api_pb2
+from ..bayesianoptimization.src.algorithm_manager import AlgorithmManager
+
+
+TEST_DIR = os.path.dirname(os.path.realpath(__file__))
+
+
+@pytest.fixture
+def study_config():
+    with open(os.path.join(TEST_DIR, "study_config.yaml"), "r") as f:
+        contents = yaml.safe_load(f)
+    return Box(contents)
+
+
+@pytest.fixture
+def observations():
+    with open(os.path.join(TEST_DIR, "parameter_values.yaml"), "r") as f:
+        contents = yaml.safe_load(f)
+    return Box(contents)
+
+
+def test_algorithm_manager(study_config, observations):
+    study_id = "test_id"
+    x_next = [1.0, 1, 5, "true"]
+    manager = AlgorithmManager(study_id, study_config,
+                               observations.parameters, observations.metrics)
+    assert manager.study_id == study_id
+    assert manager.study_config == study_config
+    assert manager.goal == api_pb2.MAXIMIZE
+    assert manager.types == [api_pb2.DOUBLE, api_pb2.INT, api_pb2.DISCRETE, api_pb2.CATEGORICAL]
+    assert manager.names == ["x", "y", "fake_discrete", "fake_categorical"]
+    assert manager.dim == 5
+    assert manager.lower_bound == [-5.0, -5, 2, 0, 0]
+    assert manager.upper_bound == [5.0, 5, 5, 1, 1]
+    assert manager.discrete_info == [{"name": "fake_discrete", "values": [2, 3, 5]}]
+    assert manager.categorical_info == \
+           [{"name": "fake_categorical", "values": ["true", "false"], "number": 2}]
+    assert np.allclose(manager.X_train, np.array([[1.0, 1, 2, 1, 0], [1.0, 1, 3, 0, 1]]))
+    assert np.allclose(manager.y_train, np.array([1.0, 1.0]))
+    parsed_x_next = manager.parse_x_next(x_next)
+    x_next_dict = manager.convert_to_dict(parsed_x_next)
+    assert x_next_dict == \
+           [{"name": "x", "value": 1.0, "type": api_pb2.DOUBLE},
+            {"name": "y", "value": 1, "type": api_pb2.INT},
+            {"name": "fake_discrete", "value": 5, "type": api_pb2.DISCRETE},
+            {"name": "fake_categorical", "value": "true", "type": api_pb2.CATEGORICAL}]
diff --git a/pkg/suggestion/tests/test_boa.py b/pkg/suggestion/tests/test_boa.py
new file mode 100644
index 00000000000..dcb19f2ee30
--- /dev/null
+++ b/pkg/suggestion/tests/test_boa.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+from ..bayesianoptimization.src.bayesian_optimization_algorithm import BOAlgorithm
+
+
+def test_boa(dim, request_num, lower_bounds, upper_bounds, X_train, y_train):
+    boa = BOAlgorithm(dim=dim,
+                      N=200,
+                      lowerbound=np.array(lower_bounds, dtype=np.float64),
+                      upperbound=np.array(upper_bounds, dtype=np.float64),
+                      X_train=X_train,
+                      y_train=y_train,
+                      mode="ei",
+                      trade_off=0.01,
+                      length_scale=0.5,
+                      noise=0.00005,
+                      nu=1.5,
+                      kernel_type="matern",
+                      n_estimators=None,
+                      max_features=None,
+                      model_type="gp")
+    response = boa.get_suggestion(request_num)
+    assert len(response) == request_num
diff --git a/pkg/suggestion/tests/test_global_optimizer.py b/pkg/suggestion/tests/test_global_optimizer.py
new file mode 100644
index 00000000000..fb5aee010c0
--- /dev/null
+++ b/pkg/suggestion/tests/test_global_optimizer.py
@@ -0,0 +1,36 @@
+import pytest
+import numpy as np
+from sklearn.preprocessing import MinMaxScaler
+
+from ..bayesianoptimization.src.global_optimizer import GlobalOptimizer
+
+
+@pytest.fixture
+def scaler(lower_bounds, upper_bounds, dim):
+    scaler = MinMaxScaler()
+    lower_bounds = np.array(lower_bounds, dtype=np.float64).reshape(1, dim)
+    upper_bounds = np.array(upper_bounds, dtype=np.float64).reshape(1, dim)
+    scaler.fit(np.append(lower_bounds, upper_bounds, axis=0))
+    return scaler
+
+
+def test_global_optimizer(dim, request_num, scaler, X_train, y_train):
+    optimizer = GlobalOptimizer(N=200,
+                                l=np.zeros((1, dim)),
+                                u=np.ones((1, dim)),
+                                scaler=scaler,
+                                X_train=X_train,
+                                y_train=y_train,
+                                current_optimal=1.0,
+                                mode="ei",
+                                trade_off=0.01,
+                                length_scale=0.5,
+                                noise=0.00005,
+                                nu=1.5,
+                                kernel_type="matern",
+                                n_estimators=None,
+                                max_features=None,
+                                model_type="gp")
+    f_min, x_next_candidate = optimizer.direct(request_num)
+    assert isinstance(f_min, float)
+    assert np.array(x_next_candidate).shape == (request_num, 1, dim)
diff --git a/pkg/suggestion/tests/test_models.py b/pkg/suggestion/tests/test_models.py
new file mode 100644
index 00000000000..b76242f800b
--- /dev/null
+++ b/pkg/suggestion/tests/test_models.py
@@ -0,0 +1,23 @@
+import pytest
+
+from ..bayesianoptimization.src.model.rf import RandomForestModel
+from ..bayesianoptimization.src.model.gp import GaussianProcessModel
+
+
+MODELS = [RandomForestModel(n_estimators=5),
+          GaussianProcessModel(),
+          GaussianProcessModel(kernel_type="rbf")]
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_fit_predict(model, X_train, y_train, X_test):
+    model.fit(X_train, y_train)
+    y_mean, y_std, y_variance = model.predict(X_test)
+    assert y_mean.shape == (16,)
+    assert y_std.shape == (16,)
+    assert y_variance.shape == (16,)
+
+
+def test_gp_kernel_type_exception():
+    with pytest.raises(Exception):
+        _ = GaussianProcessModel(kernel_type="different_kernel")
diff --git a/pkg/suggestion/tests/test_requirements.txt b/pkg/suggestion/tests/test_requirements.txt
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000000..074045aa294
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,4 @@
+from setuptools import setup
+
+setup(name="pkg",
+      packages=["pkg"])
diff --git a/test/scripts/python-tests.sh b/test/scripts/python-tests.sh
new file mode 100755
index 00000000000..00111b07a30
--- /dev/null
+++ b/test/scripts/python-tests.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Copyright 2018 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This shell script is used to run the python tests in the argo workflow
+set -o errexit  # stop at the first failing step; pylint is exempt via --exit-zero
+pip install -r cmd/suggestion/bayesianoptimization/requirements.txt
+pip install -r pkg/suggestion/test_requirements.txt
+python setup.py develop
+pylint pkg/suggestion/bayesianoptimization/src --disable=fixme --exit-zero --reports=y
+pytest pkg/suggestion/tests --verbose --cov=pkg/suggestion/bayesianoptimization/src --cov-report term-missing
diff --git a/test/scripts/run-tests.sh b/test/scripts/run-tests.sh
index ee157186f05..22f4b90c8d8 100755
--- a/test/scripts/run-tests.sh
+++ b/test/scripts/run-tests.sh
@@ -111,7 +111,8 @@ TIMEOUT=120
 until curl localhost:6789 || [ $TIMEOUT -eq 0 ]; do
     sleep 5
     TIMEOUT=$(( TIMEOUT - 1 ))
-done 
+done
+
 cp -r test ${GO_DIR}/test
 cd ${GO_DIR}/test/e2e
 kubectl apply -f valid-studyjob.yaml
diff --git a/test/workflows/components/workflows.libsonnet b/test/workflows/components/workflows.libsonnet
index 2c8956b6ede..e5593228218 100644
--- a/test/workflows/components/workflows.libsonnet
+++ b/test/workflows/components/workflows.libsonnet
@@ -58,6 +58,7 @@
       local testWorkerImage = "gcr.io/kubeflow-ci/test-worker";
       local golangImage = "golang:1.9.4-stretch";
       // TODO(jose5918) Build our own helm image
+      local pythonImage = "python:3.6-jessie";
       local helmImage = "volumecontroller/golang:1.9.2";
       // The name of the NFS volume claim to use for test files.
       // local nfsVolumeClaim = "kubeflow-testing";
@@ -271,6 +272,12 @@
                     template: "unit-test",
                   },
                 ],
+                [
+                  {
+                    name: "python-tests",
+                    template: "python-tests",
+                  },
+                ],
                 [
                   {
                     name: "run-tests",
@@ -316,6 +323,9 @@
             $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("setup-cluster",testWorkerImage, [
               "test/scripts/create-cluster.sh",
             ]),  // setup cluster
+            $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("python-tests", pythonImage, [
+              "test/scripts/python-tests.sh",
+            ]),  // run python tests
             $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("run-tests", helmImage, [
               "test/scripts/run-tests.sh",
             ]),  // run tests