Enrich circle ci integration (#5)
bettercallshao authored Aug 30, 2019
1 parent e993e20 commit 0bb107f
Showing 9 changed files with 153 additions and 87 deletions.
92 changes: 69 additions & 23 deletions .circleci/config.yml
@@ -1,28 +1,74 @@
# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
version: 2
jobs:
build:
sanity:
docker:
- image: circleci/python:3.7

- image: circleci/python:3.7
working_directory: ~/repo

steps:
- checkout

- run:
name: install dependencies
command: |
pipenv install
- run:
name: run tests
command: |
pipenv run pytest
- store_artifacts:
path: test-reports
destination: test-reports
- checkout
- run:
name: install dependencies
command: |
pip install --user -r ci/requirements.txt
- run:
name: linter
command: |
flake8 .
- run:
name: install package
command: |
pip install --user .
- run:
name: pytest
command: |
pytest
release:
docker:
- image: circleci/python:3.7
working_directory: ~/repo
steps:
- checkout
- run:
name: install dependencies
command: |
pip install --user -r ci/requirements.txt
- run:
name: generate .pypirc
command: |
echo -e "[pypi]" >> ~/.pypirc
echo -e "username = $PYPI_USERNAME" >> ~/.pypirc
echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc
- run:
name: build package
command: |
python setup.py sdist
- run:
name: upload to pypi
command: |
twine upload dist/*
workflows:
version: 2
sanity:
jobs:
- sanity:
filters:
branches:
only: /.*/
tags:
ignore: /.*/
release:
jobs:
- sanity:
filters:
tags:
only: /v[0-9]+(\.[0-9]+)*/
branches:
ignore: /.*/
- release:
filters:
tags:
only: /v[0-9]+(\.[0-9]+)*/
branches:
ignore: /.*/
requires:
- sanity
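
As a side note on the workflow filters above: the release job only runs for tags matching `/v[0-9]+(\.[0-9]+)*/`. The following small Python check is purely illustrative (the tag names are made up, and CircleCI itself evaluates the filter, not this script); it mirrors CircleCI's behaviour of matching the pattern against the whole tag name:

```python
import re

# Same pattern as the `tags: only:` filter in the workflow above.
# CircleCI requires filter regexes to match the entire tag name,
# so fullmatch() mirrors that behaviour here.
TAG_FILTER = re.compile(r'v[0-9]+(\.[0-9]+)*')

for tag in ('v0.1.1', 'v1', 'release-1.0', '0.1.1'):
    print(tag, bool(TAG_FILTER.fullmatch(tag)))

# v0.1.1 and v1 pass the filter; release-1.0 and 0.1.1 do not,
# so only v-prefixed version tags trigger the release job.
```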
13 changes: 0 additions & 13 deletions Pipfile

This file was deleted.

6 changes: 2 additions & 4 deletions README.md
@@ -1,4 +1,6 @@
# Surrogate Search CV
[![CircleCI](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv.svg?style=shield)](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv)
[![PyPi](https://badge.fury.io/py/sklearn_surrogatesearchcv.svg)](https://badge.fury.io/py/sklearn_surrogatesearchcv)

This package implements a randomized hyper parameter search for sklearn (similar to `RandomizedSearchCV`) but utilizes surrogate adaptive sampling from pySOT. Use this similarly to `GridSearchCV` with a few extra parameters.

@@ -50,10 +52,6 @@ best_score_

For a complete example, please refer to `src/test/test_basic.py`.

## CI

[![CircleCI](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv.svg?style=svg)](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv)

## Resources

A slide about the role of surrogate optimization in ML. [link](https://www.slideshare.net/TimTan2/machine-learning-vs-traditional-optimization)
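
To make the intended usage concrete, here is a minimal sketch pieced together from the README description above and the `src/test/test_basic.py` changes in this commit. The dataset, estimator, parameter bounds, and `cv` value are illustrative choices, not part of the diff:

```python
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

from sklearn_surrogatesearchcv import SurrogateSearchCV

digits = load_digits()
X, y = digits.data, digits.target
clf = RandomForestClassifier(n_estimators=5)

# Each tuned parameter is described by a dict with 'name', 'lb', 'ub',
# and an optional 'integer' flag, as validated in surrogatesearchcv.py.
param_def = [
    {'name': 'max_depth', 'integer': True, 'lb': 3, 'ub': 6},
    {'name': 'min_samples_split', 'integer': True, 'lb': 2, 'ub': 10},
]

# Extra keyword arguments (e.g. cv) are forwarded to GridSearchCV internally.
search = SurrogateSearchCV(clf, n_iter=10, param_def=param_def, cv=3)
search.fit(X, y)

print(search.best_score_)
print(search.best_params_)
```

Note that `refit` is not supported (the constructor raises `ValueError` if it is requested), so the final estimator has to be refit manually with `best_params_` afterwards.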
19 changes: 19 additions & 0 deletions ci/requirements.txt
@@ -0,0 +1,19 @@
flake8
flake8-bandit
flake8-broken-line
flake8-bugbear
flake8-builtins
flake8-commas
flake8-comprehensions
flake8-docstrings
flake8-eradicate
flake8-isort
flake8-logging-format
flake8-mutable
flake8-pep3101
flake8-pytest
flake8-quotes
flake8-string-format
flake8-tidy-imports
pytest
twine
9 changes: 9 additions & 0 deletions setup.cfg
@@ -0,0 +1,9 @@
[flake8]
max-line-length=100
exclude=src/test/*

[isort]
line_length=100
indent=' '
multi_line_output=3
length_sort=1
13 changes: 8 additions & 5 deletions setup.py
@@ -1,10 +1,13 @@
from setuptools import find_packages, setup
# -*- coding: utf-8 -*-
"""Setup for sklearn_surrogatesearchcv."""

version = '0.1'
from setuptools import setup, find_packages

version = '0.1.1'

install_requires = [
'sklearn',
'pySOT'
'pySOT',
]

with open('README.md') as f:
@@ -13,8 +16,8 @@
setup(
name='sklearn_surrogatesearchcv',
version=version,
description="Surrogate adaptive randomized search for hyper parameters"
"in sklearn.",
description='Surrogate adaptive randomized search for hyper parameters'
' in sklearn.',
long_description=long_description,
long_description_content_type='text/markdown',
classifiers=[],
2 changes: 1 addition & 1 deletion src/sklearn_surrogatesearchcv/__init__.py
@@ -1 +1 @@
from .surrogatesearchcv import SurrogateSearchCV # noqa
from .surrogatesearchcv import SurrogateSearchCV # noqa
71 changes: 36 additions & 35 deletions src/sklearn_surrogatesearchcv/surrogatesearchcv.py
@@ -1,23 +1,24 @@
# -*- coding: utf-8 -*-
"""Surrogate search with cross validation for hyper parameter tuning."""

from __future__ import print_function

import numpy as np
from sklearn.model_selection import GridSearchCV

from pySOT.strategy import SRBFStrategy
from poap.controller import SerialController
from pySOT.surrogate import LinearTail, CubicKernel, RBFInterpolant, SurrogateUnitBox
from sklearn.model_selection import GridSearchCV
from pySOT.experimental_design import SymmetricLatinHypercube
from pySOT.optimization_problems import OptimizationProblem
from pySOT.strategy import SRBFStrategy
from pySOT.surrogate import (CubicKernel, LinearTail, RBFInterpolant,
SurrogateUnitBox)


class SurrogateSearchCV(object):
"""Surrogate search with cross validation for hyper parameter tuning.
"""
"""Surrogate search with cross validation for hyper parameter tuning."""

def __init__(self, estimator, n_iter=10, param_def=None, refit=False,
**kwargs):
"""
"""Surrogate search with cross validation for hyper parameter tuning.
:param estimator: estimator
:param n_iter: number of iterations to run (default 10)
:param param_def: list of dictionaries, e.g.
@@ -46,19 +47,19 @@ def __init__(self, estimator, n_iter=10, param_def=None, refit=False,
if refit:
raise ValueError('Refit not supported')

for d in param_def:
if 'name' not in d:
for param in param_def:
if 'name' not in param:
raise ValueError('Name must be defined for each parameter')

if 'integer' not in d:
d['integer'] = False
if 'integer' not in param:
param['integer'] = False

if 'lb' not in d or 'ub' not in d:
if 'lb' not in param or 'ub' not in param:
raise ValueError(
'Fields lb and ub must be defined for {name}'.format(**d))
if d['ub'] <= d['lb']:
'Fields lb and ub must be defined for {name}'.format(**param))
if param['ub'] <= param['lb']:
raise ValueError(
'Field ub must be larger than lb for {name}'.format(**d))
'Field ub must be larger than lb for {name}'.format(**param))

self.param_def = param_def
self.kwargs = kwargs
@@ -68,7 +69,7 @@ def __init__(self, estimator, n_iter=10, param_def=None, refit=False,
self.score_history_ = []

def fit(self, X, y=None, **kwargs):
"""
"""Run training with cross validation.
:param X: training data
:param **: parameters to be passed to GridSearchCV
@@ -78,23 +79,23 @@ class Target(OptimizationProblem):
def __init__(self, outer):
self.outer = outer
param_def = outer.param_def
self.lb = np.array(list(d['lb'] for d in param_def))
self.ub = np.array(list(d['ub'] for d in param_def))
self.lb = np.array([param['lb'] for param in param_def])
self.ub = np.array([param['ub'] for param in param_def])
self.dim = len(param_def)
self.int_var = np.array(list(
i for i, d in enumerate(param_def) if d['integer']))
self.cont_var = np.array(list(
i for i, d in enumerate(param_def)
if i not in self.int_var))

def eval(self, x):
print('Eval {} ...'.format(x))
self.int_var = np.array([
idx for idx, param in enumerate(param_def) if param['integer']])
self.cont_var = np.array([
idx for idx, param in enumerate(param_def)
if idx not in self.int_var])

def eval_(self, x):
print('Eval {0} ...'.format(x))
param_def = self.outer.param_def
outer = self.outer
# prepare parameters grid for gridsearchcv
param_grid = (
{d['name']: [int(x[i]) if d['integer'] else x[i]]
for i, d in enumerate(param_def)})
{param['name']: [int(x[idx]) if param['integer'] else x[idx]]
for idx, param in enumerate(param_def)})
# create gridsearchcv to evaluate the cv
gs = GridSearchCV(outer.estimator, param_grid, refit=False,
**outer.kwargs)
@@ -108,7 +109,7 @@ def eval(self, x):
# also record history
outer.params_history_.append(x)
outer.score_history_.append(gs_score)
print('Eval {} => {}'.format(x, gs_score))
print('Eval {0} => {1}'.format(x, gs_score))
# pySOT score is the lower the better, so return the negated
return -gs_score

@@ -123,15 +124,15 @@ def eval(self, x):
num_pts=2 * (target.dim + 1))

# Create a strategy and a controller
controller = SerialController(objective=target.eval)
controller = SerialController(objective=target.eval_)
controller.strategy = SRBFStrategy(
max_evals=self.n_iter, batch_size=1, opt_prob=target,
exp_design=slhd, surrogate=rbf, asynchronous=False)

print("Maximum number of evaluations: {}".format(self.n_iter))
print("Strategy: {}".format(controller.strategy.__class__.__name__))
print("Experimental design: {}".format(slhd.__class__.__name__))
print("Surrogate: {}".format(rbf.__class__.__name__))
print('Maximum number of evaluations: {0}'.format(self.n_iter))
print('Strategy: {0}'.format(controller.strategy.__class__.__name__))
print('Experimental design: {0}'.format(slhd.__class__.__name__))
print('Surrogate: {0}'.format(rbf.__class__.__name__))

# Run the optimization strategy
result = controller.run()
15 changes: 9 additions & 6 deletions src/test/test_basic.py
@@ -1,19 +1,22 @@
# -*- coding: utf-8 -*-
"""Tests for SurrogateSearchCV."""

from time import time

from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

from sklearn_surrogatesearchcv import SurrogateSearchCV


def test_basic():
"""A simple end-to-end test case."""
digits = load_digits()
X, y = digits.data, digits.target
clf = RandomForestClassifier(n_estimators=5)

param_def = [
{
'name': "max_depth",
'name': 'max_depth',
'integer': True,
'lb': 3,
'ub': 6,
@@ -38,10 +41,10 @@ def test_basic():

start = time()
surrogate_search.fit(X, y)
print("SurrogateSearchCV took %.2f seconds for %d candidates"
" parameter settings." % ((time() - start), n_iter_search))
print("Best score is {}".format(surrogate_search.best_score_))
print("Best params are {}".format(surrogate_search.best_params_))
print('SurrogateSearchCV took {0:.2f} seconds for {1} candidates'
' parameter settings.'.format((time() - start), n_iter_search))
print('Best score is {0}'.format(surrogate_search.best_score_))
print('Best params are {0}'.format(surrogate_search.best_params_))

assert len(surrogate_search.params_history_) == n_iter_search
assert len(surrogate_search.score_history_) == n_iter_search
