Enrich circle ci integration (#5)
bettercallshao authored Aug 30, 2019
1 parent e993e20 commit 0bb107f
Showing 9 changed files with 153 additions and 87 deletions.
92 changes: 69 additions & 23 deletions .circleci/config.yml
@@ -1,28 +1,74 @@
# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
version: 2
jobs:
build:
sanity:
docker:
- image: circleci/python:3.7

- image: circleci/python:3.7
working_directory: ~/repo

steps:
- checkout

- run:
name: install dependencies
command: |
pipenv install
- run:
name: run tests
command: |
pipenv run pytest
- store_artifacts:
path: test-reports
destination: test-reports
- checkout
- run:
name: install dependencies
command: |
pip install --user -r ci/requirements.txt
- run:
name: linter
command: |
flake8 .
- run:
name: install package
command: |
pip install --user .
- run:
name: pytest
command: |
pytest
release:
docker:
- image: circleci/python:3.7
working_directory: ~/repo
steps:
- checkout
- run:
name: install dependencies
command: |
pip install --user -r ci/requirements.txt
- run:
name: generate .pypirc
command: |
echo -e "[pypi]" >> ~/.pypirc
echo -e "username = $PYPI_USERNAME" >> ~/.pypirc
echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc
- run:
name: build package
command: |
python setup.py sdist
- run:
name: upload to pypi
command: |
twine upload dist/*
workflows:
version: 2
sanity:
jobs:
- sanity:
filters:
branches:
only: /.*/
tags:
ignore: /.*/
release:
jobs:
- sanity:
filters:
tags:
only: /v[0-9]+(\.[0-9]+)*/
branches:
ignore: /.*/
- release:
filters:
tags:
only: /v[0-9]+(\.[0-9]+)*/
branches:
ignore: /.*/
requires:
- sanity
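
As a side note on the workflow filters above: the release job only runs for tags matching `/v[0-9]+(\.[0-9]+)*/`. The following small Python check is purely illustrative (the tag names are made up, and CircleCI itself evaluates the filter, not this script); it mirrors CircleCI's behaviour of matching the pattern against the whole tag name:

```python
import re

# Same pattern as the `tags: only:` filter in the workflow above.
# CircleCI requires filter regexes to match the entire tag name,
# so fullmatch() mirrors that behaviour here.
TAG_FILTER = re.compile(r'v[0-9]+(\.[0-9]+)*')

for tag in ('v0.1.1', 'v1', 'release-1.0', '0.1.1'):
    print(tag, bool(TAG_FILTER.fullmatch(tag)))

# v0.1.1 and v1 pass the filter; release-1.0 and 0.1.1 do not,
# so only v-prefixed version tags trigger the release job.
```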
13 changes: 0 additions & 13 deletions Pipfile

This file was deleted.

6 changes: 2 additions & 4 deletions README.md
@@ -1,4 +1,6 @@
# Surrogate Search CV
[![CircleCI](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv.svg?style=shield)](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv)
[![PyPi](https://badge.fury.io/py/sklearn_surrogatesearchcv.svg)](https://badge.fury.io/py/sklearn_surrogatesearchcv)

This package implements a randomized hyper parameter search for sklearn (similar to `RandomizedSearchCV`) but utilizes surrogate adaptive sampling from pySOT. Use this similarly to `GridSearchCV` with a few extra parameters.

@@ -50,10 +52,6 @@ best_score_

For a complete example, please refer to `src/test/test_basic.py`.

## CI

[![CircleCI](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv.svg?style=svg)](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv)

## Resources

A slide about the role of surrogate optimization in ML. [link](https://www.slideshare.net/TimTan2/machine-learning-vs-traditional-optimization)
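
To make the intended usage concrete, here is a minimal sketch pieced together from the README description above and the `src/test/test_basic.py` changes in this commit. The dataset, estimator, parameter bounds, and `cv` value are illustrative choices, not part of the diff:

```python
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

from sklearn_surrogatesearchcv import SurrogateSearchCV

digits = load_digits()
X, y = digits.data, digits.target
clf = RandomForestClassifier(n_estimators=5)

# Each tuned parameter is described by a dict with 'name', 'lb', 'ub',
# and an optional 'integer' flag, as validated in surrogatesearchcv.py.
param_def = [
    {'name': 'max_depth', 'integer': True, 'lb': 3, 'ub': 6},
    {'name': 'min_samples_split', 'integer': True, 'lb': 2, 'ub': 10},
]

# Extra keyword arguments (e.g. cv) are forwarded to GridSearchCV internally.
search = SurrogateSearchCV(clf, n_iter=10, param_def=param_def, cv=3)
search.fit(X, y)

print(search.best_score_)
print(search.best_params_)
```

Note that `refit` is not supported (the constructor raises `ValueError` if it is requested), so the final estimator has to be refit manually with `best_params_` afterwards.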
19 changes: 19 additions & 0 deletions ci/requirements.txt
@@ -0,0 +1,19 @@
flake8
flake8-bandit
flake8-broken-line
flake8-bugbear
flake8-builtins
flake8-commas
flake8-comprehensions
flake8-docstrings
flake8-eradicate
flake8-isort
flake8-logging-format
flake8-mutable
flake8-pep3101
flake8-pytest
flake8-quotes
flake8-string-format
flake8-tidy-imports
pytest
twine
9 changes: 9 additions & 0 deletions setup.cfg
@@ -0,0 +1,9 @@
[flake8]
max-line-length=100
exclude=src/test/*

[isort]
line_length=100
indent=' '
multi_line_output=3
length_sort=1
13 changes: 8 additions & 5 deletions setup.py
@@ -1,10 +1,13 @@
from setuptools import find_packages, setup
# -*- coding: utf-8 -*-
"""Setup for sklearn_surrogatesearchcv."""

version = '0.1'
from setuptools import setup, find_packages

version = '0.1.1'

install_requires = [
'sklearn',
'pySOT'
'pySOT',
]

with open('README.md') as f:
@@ -13,8 +16,8 @@
setup(
name='sklearn_surrogatesearchcv',
version=version,
description="Surrogate adaptive randomized search for hyper parameters"
"in sklearn.",
description='Surrogate adaptive randomized search for hyper parameters'
' in sklearn.',
long_description=long_description,
long_description_content_type='text/markdown',
classifiers=[],
2 changes: 1 addition & 1 deletion src/sklearn_surrogatesearchcv/__init__.py
@@ -1 +1 @@
from .surrogatesearchcv import SurrogateSearchCV # noqa
from .surrogatesearchcv import SurrogateSearchCV # noqa
71 changes: 36 additions & 35 deletions src/sklearn_surrogatesearchcv/surrogatesearchcv.py
@@ -1,23 +1,24 @@
# -*- coding: utf-8 -*-
"""Surrogate search with cross validation for hyper parameter tuning."""

from __future__ import print_function

import numpy as np
from sklearn.model_selection import GridSearchCV

from pySOT.strategy import SRBFStrategy
from poap.controller import SerialController
from pySOT.surrogate import LinearTail, CubicKernel, RBFInterpolant, SurrogateUnitBox
from sklearn.model_selection import GridSearchCV
from pySOT.experimental_design import SymmetricLatinHypercube
from pySOT.optimization_problems import OptimizationProblem
from pySOT.strategy import SRBFStrategy
from pySOT.surrogate import (CubicKernel, LinearTail, RBFInterpolant,
SurrogateUnitBox)


class SurrogateSearchCV(object):
"""Surrogate search with cross validation for hyper parameter tuning.
"""
"""Surrogate search with cross validation for hyper parameter tuning."""

def __init__(self, estimator, n_iter=10, param_def=None, refit=False,
**kwargs):
"""
"""Surrogate search with cross validation for hyper parameter tuning.
:param estimator: estimator
:param n_iter: number of iterations to run (default 10)
:param param_def: list of dictionaries, e.g.
@@ -46,19 +47,19 @@ def __init__(self, estimator, n_iter=10, param_def=None, refit=False,
if refit:
raise ValueError('Refit not supported')

for d in param_def:
if 'name' not in d:
for param in param_def:
if 'name' not in param:
raise ValueError('Name must be defined for each parameter')

if 'integer' not in d:
d['integer'] = False
if 'integer' not in param:
param['integer'] = False

if 'lb' not in d or 'ub' not in d:
if 'lb' not in param or 'ub' not in param:
raise ValueError(
'Fields lb and ub must be defined for {name}'.format(**d))
if d['ub'] <= d['lb']:
'Fields lb and ub must be defined for {name}'.format(**param))
if param['ub'] <= param['lb']:
raise ValueError(
'Field ub must be larger than lb for {name}'.format(**d))
'Field ub must be larger than lb for {name}'.format(**param))

self.param_def = param_def
self.kwargs = kwargs
@@ -68,7 +69,7 @@ def __init__(self, estimator, n_iter=10, param_def=None, refit=False,
self.score_history_ = []

def fit(self, X, y=None, **kwargs):
"""
"""Run training with cross validation.
:param X: training data
:param **: parameters to be passed to GridSearchCV
@@ -78,23 +79,23 @@ class Target(OptimizationProblem):
def __init__(self, outer):
self.outer = outer
param_def = outer.param_def
self.lb = np.array(list(d['lb'] for d in param_def))
self.ub = np.array(list(d['ub'] for d in param_def))
self.lb = np.array([param['lb'] for param in param_def])
self.ub = np.array([param['ub'] for param in param_def])
self.dim = len(param_def)
self.int_var = np.array(list(
i for i, d in enumerate(param_def) if d['integer']))
self.cont_var = np.array(list(
i for i, d in enumerate(param_def)
if i not in self.int_var))

def eval(self, x):
print('Eval {} ...'.format(x))
self.int_var = np.array([
idx for idx, param in enumerate(param_def) if param['integer']])
self.cont_var = np.array([
idx for idx, param in enumerate(param_def)
if idx not in self.int_var])

def eval_(self, x):
print('Eval {0} ...'.format(x))
param_def = self.outer.param_def
outer = self.outer
# prepare parameters grid for gridsearchcv
param_grid = (
{d['name']: [int(x[i]) if d['integer'] else x[i]]
for i, d in enumerate(param_def)})
{param['name']: [int(x[idx]) if param['integer'] else x[idx]]
for idx, param in enumerate(param_def)})
# create gridsearchcv to evaluate the cv
gs = GridSearchCV(outer.estimator, param_grid, refit=False,
**outer.kwargs)
@@ -108,7 +109,7 @@ def eval(self, x):
# also record history
outer.params_history_.append(x)
outer.score_history_.append(gs_score)
print('Eval {} => {}'.format(x, gs_score))
print('Eval {0} => {1}'.format(x, gs_score))
# pySOT score is the lower the better, so return the negated
return -gs_score

@@ -123,15 +124,15 @@ def eval(self, x):
num_pts=2 * (target.dim + 1))

# Create a strategy and a controller
controller = SerialController(objective=target.eval)
controller = SerialController(objective=target.eval_)
controller.strategy = SRBFStrategy(
max_evals=self.n_iter, batch_size=1, opt_prob=target,
exp_design=slhd, surrogate=rbf, asynchronous=False)

print("Maximum number of evaluations: {}".format(self.n_iter))
print("Strategy: {}".format(controller.strategy.__class__.__name__))
print("Experimental design: {}".format(slhd.__class__.__name__))
print("Surrogate: {}".format(rbf.__class__.__name__))
print('Maximum number of evaluations: {0}'.format(self.n_iter))
print('Strategy: {0}'.format(controller.strategy.__class__.__name__))
print('Experimental design: {0}'.format(slhd.__class__.__name__))
print('Surrogate: {0}'.format(rbf.__class__.__name__))

# Run the optimization strategy
result = controller.run()
15 changes: 9 additions & 6 deletions src/test/test_basic.py
@@ -1,19 +1,22 @@
# -*- coding: utf-8 -*-
"""Tests for SurrogateSearchCV."""

from time import time

from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

from sklearn_surrogatesearchcv import SurrogateSearchCV


def test_basic():
"""A simple end-to-end test case."""
digits = load_digits()
X, y = digits.data, digits.target
clf = RandomForestClassifier(n_estimators=5)

param_def = [
{
'name': "max_depth",
'name': 'max_depth',
'integer': True,
'lb': 3,
'ub': 6,
@@ -38,10 +41,10 @@ def test_basic():

start = time()
surrogate_search.fit(X, y)
print("SurrogateSearchCV took %.2f seconds for %d candidates"
" parameter settings." % ((time() - start), n_iter_search))
print("Best score is {}".format(surrogate_search.best_score_))
print("Best params are {}".format(surrogate_search.best_params_))
print('SurrogateSearchCV took {0:.2f} seconds for {1} candidates'
' parameter settings.'.format((time() - start), n_iter_search))
print('Best score is {0}'.format(surrogate_search.best_score_))
print('Best params are {0}'.format(surrogate_search.best_params_))

assert len(surrogate_search.params_history_) == n_iter_search
assert len(surrogate_search.score_history_) == n_iter_search
