diff --git a/.circleci/config.yml b/.circleci/config.yml index bb87d64..9f7cf17 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,28 +1,74 @@ -# Python CircleCI 2.0 configuration file -# -# Check https://circleci.com/docs/2.0/language-python/ for more details -# version: 2 jobs: - build: + sanity: docker: - - image: circleci/python:3.7 - + - image: circleci/python:3.7 working_directory: ~/repo - steps: - - checkout - - - run: - name: install dependencies - command: | - pipenv install - - - run: - name: run tests - command: | - pipenv run pytest - - - store_artifacts: - path: test-reports - destination: test-reports + - checkout + - run: + name: install dependencies + command: | + pip install --user -r ci/requirements.txt + - run: + name: linter + command: | + flake8 . + - run: + name: install package + command: | + pip install --user . + - run: + name: pytest + command: | + pytest + release: + docker: + - image: circleci/python:3.7 + working_directory: ~/repo + steps: + - checkout + - run: + name: install dependencies + command: | + pip install --user -r ci/requirements.txt + - run: + name: generate .pypirc + command: | + echo -e "[pypi]" >> ~/.pypirc + echo -e "username = $PYPI_USERNAME" >> ~/.pypirc + echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc + - run: + name: build package + command: | + python setup.py sdist + - run: + name: upload to pypi + command: | + twine upload dist/* +workflows: + version: 2 + sanity: + jobs: + - sanity: + filters: + branches: + only: /.*/ + tags: + ignore: /.*/ + release: + jobs: + - sanity: + filters: + tags: + only: /v[0-9]+(\.[0-9]+)*/ + branches: + ignore: /.*/ + - release: + filters: + tags: + only: /v[0-9]+(\.[0-9]+)*/ + branches: + ignore: /.*/ + requires: + - sanity diff --git a/Pipfile b/Pipfile deleted file mode 100644 index cfefc9c..0000000 --- a/Pipfile +++ /dev/null @@ -1,13 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -sklearn-surrogatesearchcv 
= {path = "."} -pytest = "*" - -[dev-packages] - -[requires] -python_version = "3.7" diff --git a/README.md b/README.md index b924de4..322b12b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ # Surrogate Search CV +[![CircleCI](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv.svg?style=shield)](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv) +[![PyPi](https://badge.fury.io/py/sklearn_surrogatesearchcv.svg)](https://badge.fury.io/py/sklearn_surrogatesearchcv) This package implements a randomized hyper parameter search for sklearn (similar to `RandomizedSearchCV`) but utilizes surrogate adaptive sampling from pySOT. Use this similarly to GridSearchCV with a few extra parameters. @@ -50,10 +52,6 @@ best_score_ For a complete example, please refer to `src/test/test_basic.py`. -## CI - -[![CircleCI](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv.svg?style=svg)](https://circleci.com/gh/timlyrics/sklearn_surrogatesearchcv) - ## Resources A slide about the role of surrogate optimization in ml. 
[link](https://www.slideshare.net/TimTan2/machine-learning-vs-traditional-optimization) diff --git a/ci/requirements.txt b/ci/requirements.txt new file mode 100644 index 0000000..ea12f45 --- /dev/null +++ b/ci/requirements.txt @@ -0,0 +1,19 @@ +flake8 +flake8-bandit +flake8-broken-line +flake8-bugbear +flake8-builtins +flake8-commas +flake8-comprehensions +flake8-docstrings +flake8-eradicate +flake8-isort +flake8-logging-format +flake8-mutable +flake8-pep3101 +flake8-pytest +flake8-quotes +flake8-string-format +flake8-tidy-imports +pytest +twine diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..02add47 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[flake8] +max-line-length=100 +exclude=src/test/* + +[isort] +line_length=100 +indent=' ' +multi_line_output=3 +length_sort=1 diff --git a/setup.py b/setup.py index e9c3e7d..33aefbe 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,13 @@ -from setuptools import find_packages, setup +# -*- coding: utf-8 -*- +"""Setup for sklearn_surrogatesearchcv.""" -version = '0.1' +from setuptools import setup, find_packages + +version = '0.1.1' install_requires = [ 'sklearn', - 'pySOT' + 'pySOT', ] with open('README.md') as f: @@ -13,8 +16,8 @@ setup( name='sklearn_surrogatesearchcv', version=version, - description="Surrogate adaptive randomized search for hyper parameters" - "in sklearn.", + description='Surrogate adaptive randomized search for hyper parameters' + 'in sklearn.', long_description=long_description, long_description_content_type='text/markdown', classifiers=[], diff --git a/src/sklearn_surrogatesearchcv/__init__.py b/src/sklearn_surrogatesearchcv/__init__.py index 2a410a3..5feab4b 100644 --- a/src/sklearn_surrogatesearchcv/__init__.py +++ b/src/sklearn_surrogatesearchcv/__init__.py @@ -1 +1 @@ -from .surrogatesearchcv import SurrogateSearchCV # noqa +from .surrogatesearchcv import SurrogateSearchCV # noqa diff --git a/src/sklearn_surrogatesearchcv/surrogatesearchcv.py 
b/src/sklearn_surrogatesearchcv/surrogatesearchcv.py index b4c074c..2d46951 100644 --- a/src/sklearn_surrogatesearchcv/surrogatesearchcv.py +++ b/src/sklearn_surrogatesearchcv/surrogatesearchcv.py @@ -1,23 +1,24 @@ +# -*- coding: utf-8 -*- +"""Surrogate search with cross validation for hyper parameter tuning.""" + from __future__ import print_function import numpy as np -from sklearn.model_selection import GridSearchCV - +from pySOT.strategy import SRBFStrategy from poap.controller import SerialController +from pySOT.surrogate import LinearTail, CubicKernel, RBFInterpolant, SurrogateUnitBox +from sklearn.model_selection import GridSearchCV from pySOT.experimental_design import SymmetricLatinHypercube from pySOT.optimization_problems import OptimizationProblem -from pySOT.strategy import SRBFStrategy -from pySOT.surrogate import (CubicKernel, LinearTail, RBFInterpolant, - SurrogateUnitBox) class SurrogateSearchCV(object): - """Surrogate search with cross validation for hyper parameter tuning. - """ + """Surrogate search with cross validation for hyper parameter tuning.""" def __init__(self, estimator, n_iter=10, param_def=None, refit=False, **kwargs): - """ + """Surrogate search with cross validation for hyper parameter tuning. + :param estimator: estimator :param n_iter: number of iterations to run (default 10) :param param_def: list of dictionaries, e.g. 
@@ -46,19 +47,19 @@ def __init__(self, estimator, n_iter=10, param_def=None, refit=False, if refit: raise ValueError('Refit not supported') - for d in param_def: - if 'name' not in d: + for param in param_def: + if 'name' not in param: raise ValueError('Name must be defined for each parameter') - if 'integer' not in d: - d['integer'] = False + if 'integer' not in param: + param['integer'] = False - if 'lb' not in d or 'ub' not in d: + if 'lb' not in param or 'ub' not in param: raise ValueError( - 'Fields lb and ub must be defined for {name}'.format(**d)) - if d['ub'] <= d['lb']: + 'Fields lb and ub must be defined for {name}'.format(**param)) + if param['ub'] <= param['lb']: raise ValueError( - 'Field ub must be larger than lb for {name}'.format(**d)) + 'Field ub must be larger than lb for {name}'.format(**param)) self.param_def = param_def self.kwargs = kwargs @@ -68,7 +69,7 @@ def __init__(self, estimator, n_iter=10, param_def=None, refit=False, self.score_history_ = [] def fit(self, X, y=None, **kwargs): - """ + """Run training with cross validation. 
:param X: training data :param **: parameters to be passed to GridSearchCV @@ -78,23 +79,23 @@ class Target(OptimizationProblem): def __init__(self, outer): self.outer = outer param_def = outer.param_def - self.lb = np.array(list(d['lb'] for d in param_def)) - self.ub = np.array(list(d['ub'] for d in param_def)) + self.lb = np.array([param['lb'] for param in param_def]) + self.ub = np.array([param['ub'] for param in param_def]) self.dim = len(param_def) - self.int_var = np.array(list( - i for i, d in enumerate(param_def) if d['integer'])) - self.cont_var = np.array(list( - i for i, d in enumerate(param_def) - if i not in self.int_var)) - - def eval(self, x): - print('Eval {} ...'.format(x)) + self.int_var = np.array([ + idx for idx, param in enumerate(param_def) if param['integer']]) + self.cont_var = np.array([ + idx for idx, param in enumerate(param_def) + if idx not in self.int_var]) + + def eval_(self, x): + print('Eval {0} ...'.format(x)) param_def = self.outer.param_def outer = self.outer # prepare parameters grid for gridsearchcv param_grid = ( - {d['name']: [int(x[i]) if d['integer'] else x[i]] - for i, d in enumerate(param_def)}) + {param['name']: [int(x[idx]) if param['integer'] else x[idx]] + for idx, param in enumerate(param_def)}) # create gridsearchcv to evaluate the cv gs = GridSearchCV(outer.estimator, param_grid, refit=False, **outer.kwargs) @@ -108,7 +109,7 @@ def eval(self, x): # also record history outer.params_history_.append(x) outer.score_history_.append(gs_score) - print('Eval {} => {}'.format(x, gs_score)) + print('Eval {0} => {1}'.format(x, gs_score)) # pySOT score is the lower the better, so return the negated return -gs_score @@ -123,15 +124,15 @@ def eval(self, x): num_pts=2 * (target.dim + 1)) # Create a strategy and a controller - controller = SerialController(objective=target.eval) + controller = SerialController(objective=target.eval_) controller.strategy = SRBFStrategy( max_evals=self.n_iter, batch_size=1, opt_prob=target, 
exp_design=slhd, surrogate=rbf, asynchronous=False) - print("Maximum number of evaluations: {}".format(self.n_iter)) - print("Strategy: {}".format(controller.strategy.__class__.__name__)) - print("Experimental design: {}".format(slhd.__class__.__name__)) - print("Surrogate: {}".format(rbf.__class__.__name__)) + print('Maximum number of evaluations: {0}'.format(self.n_iter)) + print('Strategy: {0}'.format(controller.strategy.__class__.__name__)) + print('Experimental design: {0}'.format(slhd.__class__.__name__)) + print('Surrogate: {0}'.format(rbf.__class__.__name__)) # Run the optimization strategy result = controller.run() diff --git a/src/test/test_basic.py b/src/test/test_basic.py index 427723c..861ffe3 100644 --- a/src/test/test_basic.py +++ b/src/test/test_basic.py @@ -1,19 +1,22 @@ +# -*- coding: utf-8 -*- +"""Tests for SurrogateSearchCV.""" + from time import time from sklearn.datasets import load_digits from sklearn.ensemble import RandomForestClassifier - from sklearn_surrogatesearchcv import SurrogateSearchCV def test_basic(): + """A simple end-to-end test case.""" digits = load_digits() X, y = digits.data, digits.target clf = RandomForestClassifier(n_estimators=5) param_def = [ { - 'name': "max_depth", + 'name': 'max_depth', 'integer': True, 'lb': 3, 'ub': 6, @@ -38,10 +41,10 @@ def test_basic(): start = time() surrogate_search.fit(X, y) - print("SurrogateSearchCV took %.2f seconds for %d candidates" - " parameter settings." 
% ((time() - start), n_iter_search)) - print("Best score is {}".format(surrogate_search.best_score_)) - print("Best params are {}".format(surrogate_search.best_params_)) + print('SurrogateSearchCV took {0:.2f} seconds for {1} candidates' + ' parameter settings.'.format((time() - start), n_iter_search)) + print('Best score is {0}'.format(surrogate_search.best_score_)) + print('Best params are {0}'.format(surrogate_search.best_params_)) assert len(surrogate_search.params_history_) == n_iter_search assert len(surrogate_search.score_history_) == n_iter_search