diff --git a/doc/source/conf.py b/doc/source/conf.py index 848b26c1493b..27d0c1200d9c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -56,7 +56,7 @@ "ray.core.generated.TablePubsub",] for mod_name in MOCK_MODULES: sys.modules[mod_name] = mock.Mock() -# ray.rllib.models.action_dist.py and +# ray.rllib.models.action_dist.py and # ray.rllib.models.lstm.py will use tf.VERSION sys.modules["tensorflow"].VERSION = "9.9.9" diff --git a/doc/source/hyperband.rst b/doc/source/hyperband.rst deleted file mode 100644 index 1b3c7bf4dd8d..000000000000 --- a/doc/source/hyperband.rst +++ /dev/null @@ -1,99 +0,0 @@ -HyperBand and Early Stopping -============================ - -Ray Tune includes distributed implementations of early stopping algorithms such as `Median Stopping Rule `__, `HyperBand `__, and an `asynchronous version of HyperBand `__. These algorithms are very resource efficient and can outperform Bayesian Optimization methods in `many cases `__. - -Asynchronous HyperBand ----------------------- - -The `asynchronous version of HyperBand `__ scheduler can be plugged in on top of an existing grid or random search. This can be done by setting the ``scheduler`` parameter of ``run_experiments``, e.g. - -.. code-block:: python - - run_experiments({...}, scheduler=AsyncHyperBandScheduler()) - -Compared to the original version of HyperBand, this implementation provides better parallelism and avoids straggler issues during eliminations. An example of this can be found in `async_hyperband_example.py `__. **We recommend using this over the standard HyperBand scheduler.** - -.. autoclass:: ray.tune.async_hyperband.AsyncHyperBandScheduler - -HyperBand ---------- - -.. note:: Note that the HyperBand scheduler requires your trainable to support checkpointing, which is described in `Ray Tune documentation `__. Checkpointing enables the scheduler to multiplex many concurrent trials onto a limited size cluster. - -Ray Tune also implements the `standard version of HyperBand `__. 
You can use it as such: - -.. code-block:: python - - run_experiments({...}, scheduler=HyperBandScheduler()) - -An example of this can be found in `hyperband_example.py `__. The progress of one such HyperBand run is shown below. - - -:: - - == Status == - Using HyperBand: num_stopped=0 total_brackets=5 - Round #0: - Bracket(n=5, r=100, completed=80%): {'PAUSED': 4, 'PENDING': 1} - Bracket(n=8, r=33, completed=23%): {'PAUSED': 4, 'PENDING': 4} - Bracket(n=15, r=11, completed=4%): {'RUNNING': 2, 'PAUSED': 2, 'PENDING': 11} - Bracket(n=34, r=3, completed=0%): {'RUNNING': 2, 'PENDING': 32} - Bracket(n=81, r=1, completed=0%): {'PENDING': 38} - Resources used: 4/4 CPUs, 0/0 GPUs - Result logdir: ~/ray_results/hyperband_test - PAUSED trials: - - my_class_0_height=99,width=43: PAUSED [pid=11664], 0 s, 100 ts, 97.1 rew - - my_class_11_height=85,width=81: PAUSED [pid=11771], 0 s, 33 ts, 32.8 rew - - my_class_12_height=0,width=52: PAUSED [pid=11785], 0 s, 33 ts, 0 rew - - my_class_19_height=44,width=88: PAUSED [pid=11811], 0 s, 11 ts, 5.47 rew - - my_class_27_height=96,width=84: PAUSED [pid=11840], 0 s, 11 ts, 12.5 rew - ... 5 more not shown - PENDING trials: - - my_class_10_height=12,width=25: PENDING - - my_class_13_height=90,width=45: PENDING - - my_class_14_height=69,width=45: PENDING - - my_class_15_height=41,width=11: PENDING - - my_class_16_height=57,width=69: PENDING - ... 81 more not shown - RUNNING trials: - - my_class_23_height=75,width=51: RUNNING [pid=11843], 0 s, 1 ts, 1.47 rew - - my_class_26_height=16,width=48: RUNNING - - my_class_31_height=40,width=10: RUNNING - - my_class_53_height=28,width=96: RUNNING - -.. autoclass:: ray.tune.hyperband.HyperBandScheduler - - -HyperBand Implementation Details -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Implementation details may deviate slightly from theory but are focused on increasing usability. Note: ``R``, ``s_max``, and ``eta`` are parameters of HyperBand given by the paper. See `this post `_ for context. - -1. 
Both ``s_max`` (representing the ``number of brackets - 1``) and ``eta``, representing the downsampling rate, are fixed. In many practical settings, ``R``, which represents some resource unit and often the number of training iterations, can be set reasonably large, like ``R >= 200``. For simplicity, assume ``eta = 3``. Varying ``R`` between ``R = 200`` and ``R = 1000`` creates a huge range of the number of trials needed to fill up all brackets. - -.. image:: images/hyperband_bracket.png - -On the other hand, holding ``R`` constant at ``R = 300`` and varying ``eta`` also leads to HyperBand configurations that are not very intuitive: - -.. image:: images/hyperband_eta.png - -The implementation takes the same configuration as the example given in the paper and exposes ``max_t``, which is not a parameter in the paper. - -2. The example in the `post `_ to calculate ``n_0`` is actually a little different than the algorithm given in the paper. In this implementation, we implement ``n_0`` according to the paper (which is `n` in the below example): - -.. image:: images/hyperband_allocation.png - - -3. There are also implementation specific details like how trials are placed into brackets which are not covered in the paper. This implementation places trials within brackets according to smaller bracket first - meaning that with low number of trials, there will be less early stopping. - -Median Stopping Rule --------------------- - -The Median Stopping Rule implements the simple strategy of stopping a trial if its performance falls below the median of other trials at similar points in time. You can set the ``scheduler`` parameter as such: - -.. code-block:: python - - run_experiments({...}, scheduler=MedianStoppingRule()) - -.. 
autoclass:: ray.tune.median_stopping_rule.MedianStoppingRule diff --git a/doc/source/images/tune.png b/doc/source/images/tune.png new file mode 100644 index 000000000000..febd6de282e1 Binary files /dev/null and b/doc/source/images/tune.png differ diff --git a/doc/source/index.rst b/doc/source/index.rst index 5f0e0c215d2b..b71987108be0 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -40,11 +40,11 @@ View the `codebase on GitHub`_. Ray comes with libraries that accelerate deep learning and reinforcement learning development: -- `Ray Tune`_: Hyperparameter Optimization Framework -- `Ray RLlib`_: Scalable Reinforcement Learning +- `Tune`_: Scalable Hyperparameter Search +- `RLlib`_: Scalable Reinforcement Learning -.. _`Ray Tune`: tune.html -.. _`Ray RLlib`: rllib.html +.. _`Tune`: tune.html +.. _`RLlib`: rllib.html .. toctree:: @@ -67,12 +67,13 @@ Ray comes with libraries that accelerate deep learning and reinforcement learnin .. toctree:: :maxdepth: 1 - :caption: Ray Tune + :caption: Tune tune.rst - tune-config.rst - hyperband.rst - pbt.rst + tune-usage.rst + tune-schedulers.rst + tune-searchalg.rst + tune-package-ref.rst .. toctree:: :maxdepth: 1 diff --git a/doc/source/pbt.rst b/doc/source/pbt.rst deleted file mode 100644 index 5a5a4858816e..000000000000 --- a/doc/source/pbt.rst +++ /dev/null @@ -1,33 +0,0 @@ -Population Based Training -========================= - -Ray Tune includes a distributed implementation of `Population Based Training (PBT) `__. - - -PBT Scheduler -------------- - -Ray Tune's PBT scheduler can be plugged in on top of an existing grid or random search experiment. This can be enabled by setting the ``scheduler`` parameter of ``run_experiments``, e.g. - -.. 
code-block:: python - - run_experiments( - {...}, - scheduler=PopulationBasedTraining( - time_attr='time_total_s', - reward_attr='mean_accuracy', - perturbation_interval=600.0, - hyperparam_mutations={ - "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], - "alpha": lambda: random.uniform(0.0, 1.0), - ... - })) - -When the PBT scheduler is enabled, each trial variant is treated as a member of the population. Periodically, top-performing trials are checkpointed (this requires your Trainable to support `checkpointing `__). Low-performing trials clone the checkpoints of top performers and perturb the configurations in the hope of discovering an even better variation. - -You can run this `toy PBT example `__ to get an idea of how how PBT operates. When training in PBT mode, a single trial may see many different hyperparameters over its lifetime, which is recorded in its ``result.json`` file. The following figure generated by the example shows PBT discovering new hyperparams over the course of a single experiment: - -.. image:: pbt.png - -.. autoclass:: ray.tune.pbt.PopulationBasedTraining - diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst index 88f72afd5e85..e40f734b9104 100644 --- a/doc/source/rllib-training.rst +++ b/doc/source/rllib-training.rst @@ -115,11 +115,12 @@ Here is an example of the basic usage: checkpoint = agent.save() print("checkpoint saved at", checkpoint) + .. note:: It's recommended that you run RLlib agents with `Tune `__, for easy experiment management and visualization of results. Just set ``"run": AGENT_NAME, "env": ENV_NAME`` in the experiment config. -All RLlib agents are compatible with the `Tune API `__. This enables them to be easily used in experiments with `Tune `__. For example, the following code performs a simple hyperparam sweep of PPO: +All RLlib agents are compatible with the `Tune API `__. This enables them to be easily used in experiments with `Tune `__. 
For example, the following code performs a simple hyperparam sweep of PPO: .. code-block:: python diff --git a/doc/source/tune-config.rst b/doc/source/tune-config.rst deleted file mode 100644 index 2dba809a946c..000000000000 --- a/doc/source/tune-config.rst +++ /dev/null @@ -1,84 +0,0 @@ -Experiment Configuration -======================== - - -Experiment Setup ----------------- - -There are two ways to setup an experiment - one via Python and one via JSON. - -The first is to create an Experiment object. You can then pass in either -a single experiment or a list of experiments to `run_experiments`, as follows: - -.. code-block:: python - - # Single experiment - run_experiments(Experiment(...)) - - # Multiple experiments - run_experiments([Experiment(...), Experiment(...), ...]) - -.. autoclass:: ray.tune.Experiment - -An example of this can be found in `hyperband_example.py `__. - -Alternatively, you can pass in a Python dict. This uses the same fields as -the `ray.tune.Experiment`, except the experiment name is the key of the top level -dictionary. - -.. code-block:: python - - run_experiments({ - "my_experiment_name": { - "run": "my_func", - "trial_resources": { "cpu": 1, "gpu": 0 }, - "stop": { "mean_accuracy": 100 }, - "config": { - "alpha": tune.grid_search([0.2, 0.4, 0.6]), - "beta": tune.grid_search([1, 2]), - }, - "upload_dir": "s3://your_bucket/path", - "local_dir": "~/ray_results", - "max_failures": 2 - } - }) - -An example of this can be found in `async_hyperband_example.py `__. - - -Trial Variant Generation ------------------------- - -In the above example, we specified a grid search over two parameters using the ``tune.grid_search`` helper function. Ray Tune also supports sampling parameters from user-specified lambda functions, which can be used in combination with grid search. - -The following shows grid search over two nested parameters combined with random sampling from two lambda functions. 
Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions. - -.. code-block:: python - - "config": { - "alpha": lambda spec: np.random.uniform(100), - "beta": lambda spec: spec.config.alpha * np.random.normal(), - "nn_layers": [ - tune.grid_search([16, 64, 256]), - tune.grid_search([16, 64, 256]), - ], - }, - "repeat": 10, - -By default, each random variable and grid search point is sampled once. To take multiple random samples or repeat grid search runs, add ``repeat: N`` to the experiment config. E.g. in the above, ``"repeat": 10`` repeats the 3x3 grid search 10 times, for a total of 90 trials, each with randomly sampled values of ``alpha`` and ``beta``. - -.. note:: - - Lambda functions will be evaluated during trial variant generation. If you need to pass a literal function in your config, use ``tune.function(...)`` to escape it. - -For more information on variant generation, see `basic_variant.py `__. - -Resource Allocation -------------------- - -Ray Tune runs each trial as a Ray actor, allocating the specified GPU and CPU ``trial_resources`` to each actor (defaulting to 1 CPU per trial). A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded. - -If GPU resources are not requested, the ``CUDA_VISIBLE_DEVICES`` environment variable will be set as empty, disallowing GPU access. -Otherwise, it will be set to the GPUs in the list (this is managed by Ray). - -If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``extra_cpu`` or ``extra_gpu`` to reserve extra resource slots for the actors you will create. 
For example, if a trainable class requires 1 GPU itself, but will launch 4 actors each using another GPU, then it should set ``"gpu": 1, "extra_gpu": 4``. diff --git a/doc/source/tune-package-ref.rst b/doc/source/tune-package-ref.rst new file mode 100644 index 000000000000..d6f13cd98155 --- /dev/null +++ b/doc/source/tune-package-ref.rst @@ -0,0 +1,33 @@ +Tune Package Reference +======================= + +ray.tune +-------- + +.. automodule:: ray.tune + :members: + :exclude-members: TuneError, Trainable + +.. autoclass:: ray.tune.Trainable + :members: + :private-members: + +ray.tune.schedulers +------------------- + +.. automodule:: ray.tune.schedulers + :members: + :show-inheritance: + +ray.tune.suggest +---------------- + +.. automodule:: ray.tune.suggest + :members: + :exclude-members: function, grid_search, SuggestionAlgorithm + :show-inheritance: + +.. autoclass:: ray.tune.suggest.SuggestionAlgorithm + :members: + :private-members: + :show-inheritance: diff --git a/doc/source/tune-schedulers.rst b/doc/source/tune-schedulers.rst new file mode 100644 index 000000000000..74399836ea29 --- /dev/null +++ b/doc/source/tune-schedulers.rst @@ -0,0 +1,150 @@ +Tune Trial Schedulers +===================== + +By default, Tune schedules trials in serial order with the ``FIFOScheduler`` class. However, you can also specify a custom scheduling algorithm that can stop trials early or perturb parameters. + +.. code-block:: python + + tune.run_experiments({...}, scheduler=AsyncHyperBandScheduler()) + +Tune includes distributed implementations of early stopping algorithms such as `Median Stopping Rule `__, `HyperBand `__, and an `asynchronous version of HyperBand `__. These algorithms are very resource efficient and can outperform Bayesian Optimization methods in `many cases `__. Currently, all schedulers take in a ``reward_attr``, which is assumed to be maximized. + +Currently Available Trial Schedulers: + +.. 
contents:: + :local: + :backlinks: none + + +Population Based Training (PBT) +------------------------------- + +Tune includes a distributed implementation of `Population Based Training (PBT) `__. This can be enabled by setting the ``scheduler`` parameter of ``run_experiments``, e.g. + +.. code-block:: python + + pbt_scheduler = PopulationBasedTraining( + time_attr='time_total_s', + reward_attr='mean_accuracy', + perturbation_interval=600.0, + hyperparam_mutations={ + "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], + "alpha": lambda: random.uniform(0.0, 1.0), + ... + }) + run_experiments({...}, scheduler=pbt_scheduler) + +When the PBT scheduler is enabled, each trial variant is treated as a member of the population. Periodically, top-performing trials are checkpointed (this requires your Trainable to support `checkpointing `__). Low-performing trials clone the checkpoints of top performers and perturb the configurations in the hope of discovering an even better variation. + +You can run this `toy PBT example `__ to get an idea of how PBT operates. When training in PBT mode, a single trial may see many different hyperparameters over its lifetime, which is recorded in its ``result.json`` file. The following figure generated by the example shows PBT discovering new hyperparams over the course of a single experiment: + +.. image:: pbt.png + +.. autoclass:: ray.tune.schedulers.PopulationBasedTraining + :noindex: + +Asynchronous HyperBand +---------------------- + +The `asynchronous version of HyperBand `__ scheduler can be used by setting the ``scheduler`` parameter of ``run_experiments``, e.g. + +.. 
code-block:: python + + async_hb_scheduler = AsyncHyperBandScheduler( + time_attr='training_iteration', + reward_attr='episode_reward_mean', + max_t=100, + grace_period=10, + reduction_factor=3, + brackets=3) + run_experiments({...}, scheduler=async_hb_scheduler) + +Compared to the original version of HyperBand, this implementation provides better parallelism and avoids straggler issues during eliminations. An example of this can be found in `async_hyperband_example.py `__. **We recommend using this over the standard HyperBand scheduler.** + +.. autoclass:: ray.tune.schedulers.AsyncHyperBandScheduler + :noindex: + +HyperBand +--------- + +.. note:: Note that the HyperBand scheduler requires your trainable to support checkpointing, which is described in `Tune User Guide `__. Checkpointing enables the scheduler to multiplex many concurrent trials onto a limited size cluster. + +Tune also implements the `standard version of HyperBand `__. You can use it as such: + +.. code-block:: python + + run_experiments({...}, scheduler=HyperBandScheduler()) + +An example of this can be found in `hyperband_example.py `__. The progress of one such HyperBand run is shown below. 
+ + +:: + + == Status == + Using HyperBand: num_stopped=0 total_brackets=5 + Round #0: + Bracket(n=5, r=100, completed=80%): {'PAUSED': 4, 'PENDING': 1} + Bracket(n=8, r=33, completed=23%): {'PAUSED': 4, 'PENDING': 4} + Bracket(n=15, r=11, completed=4%): {'RUNNING': 2, 'PAUSED': 2, 'PENDING': 11} + Bracket(n=34, r=3, completed=0%): {'RUNNING': 2, 'PENDING': 32} + Bracket(n=81, r=1, completed=0%): {'PENDING': 38} + Resources used: 4/4 CPUs, 0/0 GPUs + Result logdir: ~/ray_results/hyperband_test + PAUSED trials: + - my_class_0_height=99,width=43: PAUSED [pid=11664], 0 s, 100 ts, 97.1 rew + - my_class_11_height=85,width=81: PAUSED [pid=11771], 0 s, 33 ts, 32.8 rew + - my_class_12_height=0,width=52: PAUSED [pid=11785], 0 s, 33 ts, 0 rew + - my_class_19_height=44,width=88: PAUSED [pid=11811], 0 s, 11 ts, 5.47 rew + - my_class_27_height=96,width=84: PAUSED [pid=11840], 0 s, 11 ts, 12.5 rew + ... 5 more not shown + PENDING trials: + - my_class_10_height=12,width=25: PENDING + - my_class_13_height=90,width=45: PENDING + - my_class_14_height=69,width=45: PENDING + - my_class_15_height=41,width=11: PENDING + - my_class_16_height=57,width=69: PENDING + ... 81 more not shown + RUNNING trials: + - my_class_23_height=75,width=51: RUNNING [pid=11843], 0 s, 1 ts, 1.47 rew + - my_class_26_height=16,width=48: RUNNING + - my_class_31_height=40,width=10: RUNNING + - my_class_53_height=28,width=96: RUNNING + +.. autoclass:: ray.tune.schedulers.HyperBandScheduler + :noindex: + + +HyperBand Implementation Details +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Implementation details may deviate slightly from theory but are focused on increasing usability. Note: ``R``, ``s_max``, and ``eta`` are parameters of HyperBand given by the paper. See `this post `_ for context. + +1. Both ``s_max`` (representing the ``number of brackets - 1``) and ``eta``, representing the downsampling rate, are fixed. 
In many practical settings, ``R``, which represents some resource unit and often the number of training iterations, can be set reasonably large, like ``R >= 200``. For simplicity, assume ``eta = 3``. Varying ``R`` between ``R = 200`` and ``R = 1000`` creates a huge range of the number of trials needed to fill up all brackets. + +.. image:: images/hyperband_bracket.png + +On the other hand, holding ``R`` constant at ``R = 300`` and varying ``eta`` also leads to HyperBand configurations that are not very intuitive: + +.. image:: images/hyperband_eta.png + +The implementation takes the same configuration as the example given in the paper and exposes ``max_t``, which is not a parameter in the paper. + +2. The example in the `post `_ to calculate ``n_0`` is actually a little different than the algorithm given in the paper. In this implementation, we implement ``n_0`` according to the paper (which is `n` in the below example): + +.. image:: images/hyperband_allocation.png + + +3. There are also implementation specific details like how trials are placed into brackets which are not covered in the paper. This implementation places trials within brackets according to smaller bracket first - meaning that with low number of trials, there will be less early stopping. + +Median Stopping Rule +-------------------- + +The Median Stopping Rule implements the simple strategy of stopping a trial if its performance falls below the median of other trials at similar points in time. You can set the ``scheduler`` parameter as such: + +.. code-block:: python + + run_experiments({...}, scheduler=MedianStoppingRule()) + +.. 
autoclass:: ray.tune.schedulers.MedianStoppingRule + :noindex: + diff --git a/doc/source/tune-searchalg.rst b/doc/source/tune-searchalg.rst new file mode 100644 index 000000000000..97e8ce1bc295 --- /dev/null +++ b/doc/source/tune-searchalg.rst @@ -0,0 +1,70 @@ +Tune Search Algorithms +====================== + +Tune provides various hyperparameter search algorithms to efficiently optimize your model. Tune allows you to use different search algorithms in combination with different trial schedulers. Tune will by default implicitly use the Variant Generation algorithm to create trials. + +You can utilize these search algorithms as follows: + +.. code-block:: python + + run_experiments(experiments, search_alg=SearchAlgorithm(...)) + +Currently, Tune offers the following search algorithms: + +- `Grid Search and Random Search `__ +- `HyperOpt `__ + + +Variant Generation (Grid Search/Random Search) +---------------------------------------------- + +By default, Tune uses the `default search space and variant generation process `__ to create and queue trials. This supports random search and grid search as specified by the ``config`` parameter of the Experiment. + +.. autoclass:: ray.tune.suggest.BasicVariantGenerator + :show-inheritance: + :noindex: + + +HyperOpt Search (Tree-structured Parzen Estimators) +--------------------------------------------------- + +The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt `__ to perform sequential model-based hyperparameter optimization. +In order to use this search algorithm, you will need to install HyperOpt via the following command: + +.. code-block:: bash + + $ pip install --upgrade git+git://github.com/hyperopt/hyperopt.git + +This algorithm requires using the `HyperOpt search space specification `__. You can use HyperOptSearch as follows: + +.. code-block:: python + + run_experiments(experiment_config, search_alg=HyperOptSearch(hyperopt_space, ... 
)) + +An example of this can be found in `hyperopt_example.py `__. + +.. autoclass:: ray.tune.suggest.HyperOptSearch + :show-inheritance: + :noindex: + + +Contributing a New Algorithm +---------------------------- + +If you are interested in implementing or contributing a new Search Algorithm, the API is straightforward: + +.. autoclass:: ray.tune.suggest.SearchAlgorithm + :members: + :noindex: + +Model-Based Suggestion Algorithms +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Often times, hyperparameter search algorithms are model-based and may be quite simple to implement. For this, one can extend the following abstract class and implement ``on_trial_result``, ``on_trial_complete``, and ``_suggest``. The abstract class will take care of Tune-specific boilerplate such as creating Trials and queuing trials: + +.. autoclass:: ray.tune.suggest.SuggestionAlgorithm + :show-inheritance: + :noindex: + + .. automethod:: ray.tune.suggest.SuggestionAlgorithm._suggest + :noindex: diff --git a/doc/source/tune-usage.rst b/doc/source/tune-usage.rst new file mode 100644 index 000000000000..b3be494a83d9 --- /dev/null +++ b/doc/source/tune-usage.rst @@ -0,0 +1,365 @@ +Tune User Guide +=============== + +Tune Overview +------------- + +.. image:: images/tune-api.svg + +Tune schedules a number of *trials* in a cluster. Each trial runs a user-defined Python function or class and is parameterized either by a *config* variation from Tune's Variant Generator or a user-specified **search algorithm**. The trials are scheduled and managed by a **trial scheduler**. + +More information about Tune's `search algorithms can be found here `__. + +More information about Tune's `trial schedulers can be found here `__. + + +Start by installing, importing, and initializing Ray. + +.. code-block:: python + + import ray + import ray.tune as tune + + ray.init() + +Tune provides a ``run_experiments`` function that generates and runs the trials as described by the `experiment specification `__. + +.. 
autofunction:: ray.tune.run_experiments + :noindex: + +This function will report status on the command line until all Trials stop: + +:: + + == Status == + Using FIFO scheduling algorithm. + Resources used: 4/8 CPUs, 0/0 GPUs + Result logdir: ~/ray_results/my_experiment + - train_func_0_lr=0.2,momentum=1: RUNNING [pid=6778], 209 s, 20604 ts, 7.29 acc + - train_func_1_lr=0.4,momentum=1: RUNNING [pid=6780], 208 s, 20522 ts, 53.1 acc + - train_func_2_lr=0.6,momentum=1: TERMINATED [pid=6789], 21 s, 2190 ts, 100 acc + - train_func_3_lr=0.2,momentum=2: RUNNING [pid=6791], 208 s, 41004 ts, 8.37 acc + - train_func_4_lr=0.4,momentum=2: RUNNING [pid=6800], 209 s, 41204 ts, 70.1 acc + - train_func_5_lr=0.6,momentum=2: TERMINATED [pid=6809], 10 s, 2164 ts, 100 acc + + +Experiment Configuration +------------------------ + +Specifying Experiments +~~~~~~~~~~~~~~~~~~~~~~ + +There are two ways to specify the configuration for an experiment - one via Python and one via JSON. + +**Using Python**: To specify a configuration, create an Experiment object. + +.. autoclass:: ray.tune.Experiment + :noindex: + +An example of this can be found in `hyperband_example.py `__. + +**Using JSON/Dict**: This uses the same fields as the ``ray.tune.Experiment``, except the experiment name is the key of the top level +dictionary. Tune will convert the dict into a ``ray.tune.Experiment`` object. + +.. code-block:: python + + experiment_spec = { + "my_experiment_name": { + "run": my_func, + "stop": { "mean_accuracy": 100 }, + "config": { + "alpha": tune.grid_search([0.2, 0.4, 0.6]), + "beta": tune.grid_search([1, 2]), + }, + "trial_resources": { "cpu": 1, "gpu": 0 }, + "repeat": 10, + "local_dir": "~/ray_results", + "upload_dir": "s3://your_bucket/path", + "checkpoint_freq": 10, + "max_failures": 2 + } + } + run_experiments(experiment_spec) + + +An example of this can be found in `async_hyperband_example.py `__. 
+ +Model API +~~~~~~~~~ + +You can either pass in a Python function or Python class for model training as follows, each requiring a specific signature/interface: + +.. code-block:: python + :emphasize-lines: 3,8 + + experiment_spec = { + "my_experiment_name": { + "run": my_trainable + } + } + + # or with the Experiment API + experiment_spec = Experiment("my_experiment_name", my_trainable) + + run_experiments(experiments=experiment_spec) + + +**Python functions** will need to have the following signature: + +.. code-block:: python + + def trainable(config, reporter): + """ + Args: + config (dict): Parameters provided from the search algorithm + or variant generation. + reporter (Reporter): Handle to report intermediate metrics to Tune. + """ + +Tune will run this function on a separate thread in a Ray actor process. Note that trainable functions are not checkpointable, since they never return control back to their caller. See `Trial Checkpointing for more details `__. + +.. note:: + If you have a lambda function that you want to train, you will need to first register the function: ``tune.register_trainable("lambda_id", lambda x: ...)``. You can then use ``lambda_id`` in place of ``my_trainable``. + +**Python classes** passed into Tune will need to subclass ``ray.tune.Trainable``. + +.. autoclass:: ray.tune.Trainable + :members: __init__, _save, _restore, _train, _setup, _stop + :noindex: + + +Tune Search Space (Default) +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +You can use ``tune.grid_search`` to specify an axis of a grid search. By default, Tune also supports sampling parameters from user-specified lambda functions, which can be used independently or in combination with grid search. + +The following shows grid search over two nested parameters combined with random sampling from two lambda functions, generating 9 different trials. 
Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions. + +.. code-block:: python + :emphasize-lines: 4-11 + + run_experiments({ + "my_experiment_name": { + "run": my_trainable, + "config": { + "alpha": lambda spec: np.random.uniform(100), + "beta": lambda spec: spec.config.alpha * np.random.normal(), + "nn_layers": [ + tune.grid_search([16, 64, 256]), + tune.grid_search([16, 64, 256]), + ], + } + } + }) + + +.. note:: + Lambda functions will be evaluated during trial variant generation. If you need to pass a literal function in your config, use ``tune.function(...)`` to escape it. + +.. warning:: + If you specify a Search Algorithm, you may not be able to use this feature, as the algorithm may require a different search space declaration. + +For more information on variant generation, see `basic_variant.py `__. + +Sampling Multiple Times +~~~~~~~~~~~~~~~~~~~~~~~ + +By default, each random variable and grid search point is sampled once. To take multiple random samples or repeat grid search runs, add ``repeat: N`` to the experiment config. + +.. code-block:: python + :emphasize-lines: 12 + + run_experiments({ + "my_experiment_name": { + "run": my_trainable, + "config": { + "alpha": lambda spec: np.random.uniform(100), + "beta": lambda spec: spec.config.alpha * np.random.normal(), + "nn_layers": [ + tune.grid_search([16, 64, 256]), + tune.grid_search([16, 64, 256]), + ], + }, + "repeat": 10 + } + }) + +E.g. in the above, ``"repeat": 10`` repeats the 3x3 grid search 10 times, for a total of 90 trials, each with randomly sampled values of ``alpha`` and ``beta``. + + +Using GPUs (Resource Allocation) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Tune will allocate the specified GPU and CPU ``trial_resources`` to each individual trial (defaulting to 1 CPU per trial). 
Under the hood, Tune runs each trial as a Ray actor, using Ray's resource handling to allocate resources and place actors. A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded. + +If GPU resources are not requested, the ``CUDA_VISIBLE_DEVICES`` environment variable will be set as empty, disallowing GPU access. +Otherwise, it will be set to the GPUs in the list (this is managed by Ray). + +If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``extra_cpu`` or ``extra_gpu`` to reserve extra resource slots for the actors you will create. For example, if a trainable class requires 1 GPU itself, but will launch 4 actors each using another GPU, then it should set ``"gpu": 1, "extra_gpu": 4``. + +.. code-block:: python + :emphasize-lines: 4-8 + + run_experiments({ + "my_experiment_name": { + "run": my_trainable, + "trial_resources": { + "cpu": 1, + "gpu": 1, + "extra_gpu": 4 + } + } + }) + + +Trial Checkpointing +~~~~~~~~~~~~~~~~~~~ + +To enable checkpointing, you must implement a `Trainable class `__ (Trainable functions are not checkpointable, since they never return control back to their caller). The easiest way to do this is to subclass the pre-defined ``Trainable`` class and implement its ``_train``, ``_save``, and ``_restore`` abstract methods `(example) `__. Implementing this interface is required to support resource multiplexing in Trial Schedulers such as HyperBand and PBT. + +For TensorFlow model training, this would look something like this `(full tensorflow example) `__: + +.. code-block:: python + + class MyClass(Trainable): + def _setup(self): + self.saver = tf.train.Saver() + self.sess = ... + self.iteration = 0 + + def _train(self): + self.sess.run(...) 
+ self.iteration += 1 + + def _save(self, checkpoint_dir): + return self.saver.save( + self.sess, checkpoint_dir + "/save", + global_step=self.iteration) + + def _restore(self, path): + return self.saver.restore(self.sess, path) + + +Additionally, checkpointing can be used to provide fault-tolerance for experiments. This can be enabled by setting ``checkpoint_freq: N`` and ``max_failures: M`` to checkpoint trials every *N* iterations and recover from up to *M* crashes per trial, e.g.: + +.. code-block:: python + :emphasize-lines: 4,5 + + run_experiments({ + "my_experiment_name": { + "run": my_trainable, + "checkpoint_freq": 10, + "max_failures": 5, + }, + }) + + +Handling Large Datasets +----------------------- + +You often will want to compute a large object (e.g., training data, model weights) on the driver and use that object within each trial. Tune provides a ``pin_in_object_store`` utility function that can be used to broadcast such large objects. Objects pinned in this way will never be evicted from the Ray object store while the driver process is running, and can be efficiently retrieved from any task via ``get_pinned_object``. + +.. code-block:: python + + import ray + from ray.tune import run_experiments + from ray.tune.util import pin_in_object_store, get_pinned_object + + import numpy as np + + ray.init() + + # X_id can be referenced in closures + X_id = pin_in_object_store(np.random.random(size=100000000)) + + def f(config, reporter): + X = get_pinned_object(X_id) + # use X + + run_experiments({ + "my_experiment_name": { + "run": f + } + }) + + +Logging and Visualizing Results +------------------------------- + +All results reported by the trainable will be logged locally to a unique directory per experiment, e.g. ``~/ray_results/my_experiment`` in the above example. On a cluster, incremental results will be synced to local disk on the head node.
The log records are compatible with a number of visualization tools: + +To visualize learning in tensorboard, install TensorFlow: + +.. code-block:: bash + + $ pip install tensorflow + +Then, after you run an experiment, you can visualize your experiment with TensorBoard by specifying the output directory of your results. Note that if you are running Ray on a remote cluster, you can forward the tensorboard port to your local machine through SSH using ``ssh -L 6006:localhost:6006
``: + +.. code-block:: bash + + $ tensorboard --logdir=~/ray_results/my_experiment + +.. image:: ray-tune-tensorboard.png + +To use rllab's VisKit (you may have to install some dependencies), run: + +.. code-block:: bash + + $ git clone https://github.com/rll/rllab.git + $ python rllab/rllab/viskit/frontend.py ~/ray_results/my_experiment + +.. image:: ray-tune-viskit.png + +Finally, to view the results with a `parallel coordinates visualization `__, open `ParallelCoordinatesVisualization.ipynb `__ as follows and run its cells: + +.. code-block:: bash + + $ cd $RAY_HOME/python/ray/tune + $ jupyter-notebook ParallelCoordinatesVisualization.ipynb + +.. image:: ray-tune-parcoords.png + + +Client API +---------- + +You can modify an ongoing experiment by adding or deleting trials using the Tune Client API. To do this, verify that you have the ``requests`` library installed: + +.. code-block:: bash + + $ pip install requests + +To use the Client API, you can start your experiment with ``with_server=True``: + +.. code-block:: python + + run_experiments({...}, with_server=True, server_port=4321) + +Then, on the client side, you can use the following class. The server address defaults to ``localhost:4321``. If on a cluster, you may want to forward this port (e.g. ``ssh -L :localhost:
``) so that you can use the Client on your local machine. + +.. autoclass:: ray.tune.web_server.TuneClient + :members: + + +For an example notebook for using the Client API, see the `Client API Example `__. + + +Examples +-------- + +You can find a comprehensive list of examples `using Tune and its various features here `__, including examples using Keras, TensorFlow, and Population-Based Training. + + +Further Questions or Issues? +---------------------------- + +You can post questions or issues or feedback through the following channels: + +1. `Our Mailing List`_: For discussions about development, questions about + usage, or any general questions and feedback. +2. `GitHub Issues`_: For bug reports and feature requests. + +.. _`Our Mailing List`: https://groups.google.com/forum/#!forum/ray-dev +.. _`GitHub Issues`: https://github.com/ray-project/ray/issues diff --git a/doc/source/tune.rst b/doc/source/tune.rst index 74f36332ff3b..1057ade3a111 100644 --- a/doc/source/tune.rst +++ b/doc/source/tune.rst @@ -1,297 +1,108 @@ -Ray Tune: Hyperparameter Optimization Framework -=============================================== +Tune: Scalable Hyperparameter Search +==================================== -Ray Tune is a scalable hyperparameter optimization framework for reinforcement learning and deep learning. Go from running one experiment on a single machine to running on a large cluster with efficient search algorithms without changing your code. +.. image:: images/tune.png + :scale: 30% + :align: center +Tune is a scalable framework for hyperparameter search with a focus on deep learning and deep reinforcement learning. -Getting Started ---------------- - -Installation -~~~~~~~~~~~~ - -You'll need to first `install ray `__ to import Ray Tune. - -Quick Start -~~~~~~~~~~~ - -.. 
code-block:: python - - import ray - import ray.tune as tune - - ray.init() - tune.register_trainable("train_func", train_func) - - all_trials = tune.run_experiments({ - "my_experiment": { - "run": "train_func", - "stop": {"mean_accuracy": 99}, - "config": { - "lr": tune.grid_search([0.2, 0.4, 0.6]), - "momentum": tune.grid_search([0.1, 0.2]), - } - } - }) - - -For the function you wish to tune, add a two-line modification (note that we use PyTorch as an example but Ray Tune works with any deep learning framework): - -.. code-block:: python - :emphasize-lines: 1,14 - - def train_func(config, reporter): # add a reporter arg - model = NeuralNet() - optimizer = torch.optim.SGD( - model.parameters(), lr=config["lr"], momentum=config["momentum"]) - dataset = ( ... ) - - for idx, (data, target) in enumerate(dataset): - # ... - output = model(data) - loss = F.MSELoss(output, target) - loss.backward() - optimizer.step() - accuracy = eval_accuracy(...) - reporter(timesteps_total=idx, mean_accuracy=accuracy) # report metrics - -This PyTorch script runs a small grid search over the ``train_func`` function using Ray Tune, reporting status on the command line until the stopping condition of ``mean_accuracy >= 99`` is reached: - -:: - - == Status == - Using FIFO scheduling algorithm. 
- Resources used: 4/8 CPUs, 0/0 GPUs - Result logdir: ~/ray_results/my_experiment - - train_func_0_lr=0.2,momentum=1: RUNNING [pid=6778], 209 s, 20604 ts, 7.29 acc - - train_func_1_lr=0.4,momentum=1: RUNNING [pid=6780], 208 s, 20522 ts, 53.1 acc - - train_func_2_lr=0.6,momentum=1: TERMINATED [pid=6789], 21 s, 2190 ts, 100 acc - - train_func_3_lr=0.2,momentum=2: RUNNING [pid=6791], 208 s, 41004 ts, 8.37 acc - - train_func_4_lr=0.4,momentum=2: RUNNING [pid=6800], 209 s, 41204 ts, 70.1 acc - - train_func_5_lr=0.6,momentum=2: TERMINATED [pid=6809], 10 s, 2164 ts, 100 acc - -In order to report incremental progress, ``train_func`` periodically calls the ``reporter`` function passed in by Ray Tune to return the current timestep and other metrics. Incremental results will be synced to local disk on the head node of the cluster. - -`tune.run_experiments `__ returns a list of Trial objects which you can inspect results of via ``trial.last_result``. - -Learn more `about specifying experiments `__. - +You can find the code for Tune `here on GitHub `__. Features -------- -Ray Tune has the following features: +* Supports any deep learning framework, including PyTorch, Tensorflow, and Keras. -- Scalable implementations of search execution techniques such as `Population Based Training (PBT) `__, `Median Stopping Rule `__, and `HyperBand `__. - -- The ability to combine search execution and search algorithms, such as Model-Based Optimization (HyperOpt) with HyperBand. - -- Integration with visualization tools such as `TensorBoard `__, `rllab's VisKit `__, and a `parallel coordinates visualization `__. - -- Flexible trial variant generation, including grid search, random search, and conditional parameter distributions. - -- Resource-aware scheduling, including support for concurrent runs of algorithms that may themselves be parallel and distributed. - - -Concepts --------- +* Choose among scalable hyperparameter and model search techniques such as: -.. 
image:: images/tune-api.svg + - `Population Based Training (PBT) `__ -Ray Tune schedules a number of *trials* in a cluster. Each trial runs a user-defined Python function or class and is parameterized by a *config* variation passed to the user code. + - `Median Stopping Rule `__ -In order to run any given function, you need to run ``register_trainable`` to a name. This makes all Ray workers aware of the function. + - `HyperBand `__ -.. autofunction:: ray.tune.register_trainable +* Mix and match different hyperparameter optimization approaches - such as using `HyperOpt with HyperBand`_. -Ray Tune provides a ``run_experiments`` function that generates and runs the trials described by the experiment specification. The trials are scheduled and managed by a *trial scheduler* that implements the search algorithm (default is FIFO). +* Visualize results with `TensorBoard `__, `parallel coordinates (Plot.ly) `__, and `rllab's VisKit `__. -.. autofunction:: ray.tune.run_experiments +* Scale to running on a large distributed cluster without changing your code. -Ray Tune can be used anywhere Ray can, e.g. on your laptop with ``ray.init()`` embedded in a Python script, or in an `auto-scaling cluster `__ for massive parallelism. +* Parallelize training for models with GPU requirements or algorithms that may themselves be parallel and distributed, using Tune's `resource-aware scheduling `__. -You can find the code for Ray Tune `here on GitHub `__. +Take a look at `the User Guide `__ for a comprehensive overview on how to use Tune's features. +Getting Started +--------------- -Trial Schedulers ----------------- - -By default, Ray Tune schedules trials in serial order with the ``FIFOScheduler`` class. However, you can also specify a custom scheduling algorithm that can early stop trials, perturb parameters, or incorporate suggestions from an external service. 
Currently implemented trial schedulers include -`Population Based Training (PBT) `__, `Median Stopping Rule `__, and `HyperBand `__. - -.. code-block:: python - - run_experiments({...}, scheduler=AsyncHyperBandScheduler()) - -Search Algorithms ------------------ - -Tune allows you to use different search algorithms in combination with different scheduling algorithms. Currently, Tune offers the following search algorithms: - - - Grid search / Random Search - - Tree-structured Parzen Estimators (HyperOpt) - -If you are interested in implementing or contributing a new Search Algorithm, the API is straightforward: - -.. autoclass:: ray.tune.suggest.SearchAlgorithm - +Installation +~~~~~~~~~~~~ -HyperOpt Integration -~~~~~~~~~~~~~~~~~~~~ -The ``HyperOptSearch`` is a SearchAlgorithm that is backed by HyperOpt to perform sequential model-based hyperparameter optimization. -In order to use this search algorithm, you will need to install HyperOpt via the following command: +You'll need to first `install ray `__ to import Tune. .. code-block:: bash - $ pip install --upgrade git+git://github.com/hyperopt/hyperopt.git + pip install ray -An example of this can be found in `hyperopt_example.py `__. - -.. note:: - - The HyperOptScheduler takes an *increasing* metric in the reward attribute. If trying to minimize a loss, be sure to - specify *mean_loss* in the function/class reporting and *reward_attr=neg_mean_loss* in the HyperOptScheduler initializer. - -.. autoclass:: ray.tune.suggest.HyperOptSearch +Quick Start +~~~~~~~~~~~ -Handling Large Datasets ------------------------ +This example runs a small grid search over a neural network training function using Tune, reporting status on the command line until the stopping condition of ``mean_accuracy >= 99`` is reached. Tune works with any deep learning framework. -You often will want to compute a large object (e.g., training data, model weights) on the driver and use that object within each trial. 
Ray Tune provides a ``pin_in_object_store`` utility function that can be used to broadcast such large objects. Objects pinned in this way will never be evicted from the Ray object store while the driver process is running, and can be efficiently retrieved from any task via ``get_pinned_object``. +Tune uses Ray as a backend, so we will first import and initialize Ray. .. code-block:: python import ray - from ray.tune import register_trainable, run_experiments - from ray.tune.util import pin_in_object_store, get_pinned_object - - import numpy as np + import ray.tune as tune ray.init() - # X_id can be referenced in closures - X_id = pin_in_object_store(np.random.random(size=100000000)) - - def f(config, reporter): - X = get_pinned_object(X_id) - # use X - - register_trainable("f", f) - run_experiments(...) - - - - -Visualizing Results -------------------- - -Ray Tune logs trial results to a unique directory per experiment, e.g. ``~/ray_results/my_experiment`` in the above example. The log records are compatible with a number of visualization tools: - -To visualize learning in tensorboard, install TensorFlow: - -.. code-block:: bash - - $ pip install tensorflow - -Then, after you run a experiment, you can visualize your experiment with TensorBoard by specifying the output directory of your results. Note that if you running Ray on a remote cluster, you can forward the tensorboard port to your local machine through SSH using ``ssh -L 6006:localhost:6006
``: - -.. code-block:: bash - - $ tensorboard --logdir=~/ray_results/my_experiment - -.. image:: ray-tune-tensorboard.png - -To use rllab's VisKit (you may have to install some dependencies), run: - -.. code-block:: bash - - $ git clone https://github.com/rll/rllab.git - $ python rllab/rllab/viskit/frontend.py ~/ray_results/my_experiment - -.. image:: ray-tune-viskit.png - -Finally, to view the results with a `parallel coordinates visualization `__, open `ParallelCoordinatesVisualization.ipynb `__ as follows and run its cells: - -.. code-block:: bash - - $ cd $RAY_HOME/python/ray/tune - $ jupyter-notebook ParallelCoordinatesVisualization.ipynb - -.. image:: ray-tune-parcoords.png - -Trial Checkpointing -------------------- - -To enable checkpointing, you must implement a Trainable class (Trainable functions are not checkpointable, since they never return control back to their caller). The easiest way to do this is to subclass the pre-defined ``Trainable`` class and implement its ``_train``, ``_save``, and ``_restore`` abstract methods `(example) `__: Implementing this interface is required to support resource multiplexing in schedulers such as HyperBand and PBT. - -For TensorFlow model training, this would look something like this `(full tensorflow example) `__: +For the function you wish to tune, pass in a ``reporter`` object: .. code-block:: python + :emphasize-lines: 1,9 - class MyClass(Trainable): - def _setup(self): - self.saver = tf.train.Saver() - self.sess = ... - self.iteration = 0 - - def _train(self): - self.sess.run(...) - self.iteration += 1 - - def _save(self, checkpoint_dir): - return self.saver.save( - self.sess, checkpoint_dir + "/save", - global_step=self.iteration) - - def _restore(self, path): - return self.saver.restore(self.sess, path) + def train_func(config, reporter): # add a reporter arg + model = ( ... ) + optimizer = SGD(model.parameters(), + momentum=config["momentum"]) + dataset = ( ... 
) + for idx, (data, target) in enumerate(dataset): + accuracy = model.fit(data, target) + reporter(mean_accuracy=accuracy) # report metrics -Additionally, checkpointing can be used to provide fault-tolerance for experiments. This can be enabled by setting ``checkpoint_freq: N`` and ``max_failures: M`` to checkpoint trials every *N* iterations and recover from up to *M* crashes per trial, e.g.: +**Finally**, configure your search and execute it on your Ray cluster: .. code-block:: python - run_experiments({ + all_trials = tune.run_experiments({ "my_experiment": { - ... - "checkpoint_freq": 10, - "max_failures": 5, - }, + "run": train_func, + "stop": {"mean_accuracy": 99}, + "config": {"momentum": tune.grid_search([0.1, 0.2])} + } }) -The class interface that must be implemented to enable checkpointing is as follows: - -.. autoclass:: ray.tune.trainable.Trainable - :members: _save, _restore, _train, _setup, _stop - - -Client API ----------- - -You can modify an ongoing experiment by adding or deleting trials using the Tune Client API. To do this, verify that you have the ``requests`` library installed: - -.. code-block:: bash - - $ pip install requests - -To use the Client API, you can start your experiment with ``with_server=True``: - -.. code-block:: python +Tune can be used anywhere Ray can, e.g. on your laptop with ``ray.init()`` embedded in a Python script, or in an `auto-scaling cluster `__ for massive parallelism. - run_experiments({...}, with_server=True, server_port=4321) +Citing Tune +----------- -Then, on the client side, you can use the following class. The server address defaults to ``localhost:4321``. If on a cluster, you may want to forward this port (e.g. ``ssh -L :localhost:
``) so that you can use the Client on your local machine. +If Tune helps you in your academic research, you are encouraged to cite `our paper `__. Here is an example bibtex: -.. autoclass:: ray.tune.web_server.TuneClient - :members: +.. code-block:: tex + @article{liaw2018tune, + title={Tune: A Research Platform for Distributed Model Selection and Training}, + author={Liaw, Richard and Liang, Eric and Nishihara, Robert + and Moritz, Philipp and Gonzalez, Joseph E and Stoica, Ion}, + journal={arXiv preprint arXiv:1807.05118}, + year={2018} + } -For an example notebook for using the Client API, see the `Client API Example `__. - - -Examples --------- -You can find a list of examples `using Ray Tune and its various features here `__, including examples using Keras, TensorFlow, and Population-Based Training. +.. _HyperOpt with HyperBand: https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperopt_example.py diff --git a/python/ray/tune/README.rst b/python/ray/tune/README.rst index f569429df4a8..2d7533f56a0f 100644 --- a/python/ray/tune/README.rst +++ b/python/ray/tune/README.rst @@ -1,28 +1,22 @@ -Ray.tune: Hyperparameter Optimization Framework -=============================================== +Tune: Scalable Hyperparameter Search +==================================== -Ray.tune is a hyperparameter tuning framework for long-running tasks such as RL and deep learning training. +Tune is a scalable framework for hyperparameter search with a focus on deep learning and deep reinforcement learning. User documentation can be `found here `__. -Implementation overview ------------------------ -At a high level, Ray.tune takes in JSON experiment configs (e.g. that defines the grid or random search) -and compiles them into a number of `Trial` objects. It schedules trials on the Ray cluster using a given -`TrialScheduler` implementation (e.g. median stopping rule or HyperBand). 
+Citing Tune +----------- -This is implemented as follows: +If Tune helps you in your academic research, you are encouraged to cite `our paper `__. Here is an example bibtex: -- `variant_generator.py `__ - parses the config and generates the trial variants. +.. code-block:: tex -- `trial.py `__ manages the lifecycle - of the Ray actor responsible for executing the trial. - -- `trial_runner.py `__ tracks scheduling - state for all the trials of an experiment. TrialRunners are usually - created automatically by ``run_experiments(experiment_json)``, which parses and starts the experiments. - -- `trial_scheduler.py `__ - plugs into TrialRunner to implement custom prioritization or early stopping algorithms. + @article{liaw2018tune, + title={Tune: A Research Platform for Distributed Model Selection and Training}, + author={Liaw, Richard and Liang, Eric and Nishihara, Robert and + Moritz, Philipp and Gonzalez, Joseph E and Stoica, Ion}, + journal={arXiv preprint arXiv:1807.05118}, + year={2018} + } diff --git a/python/ray/tune/config_parser.py b/python/ray/tune/config_parser.py index b3d6feddb8ca..f19f980294e7 100644 --- a/python/ray/tune/config_parser.py +++ b/python/ray/tune/config_parser.py @@ -168,12 +168,6 @@ def create_trial_from_spec(spec, output_path, parser, **trial_kwargs): A trial object with corresponding parameters to the specification. """ try: - # Special case the `env` param for RLlib by automatically - # moving it into the `config` section. 
- if "env" in spec: - spec["config"] = spec.get("config", {}) - spec["config"]["env"] = spec["env"] - del spec["env"] args = parser.parse_args(to_argv(spec)) except SystemExit: raise TuneError("Error parsing args, see above message", spec) diff --git a/python/ray/tune/examples/README.rst b/python/ray/tune/examples/README.rst index 2983e6acf55e..3d35497c8841 100644 --- a/python/ray/tune/examples/README.rst +++ b/python/ray/tune/examples/README.rst @@ -1,4 +1,4 @@ -Ray Tune Examples -================= +Tune Examples +============= -Code examples for various schedulers and Ray Tune features. +Code examples for various schedulers and Tune features. diff --git a/python/ray/tune/examples/async_hyperband_example.py b/python/ray/tune/examples/async_hyperband_example.py index 0ef9d5fd8a74..13d27d9f8f56 100644 --- a/python/ray/tune/examples/async_hyperband_example.py +++ b/python/ray/tune/examples/async_hyperband_example.py @@ -12,9 +12,8 @@ import numpy as np import ray -from ray.tune import Trainable, register_trainable, \ - run_experiments -from ray.tune.async_hyperband import AsyncHyperBandScheduler +from ray.tune import Trainable, run_experiments +from ray.tune.schedulers import AsyncHyperBandScheduler class MyTrainableClass(Trainable): @@ -47,8 +46,6 @@ def _restore(self, checkpoint_path): self.timestep = json.loads(f.read())["timestep"] -register_trainable("my_class", MyTrainableClass) - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -69,7 +66,7 @@ def _restore(self, checkpoint_path): run_experiments( { "asynchyperband_test": { - "run": "my_class", + "run": MyTrainableClass, "stop": { "training_iteration": 1 if args.smoke_test else 99999 }, diff --git a/python/ray/tune/examples/hyperband_example.py b/python/ray/tune/examples/hyperband_example.py index fdfec2d1058d..c015b2cd6c69 100755 --- a/python/ray/tune/examples/hyperband_example.py +++ b/python/ray/tune/examples/hyperband_example.py @@ -12,9 +12,8 @@ import numpy as np import 
ray -from ray.tune import Trainable, register_trainable, \ - run_experiments, Experiment -from ray.tune.hyperband import HyperBandScheduler +from ray.tune import Trainable, run_experiments, Experiment +from ray.tune.schedulers import HyperBandScheduler class MyTrainableClass(Trainable): @@ -47,8 +46,6 @@ def _restore(self, checkpoint_path): self.timestep = json.loads(f.read())["timestep"] -register_trainable("my_class", MyTrainableClass) - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -66,7 +63,7 @@ def _restore(self, checkpoint_path): exp = Experiment( name="hyperband_test", - run="my_class", + run=MyTrainableClass, repeat=20, stop={"training_iteration": 1 if args.smoke_test else 99999}, config={ diff --git a/python/ray/tune/examples/hyperopt_example.py b/python/ray/tune/examples/hyperopt_example.py index a589bbc7c5e8..8f5a5c20b841 100644 --- a/python/ray/tune/examples/hyperopt_example.py +++ b/python/ray/tune/examples/hyperopt_example.py @@ -8,7 +8,7 @@ import ray from ray.tune import run_experiments, register_trainable -from ray.tune.async_hyperband import AsyncHyperBandScheduler +from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest import HyperOptSearch @@ -52,7 +52,6 @@ def easy_objective(config, reporter): }, } } - algo = HyperOptSearch( - config, space, max_concurrent=4, reward_attr="neg_mean_loss") + algo = HyperOptSearch(space, max_concurrent=4, reward_attr="neg_mean_loss") scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss") - run_experiments(search_alg=algo, scheduler=scheduler) + run_experiments(config, search_alg=algo, scheduler=scheduler) diff --git a/python/ray/tune/examples/pbt_example.py b/python/ray/tune/examples/pbt_example.py index 056be3edd4e7..f0d3f5266455 100755 --- a/python/ray/tune/examples/pbt_example.py +++ b/python/ray/tune/examples/pbt_example.py @@ -11,8 +11,8 @@ import time import ray -from ray.tune import Trainable, register_trainable, run_experiments -from 
ray.tune.pbt import PopulationBasedTraining +from ray.tune import Trainable, run_experiments +from ray.tune.schedulers import PopulationBasedTraining class MyTrainableClass(Trainable): @@ -54,8 +54,6 @@ def _restore(self, checkpoint_path): self.current_value = data["value"] -register_trainable("my_class", MyTrainableClass) - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -79,7 +77,7 @@ def _restore(self, checkpoint_path): run_experiments( { "pbt_test": { - "run": "my_class", + "run": MyTrainableClass, "stop": { "training_iteration": 2 if args.smoke_test else 99999 }, diff --git a/python/ray/tune/examples/pbt_ppo_example.py b/python/ray/tune/examples/pbt_ppo_example.py index 24dd9acff859..b1842bd544ca 100755 --- a/python/ray/tune/examples/pbt_ppo_example.py +++ b/python/ray/tune/examples/pbt_ppo_example.py @@ -14,7 +14,7 @@ import ray from ray.tune import run_experiments -from ray.tune.pbt import PopulationBasedTraining +from ray.tune.schedulers import PopulationBasedTraining if __name__ == "__main__": diff --git a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py index 409fc27a73a6..8eba6f301974 100755 --- a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py +++ b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py @@ -24,9 +24,8 @@ import ray from ray.tune import grid_search, run_experiments -from ray.tune import register_trainable from ray.tune import Trainable -from ray.tune.pbt import PopulationBasedTraining +from ray.tune.schedulers import PopulationBasedTraining num_classes = 10 @@ -179,9 +178,8 @@ def _stop(self): "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - register_trainable("train_cifar10", Cifar10Model) train_spec = { - "run": "train_cifar10", + "run": Cifar10Model, "trial_resources": { "cpu": 1, "gpu": 1 diff --git a/python/ray/tune/examples/tune_mnist_async_hyperband.py 
b/python/ray/tune/examples/tune_mnist_async_hyperband.py index 6e37fc234230..471bcc42c3c8 100755 --- a/python/ray/tune/examples/tune_mnist_async_hyperband.py +++ b/python/ray/tune/examples/tune_mnist_async_hyperband.py @@ -33,7 +33,7 @@ import time import ray -from ray.tune import grid_search, run_experiments, register_trainable +from ray.tune import grid_search, run_experiments from tensorflow.examples.tutorials.mnist import input_data @@ -218,9 +218,8 @@ def train(config={'activation': 'relu'}, reporter=None): '--smoke-test', action='store_true', help='Finish quickly for testing') args, _ = parser.parse_known_args() - register_trainable('train_mnist', train) mnist_spec = { - 'run': 'train_mnist', + 'run': train, 'repeat': 10, 'stop': { 'mean_accuracy': 0.99, @@ -237,7 +236,7 @@ def train(config={'activation': 'relu'}, reporter=None): ray.init() - from ray.tune.async_hyperband import AsyncHyperBandScheduler + from ray.tune.schedulers import AsyncHyperBandScheduler run_experiments( { 'tune_mnist_test': mnist_spec diff --git a/python/ray/tune/examples/tune_mnist_keras.py b/python/ray/tune/examples/tune_mnist_keras.py index d23951c3a1c6..aa16c7046ef1 100644 --- a/python/ray/tune/examples/tune_mnist_keras.py +++ b/python/ray/tune/examples/tune_mnist_keras.py @@ -13,7 +13,7 @@ import ray from ray import tune -from ray.tune.async_hyperband import AsyncHyperBandScheduler +from ray.tune.schedulers import AsyncHyperBandScheduler class TuneCallback(keras.callbacks.Callback): diff --git a/python/ray/tune/examples/tune_mnist_ray_hyperband.py b/python/ray/tune/examples/tune_mnist_ray_hyperband.py index cae7607b183b..4eee34195edd 100755 --- a/python/ray/tune/examples/tune_mnist_ray_hyperband.py +++ b/python/ray/tune/examples/tune_mnist_ray_hyperband.py @@ -32,7 +32,7 @@ import ray from ray.tune import grid_search, run_experiments, register_trainable, \ Trainable -from ray.tune.hyperband import HyperBandScheduler +from ray.tune.schedulers import HyperBandScheduler from 
tensorflow.examples.tutorials.mnist import input_data import tensorflow as tf diff --git a/python/ray/tune/experiment.py b/python/ray/tune/experiment.py index 05009ba88de0..45352702e554 100644 --- a/python/ray/tune/experiment.py +++ b/python/ray/tune/experiment.py @@ -2,8 +2,13 @@ from __future__ import division from __future__ import print_function +import copy +import six +import types + from ray.tune.result import DEFAULT_RESULTS_DIR from ray.tune.error import TuneError +from ray.tune.registry import register_trainable class Experiment(object): @@ -11,19 +16,21 @@ class Experiment(object): Parameters: name (str): Name of experiment. - run (str): The algorithm or model to train. This may refer to the - name of a built-on algorithm (e.g. RLLib's DQN or PPO), or a - user-defined trainable function or class - registered in the tune registry. + run (function|class|str): The algorithm or model to train. + This may refer to the name of a built-on algorithm + (e.g. RLLib's DQN or PPO), a user-defined trainable + function or class, or the string identifier of a + trainable function or class registered in the tune registry. stop (dict): The stopping criteria. The keys may be any field in the return result of 'train()', whichever is reached first. Defaults to empty dict. - config (dict): Algorithm-specific configuration - (e.g. env, hyperparams). Defaults to empty dict. + config (dict): Algorithm-specific configuration for Tune variant + generation (e.g. env, hyperparams). Defaults to empty dict. + Custom search algorithms may ignore this. trial_resources (dict): Machine resources to allocate per trial, e.g. ``{"cpu": 64, "gpu": 8}``. Note that GPUs will not be assigned unless you specify them here. Defaults to 1 CPU and 0 - GPUs. + GPUs in ``Trainable.default_resource_request()``. repeat (int): Number of times to repeat each trial. Defaults to 1. local_dir (str): Local dir to save training results to. Defaults to ``~/ray_results``. 
@@ -34,6 +41,29 @@ class Experiment(object): max_failures (int): Try to recover a trial from its last checkpoint at least this many times. Only applies if checkpointing is enabled. Defaults to 3. + restore (str): Path to checkpoint. Only makes sense to set if + running 1 trial. Defaults to None. + + + Examples: + >>> experiment_spec = Experiment( + >>> "my_experiment_name", + >>> my_func, + >>> stop={"mean_accuracy": 100}, + >>> config={ + >>> "alpha": tune.grid_search([0.2, 0.4, 0.6]), + >>> "beta": tune.grid_search([1, 2]), + >>> }, + >>> trial_resources={ + >>> "cpu": 1, + >>> "gpu": 0 + >>> }, + >>> repeat=10, + >>> local_dir="~/ray_results", + >>> upload_dir="s3://your_bucket/path", + >>> checkpoint_freq=10, + >>> max_failures=2) + """ def __init__(self, @@ -46,20 +76,19 @@ def __init__(self, local_dir=None, upload_dir="", checkpoint_freq=0, - max_failures=3): + max_failures=3, + restore=None): spec = { - "run": run, + "run": self._register_if_needed(run), "stop": stop or {}, "config": config or {}, - "trial_resources": trial_resources or { - "cpu": 1, - "gpu": 0 - }, + "trial_resources": trial_resources, "repeat": repeat, "local_dir": local_dir or DEFAULT_RESULTS_DIR, "upload_dir": upload_dir, "checkpoint_freq": checkpoint_freq, - "max_failures": max_failures + "max_failures": max_failures, + "restore": restore } self.name = name @@ -75,11 +104,56 @@ def from_json(cls, name, spec): """ if "run" not in spec: raise TuneError("No trainable specified!") - exp = cls(name, spec["run"]) - exp.name = name - exp.spec = spec + + # Special case the `env` param for RLlib by automatically + # moving it into the `config` section. 
+ if "env" in spec: + spec["config"] = spec.get("config", {}) + spec["config"]["env"] = spec["env"] + del spec["env"] + + spec = copy.deepcopy(spec) + + run_value = spec.pop("run") + try: + exp = cls(name, run_value, **spec) + except TypeError: + raise TuneError("Improper argument from JSON: {}.".format(spec)) return exp + def _register_if_needed(self, run_object): + """Registers Trainable or Function at runtime. + + Assumes already registered if run_object is a string. Does not + register lambdas because they could be part of variant generation. + Also, does not inspect interface of given run_object. + + Arguments: + run_object (str|function|class): Trainable to run. If string, + assumes it is an ID and does not modify it. Otherwise, + returns a string corresponding to the run_object name. + + Returns: + A string representing the trainable identifier. + """ + + if isinstance(run_object, six.string_types): + return run_object + elif isinstance(run_object, types.FunctionType): + if run_object.__name__ == "<lambda>": + print("Not auto-registering lambdas - resolving as variant.") + return run_object + else: + name = run_object.__name__ + register_trainable(name, run_object) + return name + elif isinstance(run_object, type): + name = run_object.__name__ + register_trainable(name, run_object) + return name + else: + raise TuneError("Improper 'run' - not string nor trainable.") + def convert_to_experiment_list(experiments): """Produces a list of Experiment objects. diff --git a/python/ray/tune/function_runner.py b/python/ray/tune/function_runner.py index c7ccf8850162..763689cceab3 100644 --- a/python/ray/tune/function_runner.py +++ b/python/ray/tune/function_runner.py @@ -12,7 +12,12 @@ class StatusReporter(object): - """Object passed into your main() that you can report status through.""" + """Object passed into your main() that you can report status through.
+ + Example: + >>> reporter = StatusReporter() + >>> reporter(timesteps_total=1) + """ def __init__(self): self._latest_result = None diff --git a/python/ray/tune/schedulers/__init__.py b/python/ray/tune/schedulers/__init__.py new file mode 100644 index 000000000000..50bb447437e4 --- /dev/null +++ b/python/ray/tune/schedulers/__init__.py @@ -0,0 +1,14 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.tune.schedulers.trial_scheduler import TrialScheduler, FIFOScheduler +from ray.tune.schedulers.hyperband import HyperBandScheduler +from ray.tune.schedulers.async_hyperband import AsyncHyperBandScheduler +from ray.tune.schedulers.median_stopping_rule import MedianStoppingRule +from ray.tune.schedulers.pbt import PopulationBasedTraining + +__all__ = [ + "TrialScheduler", "HyperBandScheduler", "AsyncHyperBandScheduler", + "MedianStoppingRule", "FIFOScheduler", "PopulationBasedTraining" +] diff --git a/python/ray/tune/async_hyperband.py b/python/ray/tune/schedulers/async_hyperband.py similarity index 98% rename from python/ray/tune/async_hyperband.py rename to python/ray/tune/schedulers/async_hyperband.py index a756425bb342..fc19273ad91a 100644 --- a/python/ray/tune/async_hyperband.py +++ b/python/ray/tune/schedulers/async_hyperband.py @@ -4,7 +4,7 @@ import numpy as np -from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler +from ray.tune.schedulers.trial_scheduler import FIFOScheduler, TrialScheduler class AsyncHyperBandScheduler(FIFOScheduler): diff --git a/python/ray/tune/hyperband.py b/python/ray/tune/schedulers/hyperband.py similarity index 99% rename from python/ray/tune/hyperband.py rename to python/ray/tune/schedulers/hyperband.py index 1b0cff52df94..8b30d97e53da 100644 --- a/python/ray/tune/hyperband.py +++ b/python/ray/tune/schedulers/hyperband.py @@ -5,7 +5,7 @@ import collections import numpy as np -from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler +from 
ray.tune.schedulers.trial_scheduler import FIFOScheduler, TrialScheduler from ray.tune.trial import Trial @@ -38,7 +38,7 @@ class HyperBandScheduler(FIFOScheduler): algorithm. It divides trials into brackets of varying sizes, and periodically early stops low-performing trials within each bracket. - To use this implementation of HyperBand with Ray Tune, all you need + To use this implementation of HyperBand with Tune, all you need to do is specify the max length of time a trial can run `max_t`, the time units `time_attr`, and the name of the reported objective value `reward_attr`. We automatically determine reasonable values for the other diff --git a/python/ray/tune/median_stopping_rule.py b/python/ray/tune/schedulers/median_stopping_rule.py similarity index 98% rename from python/ray/tune/median_stopping_rule.py rename to python/ray/tune/schedulers/median_stopping_rule.py index ad29cd8e83ae..378b42e480b0 100644 --- a/python/ray/tune/median_stopping_rule.py +++ b/python/ray/tune/schedulers/median_stopping_rule.py @@ -6,7 +6,7 @@ import numpy as np from ray.tune.trial import Trial -from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler +from ray.tune.schedulers.trial_scheduler import FIFOScheduler, TrialScheduler class MedianStoppingRule(FIFOScheduler): diff --git a/python/ray/tune/pbt.py b/python/ray/tune/schedulers/pbt.py similarity index 98% rename from python/ray/tune/pbt.py rename to python/ray/tune/schedulers/pbt.py index e58cc1261851..460e0b99b805 100644 --- a/python/ray/tune/pbt.py +++ b/python/ray/tune/schedulers/pbt.py @@ -8,7 +8,7 @@ from ray.tune.error import TuneError from ray.tune.trial import Trial -from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler +from ray.tune.schedulers.trial_scheduler import FIFOScheduler, TrialScheduler from ray.tune.suggest.variant_generator import format_vars # Parameters are transferred from the top PBT_QUANTILE fraction of trials to @@ -97,7 +97,7 @@ class PopulationBasedTraining(FIFOScheduler): 
during training time. This enables very fast hyperparameter discovery and also automatically discovers good annealing schedules. - This Ray Tune PBT implementation considers all trials added as part of the + This Tune PBT implementation considers all trials added as part of the PBT population. If the number of trials exceeds the cluster capacity, they will be time-multiplexed as to balance training progress across the population. diff --git a/python/ray/tune/trial_scheduler.py b/python/ray/tune/schedulers/trial_scheduler.py similarity index 92% rename from python/ray/tune/trial_scheduler.py rename to python/ray/tune/schedulers/trial_scheduler.py index 531d8d128cfd..15fa3cb4cdc6 100644 --- a/python/ray/tune/trial_scheduler.py +++ b/python/ray/tune/schedulers/trial_scheduler.py @@ -6,9 +6,11 @@ class TrialScheduler(object): - CONTINUE = "CONTINUE" - PAUSE = "PAUSE" - STOP = "STOP" + """Interface for implementing a Trial Scheduler class.""" + + CONTINUE = "CONTINUE" #: Status for continuing trial execution + PAUSE = "PAUSE" #: Status for pausing trial execution + STOP = "STOP" #: Status for stopping trial execution def on_trial_add(self, trial_runner, trial): """Called when a new trial is added to the trial runner.""" diff --git a/python/ray/tune/suggest/basic_variant.py b/python/ray/tune/suggest/basic_variant.py index 5e0fbe977ff4..4cb652be4591 100644 --- a/python/ray/tune/suggest/basic_variant.py +++ b/python/ray/tune/suggest/basic_variant.py @@ -2,7 +2,7 @@ from __future__ import division from __future__ import print_function -from itertools import chain +import itertools from ray.tune.error import TuneError from ray.tune.experiment import convert_to_experiment_list @@ -17,25 +17,29 @@ class BasicVariantGenerator(SearchAlgorithm): See also: `ray.tune.suggest.variant_generator`. Example: - >>> searcher = BasicVariantGenerator({"experiment": { ... }}) + >>> searcher = BasicVariantGenerator() + >>> searcher.add_configurations({"experiment": { ... 
}}) >>> list_of_trials = searcher.next_trials() >>> searcher.is_finished == True """ - def __init__(self, experiments=None): - """Constructs a generator given experiment specifications. + def __init__(self): + self._parser = make_parser() + self._trial_generator = [] + self._counter = 0 + self._finished = False + + def add_configurations(self, experiments): + """Chains generator given experiment specifications. Arguments: experiments (Experiment | list | dict): Experiments to run. """ experiment_list = convert_to_experiment_list(experiments) - self._parser = make_parser() - self._trial_generator = chain.from_iterable([ - self._generate_trials(experiment.spec, experiment.name) - for experiment in experiment_list - ]) - self._counter = 0 - self._finished = False + for experiment in experiment_list: + self._trial_generator = itertools.chain( + self._trial_generator, + self._generate_trials(experiment.spec, experiment.name)) def next_trials(self): """Provides Trial objects to be queued into the TrialRunner. diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py index 9e239a488a97..26291c1ef85a 100644 --- a/python/ray/tune/suggest/hyperopt.py +++ b/python/ray/tune/suggest/hyperopt.py @@ -22,21 +22,35 @@ class HyperOptSearch(SuggestionAlgorithm): added trials will not be tracked by HyperOpt. Parameters: - experiments (Experiment | list | dict): Experiments to run. Will be - used by SuggestionAlgorithm parent class to initialize Trials. space (dict): HyperOpt configuration. Parameters will be sampled from this configuration and will be used to override parameters generated in the variant generation process. max_concurrent (int): Number of maximum concurrent trials. Defaults to 10. reward_attr (str): The training result objective value attribute. - This refers to an increasing value, which is internally negated - when interacting with HyperOpt so that HyperOpt can "maximize" - this value. + This refers to an increasing value. 
+ + Example: + >>> space = { + >>> 'width': hp.uniform('width', 0, 20), + >>> 'height': hp.uniform('height', -100, 100), + >>> 'activation': hp.choice("activation", ["relu", "tanh"]) + >>> } + >>> config = { + >>> "my_exp": { + >>> "run": "exp", + >>> "repeat": 10 if args.smoke_test else 1000, + >>> "stop": { + >>> "training_iteration": 100 + >>> }, + >>> } + >>> } + >>> algo = HyperOptSearch( + >>> space, max_concurrent=4, reward_attr="neg_mean_loss") + >>> algo.add_configurations(config) """ def __init__(self, - experiments, space, max_concurrent=10, reward_attr="episode_reward_mean", @@ -51,7 +65,7 @@ def __init__(self, self._live_trial_mapping = {} self.rstate = np.random.RandomState() - super(HyperOptSearch, self).__init__(experiments=experiments, **kwargs) + super(HyperOptSearch, self).__init__(**kwargs) def _suggest(self, trial_id): if self._num_live_trials() >= self._max_concurrent: @@ -93,6 +107,11 @@ def on_trial_complete(self, result=None, error=False, early_terminated=False): + """Passes the result to HyperOpt unless early terminated or errored. + + The result is internally negated when interacting with HyperOpt + so that HyperOpt can "maximize" this value, as it minimizes on default. + """ ho_trial = self._get_hyperopt_trial(trial_id) if ho_trial is None: return diff --git a/python/ray/tune/suggest/search.py b/python/ray/tune/suggest/search.py index a3ee59bc3123..89c0d26f9e53 100644 --- a/python/ray/tune/suggest/search.py +++ b/python/ray/tune/suggest/search.py @@ -16,6 +16,14 @@ class SearchAlgorithm(object): See also: `ray.tune.suggest.BasicVariantGenerator`. """ + def add_configurations(self, experiments): + """Tracks given experiment specifications. + + Arguments: + experiments (Experiment | list | dict): Experiments to run. + """ + raise NotImplementedError + def next_trials(self): """Provides Trial objects to be queued into the TrialRunner. 
diff --git a/python/ray/tune/suggest/suggestion.py b/python/ray/tune/suggest/suggestion.py index e7e2000a5c33..d2c682b22102 100644 --- a/python/ray/tune/suggest/suggestion.py +++ b/python/ray/tune/suggest/suggestion.py @@ -2,7 +2,7 @@ from __future__ import division from __future__ import print_function -from itertools import chain +import itertools import copy from ray.tune.error import TuneError @@ -24,26 +24,35 @@ class SuggestionAlgorithm(SearchAlgorithm): subsequent notifications. Example: - >>> suggester = SuggestionAlgorithm({ ... }) + >>> suggester = SuggestionAlgorithm() + >>> suggester.add_configurations({ ... }) >>> new_parameters = suggester._suggest() >>> suggester.on_trial_complete(trial_id, result) >>> better_parameters = suggester._suggest() """ - def __init__(self, experiments=None): + def __init__(self): """Constructs a generator given experiment specifications. Arguments: experiments (Experiment | list | dict): Experiments to run. """ - experiment_list = convert_to_experiment_list(experiments) self._parser = make_parser() - self._trial_generator = chain.from_iterable([ - self._generate_trials(experiment.spec, experiment.name) - for experiment in experiment_list - ]) + self._trial_generator = [] self._finished = False + def add_configurations(self, experiments): + """Chains generator given experiment specifications. + + Arguments: + experiments (Experiment | list | dict): Experiments to run. + """ + experiment_list = convert_to_experiment_list(experiments) + for experiment in experiment_list: + self._trial_generator = itertools.chain( + self._trial_generator, + self._generate_trials(experiment.spec, experiment.name)) + def next_trials(self): """Provides a batch of Trial objects to be queued into the TrialRunner. @@ -104,7 +113,8 @@ def _suggest(self, trial_id): TrialRunner from querying. Example: - >>> suggester = SuggestionAlgorithm({ ... 
}, max_concurrent=1) + >>> suggester = SuggestionAlgorithm(max_concurrent=1) + >>> suggester.add_configurations({ ... }) >>> parameters_1 = suggester._suggest() >>> parameters_2 = suggester._suggest() >>> parameters_2 is None @@ -116,12 +126,12 @@ def _suggest(self, trial_id): class _MockSuggestionAlgorithm(SuggestionAlgorithm): - def __init__(self, experiments, max_concurrent=2, **kwargs): + def __init__(self, max_concurrent=2, **kwargs): self._max_concurrent = max_concurrent self.live_trials = {} self.counter = {"result": 0, "complete": 0} self.stall = False - super(_MockSuggestionAlgorithm, self).__init__(experiments, **kwargs) + super(_MockSuggestionAlgorithm, self).__init__(**kwargs) def _suggest(self, trial_id): if len(self.live_trials) < self._max_concurrent and not self.stall: diff --git a/python/ray/tune/suggest/variant_generator.py b/python/ray/tune/suggest/variant_generator.py index 866f7d262e3a..98b830754093 100644 --- a/python/ray/tune/suggest/variant_generator.py +++ b/python/ray/tune/suggest/variant_generator.py @@ -46,7 +46,11 @@ def generate_variants(unresolved_spec): def grid_search(values): - """Convenience method for specifying grid search over a value.""" + """Convenience method for specifying grid search over a value. + + Arguments: + values: An iterable whose parameters will be gridded. 
+ """ return {"grid_search": values} diff --git a/python/ray/tune/test/trial_runner_test.py b/python/ray/tune/test/trial_runner_test.py index 068d08b2309b..02083ba10903 100644 --- a/python/ray/tune/test/trial_runner_test.py +++ b/python/ray/tune/test/trial_runner_test.py @@ -11,7 +11,7 @@ from ray.tune import Trainable, TuneError from ray.tune import register_env, register_trainable, run_experiments -from ray.tune.trial_scheduler import TrialScheduler, FIFOScheduler +from ray.tune.schedulers import TrialScheduler, FIFOScheduler from ray.tune.registry import _global_registry, TRAINABLE_CLASS from ray.tune.result import DEFAULT_RESULTS_DIR, TIMESTEPS_TOTAL, DONE from ray.tune.util import pin_in_object_store, get_pinned_object @@ -449,7 +449,8 @@ def train(config, reporter): register_trainable("f1", train) - alg = BasicVariantGenerator({ + alg = BasicVariantGenerator() + alg.add_configurations({ "foo": { "run": "f1", "config": { @@ -462,6 +463,30 @@ def train(config, reporter): self.assertEqual(trial.status, Trial.TERMINATED) self.assertEqual(trial.last_result[TIMESTEPS_TOTAL], 99) + def testAutoregisterTrainable(self): + def train(config, reporter): + for i in range(100): + reporter(timesteps_total=i) + + class B(Trainable): + def _train(self): + return dict(timesteps_this_iter=1, done=True) + + register_trainable("f1", train) + trials = run_experiments({ + "foo": { + "run": train, + "config": { + "script_min_iter_time_s": 0 + } + }, + "bar": { + "run": B + } + }) + for trial in trials: + self.assertEqual(trial.status, Trial.TERMINATED) + class VariantGeneratorTest(unittest.TestCase): def setUp(self): @@ -472,7 +497,8 @@ def tearDown(self): _register_all() # re-register the evicted objects def generate_trials(self, spec, name): - suggester = BasicVariantGenerator({name: spec}) + suggester = BasicVariantGenerator() + suggester.add_configurations({name: spec}) return suggester.next_trials() def testParseToTrials(self): @@ -611,7 +637,8 @@ def 
testMaxConcurrentSuggestions(self): } experiments = [Experiment.from_json("test", experiment_spec)] - searcher = _MockSuggestionAlgorithm(experiments, max_concurrent=4) + searcher = _MockSuggestionAlgorithm(max_concurrent=4) + searcher.add_configurations(experiments) trials = searcher.next_trials() self.assertEqual(len(trials), 4) self.assertEqual(searcher.next_trials(), []) @@ -667,7 +694,8 @@ def train(config, reporter): } for name, spec in experiments.items(): - trial_generator = BasicVariantGenerator({name: spec}) + trial_generator = BasicVariantGenerator() + trial_generator.add_configurations({name: spec}) for trial in trial_generator.next_trials(): trial.start() self.assertLessEqual(len(trial.logdir), 200) @@ -989,7 +1017,8 @@ def testSearchAlgNotification(self): ray.init(num_cpus=4, num_gpus=2) experiment_spec = {"run": "__fake", "stop": {"training_iteration": 2}} experiments = [Experiment.from_json("test", experiment_spec)] - searcher = _MockSuggestionAlgorithm(experiments, max_concurrent=10) + searcher = _MockSuggestionAlgorithm(max_concurrent=10) + searcher.add_configurations(experiments) runner = TrialRunner(search_alg=searcher) runner.step() trials = runner.get_trials() @@ -1009,7 +1038,8 @@ def testSearchAlgFinished(self): ray.init(num_cpus=4, num_gpus=2) experiment_spec = {"run": "__fake", "stop": {"training_iteration": 1}} experiments = [Experiment.from_json("test", experiment_spec)] - searcher = _MockSuggestionAlgorithm(experiments, max_concurrent=10) + searcher = _MockSuggestionAlgorithm(max_concurrent=10) + searcher.add_configurations(experiments) runner = TrialRunner(search_alg=searcher) runner.step() trials = runner.get_trials() @@ -1033,7 +1063,8 @@ def on_trial_result(self, *args, **kwargs): ray.init(num_cpus=4, num_gpus=2) experiment_spec = {"run": "__fake", "stop": {"training_iteration": 2}} experiments = [Experiment.from_json("test", experiment_spec)] - searcher = _MockSuggestionAlgorithm(experiments, max_concurrent=10) + searcher = 
_MockSuggestionAlgorithm(max_concurrent=10) + searcher.add_configurations(experiments) runner = TrialRunner(search_alg=searcher, scheduler=_MockScheduler()) runner.step() trials = runner.get_trials() @@ -1058,7 +1089,8 @@ def testSearchAlgStalled(self): } } experiments = [Experiment.from_json("test", experiment_spec)] - searcher = _MockSuggestionAlgorithm(experiments, max_concurrent=1) + searcher = _MockSuggestionAlgorithm(max_concurrent=1) + searcher.add_configurations(experiments) runner = TrialRunner(search_alg=searcher) runner.step() trials = runner.get_trials() diff --git a/python/ray/tune/test/trial_scheduler_test.py b/python/ray/tune/test/trial_scheduler_test.py index f23a59c9d812..36416cf03010 100644 --- a/python/ray/tune/test/trial_scheduler_test.py +++ b/python/ray/tune/test/trial_scheduler_test.py @@ -7,12 +7,11 @@ import numpy as np import ray -from ray.tune.hyperband import HyperBandScheduler -from ray.tune.async_hyperband import AsyncHyperBandScheduler -from ray.tune.pbt import PopulationBasedTraining, explore -from ray.tune.median_stopping_rule import MedianStoppingRule +from ray.tune.schedulers import (HyperBandScheduler, AsyncHyperBandScheduler, + PopulationBasedTraining, MedianStoppingRule, + TrialScheduler) +from ray.tune.schedulers.pbt import explore from ray.tune.trial import Trial, Resources -from ray.tune.trial_scheduler import TrialScheduler from ray.rllib import _register_all _register_all() diff --git a/python/ray/tune/trainable.py b/python/ray/tune/trainable.py index 38b31a5805a3..7d6301fa5794 100644 --- a/python/ray/tune/trainable.py +++ b/python/ray/tune/trainable.py @@ -36,26 +36,23 @@ class Trainable(object): Note that, if you don't require checkpoint/restore functionality, then instead of implementing this class you can also get away with supplying - just a `my_train(config, reporter)` function and calling: - - ``register_trainable("my_func", train)`` - - to register it for use with Tune. 
The function will be automatically - converted to this interface (sans checkpoint functionality). - - Attributes: - config (obj): The hyperparam configuration for this trial. - logdir (str): Directory in which training outputs should be placed. + just a ``my_train(config, reporter)`` function to the config. + The function will be automatically converted to this interface + (sans checkpoint functionality). """ def __init__(self, config=None, logger_creator=None): """Initialize an Trainable. + Sets up logging and points ``self.logdir`` to a directory in which + training outputs should be placed. + Subclasses should prefer defining ``_setup()`` instead of overriding ``__init__()`` directly. Args: - config (dict): Trainable-specific configuration data. + config (dict): Trainable-specific configuration data. By default + will be saved as ``self.config``. logger_creator (func): Function that creates a ray.tune.Logger object. If unspecified, a default logger is created. """ @@ -102,28 +99,36 @@ def train(self): """Runs one logical iteration of training. Subclasses should override ``_train()`` instead to return results. - This class automatically fills the following fields in the result: - done (bool): training is terminated. Filled only if not provided. - time_this_iter_s (float): Time in seconds - this iteration took to run. This may be overriden in order to - override the system-computed time difference. - time_total_s (float): Accumulated time in seconds - for this entire experiment. - experiment_id (str): Unique string identifier - for this experiment. This id is preserved - across checkpoint / restore calls. - training_iteration (int): The index of this - training iteration, e.g. call to train(). - pid (str): The pid of the training process. - date (str): A formatted date of - when the result was processed. - timestamp (str): A UNIX timestamp of - when the result was processed. - hostname (str): The hostname of the machine - hosting the training process. 
- node_ip (str): The node ip of the machine - hosting the training process. + + `done` (bool): training is terminated. Filled only if not provided. + + `time_this_iter_s` (float): Time in seconds this iteration + took to run. This may be overridden in order to override the + system-computed time difference. + + `time_total_s` (float): Accumulated time in seconds for this + entire experiment. + + `experiment_id` (str): Unique string identifier + for this experiment. This id is preserved + across checkpoint / restore calls. + + `training_iteration` (int): The index of this + training iteration, e.g. call to train(). + + `pid` (str): The pid of the training process. + + `date` (str): A formatted date of when the result was processed. + + `timestamp` (str): A UNIX timestamp of when the result + was processed. + + `hostname` (str): Hostname of the machine hosting the training + process. + + `node_ip` (str): Node ip of the machine hosting the training + process. Returns: A dict that describes training progress. @@ -283,7 +288,11 @@ def _restore(self, checkpoint_path): raise NotImplementedError def _setup(self): - """Subclasses should override this for custom initialization.""" + """Subclasses should override this for custom initialization. + + Subclasses can access the hyperparameter configuration via + ``self.config``. + """ pass def _stop(self): diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py index 331de9d2cea9..be8c37e321c6 100644 --- a/python/ray/tune/trial.py +++ b/python/ray/tune/trial.py @@ -95,7 +95,6 @@ def __init__(self, The args here take the same meaning as the command line flags defined in ray.tune.config_parser.
""" - if not has_trainable(trainable_name): # Make sure rllib agents are registered from ray import rllib # noqa: F401 @@ -267,8 +266,8 @@ def location_string(hostname, pid): self._status_string(), location_string( self.last_result.get(HOSTNAME), - self.last_result.get(PID))), - '{} s'.format(int(self.last_result.get(TIME_TOTAL_S))), + self.last_result.get(PID))), '{} s'.format( + int(self.last_result.get(TIME_TOTAL_S))) ] if self.last_result.get("episode_reward_mean") is not None: diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index 678a544d3bf2..0ac729ef5090 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -12,7 +12,7 @@ from ray.tune.result import TIME_THIS_ITER_S from ray.tune.web_server import TuneServer from ray.tune.trial import Trial, Resources -from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler +from ray.tune.schedulers import FIFOScheduler, TrialScheduler MAX_DEBUG_TRIALS = 20 diff --git a/python/ray/tune/tune.py b/python/ray/tune/tune.py index 6453d6d85a64..3d122091e7a7 100644 --- a/python/ray/tune/tune.py +++ b/python/ray/tune/tune.py @@ -6,13 +6,11 @@ from ray.tune.error import TuneError from ray.tune.suggest import BasicVariantGenerator -from ray.tune.hyperband import HyperBandScheduler -from ray.tune.async_hyperband import AsyncHyperBandScheduler -from ray.tune.median_stopping_rule import MedianStoppingRule from ray.tune.trial import Trial, DEBUG_PRINT_INTERVAL from ray.tune.log_sync import wait_for_log_sync from ray.tune.trial_runner import TrialRunner -from ray.tune.trial_scheduler import FIFOScheduler +from ray.tune.schedulers import (HyperBandScheduler, AsyncHyperBandScheduler, + FIFOScheduler, MedianStoppingRule) from ray.tune.web_server import TuneServer _SCHEDULERS = { @@ -38,10 +36,11 @@ def run_experiments(experiments=None, server_port=TuneServer.DEFAULT_PORT, verbose=True, queue_trials=False): - """Tunes experiments. 
+ """Runs and blocks until all trials finish. Args: - experiments (Experiment | list | dict): Experiments to run. + experiments (Experiment | list | dict): Experiments to run. Will be + passed to `search_alg` via `add_configurations`. search_alg (SearchAlgorithm): Search Algorithm. Defaults to BasicVariantGenerator. scheduler (TrialScheduler): Scheduler for executing @@ -56,6 +55,22 @@ def run_experiments(experiments=None, be set to True when running on an autoscaling cluster to enable automatic scale-up. + Examples: + >>> experiment_spec = Experiment("experiment", my_func) + >>> run_experiments(experiments=experiment_spec) + + >>> experiment_spec = {"experiment": {"run": my_func}} + >>> run_experiments(experiments=experiment_spec) + + >>> run_experiments( + >>> experiments=experiment_spec, + >>> scheduler=MedianStoppingRule(...)) + + >>> run_experiments( + >>> experiments=experiment_spec, + >>> search_alg=SearchAlgorithm(), + >>> scheduler=MedianStoppingRule(...)) + Returns: List of Trial objects, holding data for each executed trial. """ @@ -63,9 +78,9 @@ def run_experiments(experiments=None, scheduler = FIFOScheduler() if search_alg is None: - assert experiments is not None, "Experiments need to be specified" \ - "if search_alg is not provided." - search_alg = BasicVariantGenerator(experiments) + search_alg = BasicVariantGenerator() + + search_alg.add_configurations(experiments) runner = TrialRunner( search_alg, diff --git a/python/ray/tune/web_server.py b/python/ray/tune/web_server.py index d774befd5bb2..86e903dd4df3 100644 --- a/python/ray/tune/web_server.py +++ b/python/ray/tune/web_server.py @@ -124,7 +124,8 @@ def get_trial(): elif command == TuneClient.ADD: name = args["name"] spec = args["spec"] - trial_generator = BasicVariantGenerator({name: spec}) + trial_generator = BasicVariantGenerator() + trial_generator.add_configurations({name: spec}) for trial in trial_generator.next_trials(): runner.add_trial(trial) else: