diff --git a/.gitignore b/.gitignore
index f8130b3a2f85..91189b6f9c41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,9 @@
/src/ray/object_manager/format/*_generated.h
/src/ray/raylet/format/*_generated.h
+# Modin source files
+/python/ray/modin
+
# Redis temporary files
*dump.rdb
diff --git a/.travis.yml b/.travis.yml
index debf450738a7..5f45138cbdda 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -131,6 +131,21 @@ script:
# module is only found if the test directory is in the PYTHONPATH.
- export PYTHONPATH="$PYTHONPATH:./test/"
+ # ray tune tests
+ - python python/ray/tune/test/dependency_test.py
+ - python -m pytest -v python/ray/tune/test/trial_runner_test.py
+ - python -m pytest -v python/ray/tune/test/trial_scheduler_test.py
+ - python -m pytest -v python/ray/tune/test/experiment_test.py
+ - python -m pytest -v python/ray/tune/test/tune_server_test.py
+ - python -m pytest -v python/ray/tune/test/ray_trial_executor_test.py
+ - python -m pytest -v python/ray/tune/test/automl_searcher_test.py
+
+ # ray rllib tests
+ - python -m pytest -v python/ray/rllib/test/test_catalog.py
+ - python -m pytest -v python/ray/rllib/test/test_filters.py
+ - python -m pytest -v python/ray/rllib/test/test_optimizers.py
+ - python -m pytest -v python/ray/rllib/test/test_evaluators.py
+
- python -m pytest -v python/ray/test/test_global_state.py
- python -m pytest -v python/ray/test/test_queue.py
- python -m pytest -v python/ray/test/test_ray_init.py
@@ -153,24 +168,12 @@ script:
- python -m pytest -v test/credis_test.py
- python -m pytest -v test/node_manager_test.py
- # ray tune tests
- - python python/ray/tune/test/dependency_test.py
- - python -m pytest -v python/ray/tune/test/trial_runner_test.py
- - python -m pytest -v python/ray/tune/test/trial_scheduler_test.py
- - python -m pytest -v python/ray/tune/test/experiment_test.py
- - python -m pytest -v python/ray/tune/test/tune_server_test.py
- - python -m pytest -v python/ray/tune/test/ray_trial_executor_test.py
- - python -m pytest -v python/ray/tune/test/automl_searcher_test.py
-
- # ray rllib tests
- - python -m pytest -v python/ray/rllib/test/test_catalog.py
- - python -m pytest -v python/ray/rllib/test/test_filters.py
- - python -m pytest -v python/ray/rllib/test/test_optimizers.py
- - python -m pytest -v python/ray/rllib/test/test_evaluators.py
-
# ray temp file tests
- python -m pytest -v test/tempfile_test.py
+ # modin test files
+ - python python/ray/test/test_modin.py
+
deploy:
- provider: s3
access_key_id: AKIAJ2L7XDUSZVTXI5QA
diff --git a/.travis/install-dependencies.sh b/.travis/install-dependencies.sh
index 3f1ea4922bc8..5bae4ba87f8d 100755
--- a/.travis/install-dependencies.sh
+++ b/.travis/install-dependencies.sh
@@ -24,7 +24,7 @@ if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda2-4.5.4-Linux-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
- pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.22 requests \
+ pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.23.4 requests \
feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock
elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
sudo apt-get update
@@ -33,7 +33,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
- pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.22 requests \
+ pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.23.4 requests \
feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout
elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
# check that brew is installed
@@ -50,7 +50,7 @@ elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda2-4.5.4-MacOSX-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
- pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.22 requests \
+ pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.23.4 requests \
feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock
elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
# check that brew is installed
@@ -67,7 +67,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.4-MacOSX-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
- pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.22 requests \
+ pip install -q cython==0.27.3 cmake tensorflow gym opencv-python pyyaml pandas==0.23.4 requests \
feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout
elif [[ "$LINT" == "1" ]]; then
sudo apt-get update
diff --git a/.travis/test-wheels.sh b/.travis/test-wheels.sh
index 1765135ec9be..f7870ea52d49 100755
--- a/.travis/test-wheels.sh
+++ b/.travis/test-wheels.sh
@@ -59,7 +59,7 @@ if [[ "$platform" == "linux" ]]; then
if [[ "$NUMBER_OF_WHEELS" != "5" ]]; then
echo "Wrong number of wheels found."
ls -l $ROOT_DIR/../.whl/
- exit 1
+ exit 2
fi
elif [[ "$platform" == "macosx" ]]; then
@@ -94,5 +94,5 @@ elif [[ "$platform" == "macosx" ]]; then
done
else
echo "Unrecognized environment."
- exit 1
+ exit 3
fi
diff --git a/README.rst b/README.rst
index 3a8855b2439e..7e50123d9bf6 100644
--- a/README.rst
+++ b/README.rst
@@ -1,5 +1,6 @@
-Ray
-===
+.. raw:: html
+
+
.. image:: https://travis-ci.com/ray-project/ray.svg?branch=master
:target: https://travis-ci.com/ray-project/ray
@@ -7,9 +8,12 @@ Ray
.. image:: https://readthedocs.org/projects/ray/badge/?version=latest
:target: http://ray.readthedocs.io/en/latest/?badge=latest
+.. image:: https://img.shields.io/badge/pypi-0.6.0-blue.svg
+ :target: https://pypi.org/project/ray/
+
|
-Ray is a flexible, high-performance distributed execution framework.
+**Ray is a flexible, high-performance distributed execution framework.**
Ray is easy to install: ``pip install ray``
diff --git a/cmake/Modules/ArrowExternalProject.cmake b/cmake/Modules/ArrowExternalProject.cmake
index 572e12b27b43..5a054afc74f1 100644
--- a/cmake/Modules/ArrowExternalProject.cmake
+++ b/cmake/Modules/ArrowExternalProject.cmake
@@ -15,10 +15,10 @@
# - PLASMA_SHARED_LIB
set(arrow_URL https://github.com/apache/arrow.git)
-# The PR for this commit is https://github.com/apache/arrow/pull/2826. We
+# The PR for this commit is https://github.com/apache/arrow/pull/3061. We
# include the link here to make it easier to find the right commit because
# Arrow often rewrites git history and invalidates certain commits.
-set(arrow_TAG b4f7ed6d6ed5cdb6dd136bac3181a438f35c8ea0)
+set(arrow_TAG a667fca3b71772886bb2595986266d2039823dcc)
set(ARROW_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/external/arrow-install)
set(ARROW_HOME ${ARROW_INSTALL_PREFIX})
diff --git a/doc/source/autoscaling.rst b/doc/source/autoscaling.rst
index aca1375753a9..90c8e92f3d27 100644
--- a/doc/source/autoscaling.rst
+++ b/doc/source/autoscaling.rst
@@ -78,6 +78,8 @@ You can use ``ray exec`` to conveniently run commands on clusters. Note that scr
You can also use ``ray submit`` to execute Python scripts on clusters. This will ``rsync`` the designated file onto the cluster and execute it with the given arguments.
+.. code-block:: bash
+
# Run a Python script in a detached tmux session
$ ray submit cluster.yaml --tmux --start --stop tune_experiment.py
diff --git a/doc/source/conf.py b/doc/source/conf.py
index e362f73309a3..2a2b1a37c207 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -42,6 +42,7 @@
"ray.core.generated.ClientTableData",
"ray.core.generated.GcsTableEntry",
"ray.core.generated.HeartbeatTableData",
+ "ray.core.generated.HeartbeatBatchTableData",
"ray.core.generated.DriverTableData",
"ray.core.generated.ErrorTableData",
"ray.core.generated.ProfileTableData",
diff --git a/doc/source/images/ray_logo.png b/doc/source/images/ray_logo.png
new file mode 100644
index 000000000000..05840a7ff453
Binary files /dev/null and b/doc/source/images/ray_logo.png differ
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
index 20d49edaea7d..68bd37ae96f5 100644
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@@ -10,7 +10,7 @@ You can install the latest stable version of Ray as follows.
.. code-block:: bash
- pip install -U ray
+ pip install -U ray # also recommended: ray[debug]
Trying snapshots from master
----------------------------
@@ -37,16 +37,16 @@ Here are links to the latest wheels (which are built off of master). To install
=================== ===================
-.. _`Linux Python 3.7`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp37-cp37m-manylinux1_x86_64.whl
-.. _`Linux Python 3.6`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp36-cp36m-manylinux1_x86_64.whl
-.. _`Linux Python 3.5`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp35-cp35m-manylinux1_x86_64.whl
-.. _`Linux Python 3.4`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp34-cp34m-manylinux1_x86_64.whl
-.. _`Linux Python 2.7`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp27-cp27mu-manylinux1_x86_64.whl
-.. _`MacOS Python 3.7`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp37-cp37m-macosx_10_6_intel.whl
-.. _`MacOS Python 3.6`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp36-cp36m-macosx_10_6_intel.whl
-.. _`MacOS Python 3.5`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp35-cp35m-macosx_10_6_intel.whl
-.. _`MacOS Python 3.4`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp34-cp34m-macosx_10_6_intel.whl
-.. _`MacOS Python 2.7`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp27-cp27m-macosx_10_6_intel.whl
+.. _`Linux Python 3.7`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp37-cp37m-manylinux1_x86_64.whl
+.. _`Linux Python 3.6`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp36-cp36m-manylinux1_x86_64.whl
+.. _`Linux Python 3.5`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp35-cp35m-manylinux1_x86_64.whl
+.. _`Linux Python 3.4`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp34-cp34m-manylinux1_x86_64.whl
+.. _`Linux Python 2.7`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp27-cp27mu-manylinux1_x86_64.whl
+.. _`MacOS Python 3.7`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp37-cp37m-macosx_10_6_intel.whl
+.. _`MacOS Python 3.6`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp36-cp36m-macosx_10_6_intel.whl
+.. _`MacOS Python 3.5`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp35-cp35m-macosx_10_6_intel.whl
+.. _`MacOS Python 3.4`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp34-cp34m-macosx_10_6_intel.whl
+.. _`MacOS Python 2.7`: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp27-cp27m-macosx_10_6_intel.whl
Building Ray from source
diff --git a/doc/source/rllib-algorithms.rst b/doc/source/rllib-algorithms.rst
index 66bf08a6c399..1d0501215745 100644
--- a/doc/source/rllib-algorithms.rst
+++ b/doc/source/rllib-algorithms.rst
@@ -133,10 +133,10 @@ Tuned examples: `Pendulum-v0 `__ `[implementation] `__
-RLlib DQN is implemented using the SyncReplayOptimizer. The algorithm can be scaled by increasing the number of workers, using the AsyncGradientsOptimizer for async DQN, or using Ape-X. Memory usage is reduced by compressing samples in the replay buffer with LZ4. All of the DQN improvements evaluated in `Rainbow `__ are available, though not all are enabled by default.
+RLlib DQN is implemented using the SyncReplayOptimizer. The algorithm can be scaled by increasing the number of workers, using the AsyncGradientsOptimizer for async DQN, or using Ape-X. Memory usage is reduced by compressing samples in the replay buffer with LZ4. All of the DQN improvements evaluated in `Rainbow `__ are available, though not all are enabled by default. See also how to use `parametric-actions in DQN `__.
Tuned examples: `PongDeterministic-v4 `__, `Rainbow configuration `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__, `with Dueling and Double-Q `__, `with Distributional DQN `__.
diff --git a/doc/source/rllib-env.rst b/doc/source/rllib-env.rst
index c1381f561cd4..37ea011a0b5c 100644
--- a/doc/source/rllib-env.rst
+++ b/doc/source/rllib-env.rst
@@ -7,42 +7,56 @@ RLlib works with several different types of environments, including `OpenAI Gym
**Compatibility matrix**:
-============= ================ ================== =========== ==================
-Algorithm Discrete Actions Continuous Actions Multi-Agent Recurrent Policies
-============= ================ ================== =========== ==================
-A2C, A3C **Yes** **Yes** **Yes** **Yes**
-PPO **Yes** **Yes** **Yes** **Yes**
-PG **Yes** **Yes** **Yes** **Yes**
-IMPALA **Yes** No **Yes** **Yes**
-DQN, Rainbow **Yes** No **Yes** No
-DDPG, TD3 No **Yes** **Yes** No
-APEX-DQN **Yes** No **Yes** No
-APEX-DDPG No **Yes** **Yes** No
-ES **Yes** **Yes** No No
-ARS **Yes** **Yes** No No
-============= ================ ================== =========== ==================
-
-In the high-level agent APIs, environments are identified with string names. By default, the string will be interpreted as a gym `environment name `__, however you can also register custom environments by name:
+============= ======================= ================== =========== ==================
+Algorithm Discrete Actions Continuous Actions Multi-Agent Recurrent Policies
+============= ======================= ================== =========== ==================
+A2C, A3C **Yes** `+parametric`_ **Yes** **Yes** **Yes**
+PPO **Yes** `+parametric`_ **Yes** **Yes** **Yes**
+PG **Yes** `+parametric`_ **Yes** **Yes** **Yes**
+IMPALA **Yes** `+parametric`_ No **Yes** **Yes**
+DQN, Rainbow **Yes** `+parametric`_ No **Yes** No
+DDPG, TD3 No **Yes** **Yes** No
+APEX-DQN **Yes** `+parametric`_ No **Yes** No
+APEX-DDPG No **Yes** **Yes** No
+ES **Yes** **Yes** No No
+ARS **Yes** **Yes** No No
+============= ======================= ================== =========== ==================
+
+.. _`+parametric`: rllib-models.html#variable-length-parametric-action-spaces
+
+You can pass either a string name or a Python class to specify an environment. By default, strings will be interpreted as a gym `environment name `__. Custom env classes must take a single ``env_config`` parameter in their constructor:
.. code-block:: python
import ray
- from ray.tune.registry import register_env
from ray.rllib.agents import ppo
- def env_creator(env_config):
- import gym
- return gym.make("CartPole-v0") # or return your own custom env
+ class MyEnv(gym.Env):
+ def __init__(self, env_config):
+ self.action_space = ...
+ self.observation_space = ...
+ ...
- register_env("my_env", env_creator)
ray.init()
- trainer = ppo.PPOAgent(env="my_env", config={
- "env_config": {}, # config to pass to env creator
+ trainer = ppo.PPOAgent(env=MyEnv, config={
+ "env_config": {}, # config to pass to env class
})
while True:
print(trainer.train())
+You can also register a custom env creator function with a string name. This function must take a single ``env_config`` parameter and return an env instance:
+
+.. code-block:: python
+
+ from ray.tune.registry import register_env
+
+ def env_creator(env_config):
+ return MyEnv(...) # return an env instance
+
+ register_env("my_env", env_creator)
+ trainer = ppo.PPOAgent(env="my_env")
+
Configuring Environments
------------------------
diff --git a/doc/source/rllib-models.rst b/doc/source/rllib-models.rst
index 5fde37f53087..9e7070b66c48 100644
--- a/doc/source/rllib-models.rst
+++ b/doc/source/rllib-models.rst
@@ -30,7 +30,7 @@ The following is a list of the built-in model hyperparameters:
Custom Models
-------------
-Custom models should subclass the common RLlib `model class `__ and override the ``_build_layers_v2`` method. This method takes in a dict of tensor inputs (the observation ``obs``, ``prev_action``, and ``prev_reward``), and returns a feature layer and float vector of the specified output size. You can also override the ``value_function`` method to implement a custom value branch. A self-supervised loss can be defined via the ``loss`` method. The model can then be registered and used in place of a built-in model:
+Custom models should subclass the common RLlib `model class `__ and override the ``_build_layers_v2`` method. This method takes in a dict of tensor inputs (the observation ``obs``, ``prev_action``, ``prev_reward``, and ``is_training``), and returns a feature layer and float vector of the specified output size. You can also override the ``value_function`` method to implement a custom value branch. A self-supervised loss can be defined via the ``loss`` method. The model can then be registered and used in place of a built-in model:
.. code-block:: python
@@ -44,7 +44,7 @@ Custom models should subclass the common RLlib `model class >> print(input_dict)
{'prev_actions': ,
'prev_rewards': ,
+ 'is_training': ,
'obs': OrderedDict([
('sensors', OrderedDict([
('front_cam', [
@@ -110,6 +111,47 @@ Custom models should subclass the common RLlib `model class `__ and associated `training scripts `__. You can also reference the `unit tests `__ for Tuple and Dict spaces, which show how to access nested observation fields.
+Custom Recurrent Models
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Instead of using the ``use_lstm: True`` option, it can be preferable to use a custom recurrent model. This provides more control over postprocessing of the LSTM output and can also allow the use of multiple LSTM cells to process different portions of the input. The only difference from a normal custom model is that you have to define ``self.state_init``, ``self.state_in``, and ``self.state_out``. You can refer to the existing `lstm.py `__ model as an example to implement your own model:
+
+.. code-block:: python
+
+ class MyCustomLSTM(Model):
+ def _build_layers_v2(self, input_dict, num_outputs, options):
+ # Some initial layers to process inputs, shape [BATCH, OBS...].
+ features = some_hidden_layers(input_dict["obs"])
+
+ # Add back the nested time dimension for tf.dynamic_rnn, new shape
+ # will be [BATCH, MAX_SEQ_LEN, OBS...].
+ last_layer = add_time_dimension(features, self.seq_lens)
+
+ # Setup the LSTM cell (see lstm.py for an example)
+ lstm = rnn.BasicLSTMCell(256, state_is_tuple=True)
+ self.state_init = ...
+ self.state_in = ...
+ lstm_out, lstm_state = tf.nn.dynamic_rnn(
+ lstm,
+ last_layer,
+ initial_state=...,
+ sequence_length=self.seq_lens,
+ time_major=False,
+ dtype=tf.float32)
+ self.state_out = list(lstm_state)
+
+ # Drop the time dimension again so back to shape [BATCH, OBS...].
+ # Note that we retain the zero padding (see issue #2992).
+ last_layer = tf.reshape(lstm_out, [-1, cell_size])
+ logits = linear(last_layer, num_outputs, "action",
+ normc_initializer(0.01))
+ return logits, last_layer
+
+Batch Normalization
+~~~~~~~~~~~~~~~~~~~
+
+You can use ``tf.layers.batch_normalization(x, training=input_dict["is_training"])`` to add batch norm layers to your custom model: `code example `__. RLlib will automatically run the update ops for the batch norm layers during optimization (see `tf_policy_graph.py `__ and `multi_gpu_impl.py `__ for the exact handling of these updates).
+
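As a rough sketch of that pattern (using the ``_build_layers_v2`` API described above; the ``BatchNormModel`` name, layer sizes, and the assumption of a flat Box observation are illustrative and not part of this change), a custom model with a batch norm layer could look like:

.. code-block:: python

    import tensorflow as tf

    from ray.rllib.models import Model, ModelCatalog


    class BatchNormModel(Model):
        def _build_layers_v2(self, input_dict, num_outputs, options):
            # Assumes a flat (Box) observation.
            hidden = tf.layers.dense(
                input_dict["obs"], 256, activation=tf.nn.relu)
            # "is_training" toggles batch norm between train and inference
            # behavior; RLlib runs the resulting UPDATE_OPS during optimization.
            hidden = tf.layers.batch_normalization(
                hidden, training=input_dict["is_training"])
            logits = tf.layers.dense(hidden, num_outputs, activation=None)
            return logits, hidden


    # Register it so it can be selected via {"model": {"custom_model": "bn_model"}}.
    ModelCatalog.register_custom_model("bn_model", BatchNormModel)
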
Custom Preprocessors
--------------------
@@ -188,6 +230,53 @@ Then, you can create an agent with your custom policy graph by:
In this example we overrode existing methods of the DDPG policy graph, i.e., `_build_q_network`, `_build_p_network`, `_build_action_network`, and `_build_actor_critic_loss`, but you can also replace the graph class entirely.
+Variable-length / Parametric Action Spaces
+------------------------------------------
+
+Custom models can be used to work with environments where (1) the set of valid actions varies per step, and/or (2) the number of valid actions is very large, as in `OpenAI Five `__ and `Horizon `__. The general idea is that the meaning of actions can be completely conditioned on the observation, i.e., the ``a`` in ``Q(s, a)`` becomes just a token in ``[0, MAX_AVAIL_ACTIONS)`` that only has meaning in the context of ``s``. This works with algorithms in the `DQN and policy-gradient families `__ and can be implemented as follows:
+
+1. The environment should return a mask and/or list of valid action embeddings as part of the observation for each step. To enable batching, the number of actions can be allowed to vary from 1 to some max number:
+
+.. code-block:: python
+
+ class MyParamActionEnv(gym.Env):
+ def __init__(self, max_avail_actions):
+ self.action_space = Discrete(max_avail_actions)
+ self.observation_space = Dict({
+ "action_mask": Box(0, 1, shape=(max_avail_actions, )),
+ "avail_actions": Box(-1, 1, shape=(max_avail_actions, action_embedding_sz)),
+ "real_obs": ...,
+ })
+
+2. A custom model can be defined that interprets the ``action_mask`` and ``avail_actions`` portions of the observation. Here the model computes the action logits via the dot product of some network output and each action embedding. Invalid actions can be masked out of the softmax by scaling their probabilities to zero:
+
+.. code-block:: python
+
+ class MyParamActionModel(Model):
+ def _build_layers_v2(self, input_dict, num_outputs, options):
+ avail_actions = input_dict["obs"]["avail_actions"]
+ action_mask = input_dict["obs"]["action_mask"]
+
+ output = FullyConnectedNetwork(
+ input_dict["obs"]["real_obs"], num_outputs=action_embedding_sz)
+
+ # Expand the model output to [BATCH, 1, EMBED_SIZE]. Note that the
+ # avail actions tensor is of shape [BATCH, MAX_ACTIONS, EMBED_SIZE].
+ intent_vector = tf.expand_dims(output, 1)
+
+ # Shape of logits is [BATCH, MAX_ACTIONS].
+ action_logits = tf.reduce_sum(avail_actions * intent_vector, axis=2)
+
+ # Mask out invalid actions (use tf.float32.min for stability)
+ inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
+ masked_logits = inf_mask + action_logits
+
+ return masked_logits, last_layer
+
+
+Depending on your use case it may make sense to use just the masking, just action embeddings, or both. For a runnable example of this in code, check out `parametric_action_cartpole.py `__. Note that since masking introduces ``tf.float32.min`` values into the model output, this technique might not work with all algorithm options. For example, algorithms might crash if they incorrectly process the ``tf.float32.min`` values. The cartpole example has working configurations for DQN and several policy gradient algorithms.
+
+
Model-Based Rollouts
--------------------
@@ -199,7 +288,8 @@ With a custom policy graph, you can also perform model-based rollouts and option
def compute_actions(self,
obs_batch,
state_batches,
- is_training=False,
+ prev_action_batch=None,
+ prev_reward_batch=None,
episodes=None):
# compute a batch of actions based on the current obs_batch
# and state of each episode (i.e., for multiagent). You can do
diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst
index 6b1366f4ee08..e647b0a2791f 100644
--- a/doc/source/rllib-training.rst
+++ b/doc/source/rllib-training.rst
@@ -73,13 +73,13 @@ In an example below, we train A2C by specifying 8 workers through the config fla
python ray/python/ray/rllib/train.py --env=PongDeterministic-v4 \
--run=A2C --config '{"num_workers": 8}'
-.. image:: rllib-config.svg
-
Specifying Resources
~~~~~~~~~~~~~~~~~~~~
You can control the degree of parallelism used by setting the ``num_workers`` hyperparameter for most agents. The number of GPUs the driver should use can be set via the ``num_gpus`` option. Similarly, the resource allocation to workers can be controlled via ``num_cpus_per_worker``, ``num_gpus_per_worker``, and ``custom_resources_per_worker``. The number of GPUs can be a fractional quantity to allocate only a fraction of a GPU. For example, with DQN you can pack five agents onto one GPU by setting ``num_gpus: 0.2``. Note that in Ray < 0.6.0 fractional GPU support requires setting the environment variable ``RAY_USE_XRAY=1``.
+.. image:: rllib-config.svg
+
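For example, a minimal sketch of these options (assuming a ``DQNAgent`` class analogous to the ``PPOAgent`` used elsewhere in these docs; the environment and values are only illustrative):

.. code-block:: python

    import ray
    from ray.rllib.agents.dqn import DQNAgent

    ray.init()

    # With a 0.2 GPU fraction for the driver, five such agents can share one GPU.
    trainer = DQNAgent(
        env="CartPole-v0",
        config={
            "num_workers": 2,          # parallel sampling workers
            "num_gpus": 0.2,           # fractional GPU for the driver
            "num_cpus_per_worker": 1,  # CPUs reserved for each worker
        })
    print(trainer.train())
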
Common Parameters
~~~~~~~~~~~~~~~~~
@@ -224,35 +224,6 @@ Sometimes, it is necessary to coordinate between pieces of code that live in dif
Ray actors provide high levels of performance, so in more complex cases they can be used to implement communication patterns such as parameter servers and allreduce.
-Debugging
----------
-
-Gym Monitor
-~~~~~~~~~~~
-
-The ``"monitor": true`` config can be used to save Gym episode videos to the result dir. For example:
-
-.. code-block:: bash
-
- python ray/python/ray/rllib/train.py --env=PongDeterministic-v4 \
- --run=A2C --config '{"num_workers": 2, "monitor": true}'
-
- # videos will be saved in the ~/ray_results/ dir, for example
- openaigym.video.0.31401.video000000.meta.json
- openaigym.video.0.31401.video000000.mp4
- openaigym.video.0.31403.video000000.meta.json
- openaigym.video.0.31403.video000000.mp4
-
-Log Verbosity
-~~~~~~~~~~~~~
-
-You can control the agent log level via the ``"log_level"`` flag. Valid values are "INFO" (default), "DEBUG", "WARN", and "ERROR". This can be used to increase or decrease the verbosity of internal logging. For example:
-
-.. code-block:: bash
-
- python ray/python/ray/rllib/train.py --env=PongDeterministic-v4 \
- --run=A2C --config '{"num_workers": 2, "log_level": "DEBUG"}'
-
Callbacks and Custom Metrics
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -278,6 +249,10 @@ You can provide callback functions to be called at points during policy evaluati
episode.episode_id, episode.length, mean_pole_angle))
episode.custom_metrics["mean_pole_angle"] = mean_pole_angle
+ def on_train_result(info):
+ print("agent.train() result: {} -> {} episodes".format(
+ info["agent"].__name__, info["result"]["episodes_this_iter"]))
+
ray.init()
trials = tune.run_experiments({
"test": {
@@ -288,6 +263,7 @@ You can provide callback functions to be called at points during policy evaluati
"on_episode_start": tune.function(on_episode_start),
"on_episode_step": tune.function(on_episode_step),
"on_episode_end": tune.function(on_episode_end),
+ "on_train_result": tune.function(on_train_result),
},
},
}
@@ -297,6 +273,113 @@ Custom metrics can be accessed and visualized like any other training result:
.. image:: custom_metric.png
+Example: Curriculum Learning
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Let's look at two ways to use the above APIs to implement `curriculum learning `__. In curriculum learning, the agent task is adjusted over time to improve the learning process. Suppose that we have an environment class with a ``set_phase()`` method that we can call to adjust the task difficulty over time:
+
+Approach 1: Use the Agent API and update the environment between calls to ``train()``. This example shows the agent being run inside a Tune function:
+
+.. code-block:: python
+
+ import ray
+ from ray import tune
+ from ray.rllib.agents.ppo import PPOAgent
+
+ def train(config, reporter):
+ agent = PPOAgent(config=config, env=YourEnv)
+ while True:
+ result = agent.train()
+ reporter(**result)
+ if result["episode_reward_mean"] > 200:
+ phase = 2
+ elif result["episode_reward_mean"] > 100:
+ phase = 1
+ else:
+ phase = 0
+ agent.optimizer.foreach_evaluator(lambda ev: ev.env.set_phase(phase))
+
+ ray.init()
+ tune.run_experiments({
+ "curriculum": {
+ "run": train,
+ "config": {
+ "num_gpus": 0,
+ "num_workers": 2,
+ },
+ "trial_resources": {
+ "cpu": 1,
+ "gpu": lambda spec: spec.config.num_gpus,
+ "extra_cpu": lambda spec: spec.config.num_workers,
+ },
+ },
+ })
+
+Approach 2: Use the callbacks API to update the environment on new training results:
+
+.. code-block:: python
+
+ import ray
+ from ray import tune
+
+ def on_train_result(info):
+ result = info["result"]
+ if result["episode_reward_mean"] > 200:
+ phase = 2
+ elif result["episode_reward_mean"] > 100:
+ phase = 1
+ else:
+ phase = 0
+ agent = info["agent"]
+ agent.optimizer.foreach_evaluator(lambda ev: ev.env.set_phase(phase))
+
+ ray.init()
+ tune.run_experiments({
+ "curriculum": {
+ "run": "PPO",
+ "env": YourEnv,
+ "config": {
+ "callbacks": {
+ "on_train_result": tune.function(on_train_result),
+ },
+ },
+ },
+ })
+
+Debugging
+---------
+
+Gym Monitor
+~~~~~~~~~~~
+
+The ``"monitor": true`` config can be used to save Gym episode videos to the result dir. For example:
+
+.. code-block:: bash
+
+ python ray/python/ray/rllib/train.py --env=PongDeterministic-v4 \
+ --run=A2C --config '{"num_workers": 2, "monitor": true}'
+
+ # videos will be saved in the ~/ray_results/ dir, for example
+ openaigym.video.0.31401.video000000.meta.json
+ openaigym.video.0.31401.video000000.mp4
+ openaigym.video.0.31403.video000000.meta.json
+ openaigym.video.0.31403.video000000.mp4
+
+Log Verbosity
+~~~~~~~~~~~~~
+
+You can control the agent log level via the ``"log_level"`` flag. Valid values are "INFO" (default), "DEBUG", "WARN", and "ERROR". This can be used to increase or decrease the verbosity of internal logging. For example:
+
+.. code-block:: bash
+
+ python ray/python/ray/rllib/train.py --env=PongDeterministic-v4 \
+ --run=A2C --config '{"num_workers": 2, "log_level": "DEBUG"}'
+
+Stack Traces
+~~~~~~~~~~~~
+
+You can use the ``ray stack`` command to dump the stack traces of all the Python workers on a single node. This can be useful for debugging unexpected hangs or performance issues.
+
REST API
--------
diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst
index 23c69506e008..e96bd6fccbcb 100644
--- a/doc/source/rllib.rst
+++ b/doc/source/rllib.rst
@@ -15,7 +15,7 @@ RLlib has extra dependencies on top of ``ray``. First, you'll need to install ei
.. code-block:: bash
pip install tensorflow # or tensorflow-gpu
- pip install ray[rllib]
+ pip install ray[rllib] # also recommended: ray[debug]
You might also want to clone the `Ray repo `__ for convenient access to RLlib helper scripts:
@@ -56,7 +56,7 @@ Algorithms
- `Deep Deterministic Policy Gradients (DDPG, TD3) `__
- - `Deep Q Networks (DQN, Rainbow) `__
+ - `Deep Q Networks (DQN, Rainbow, Parametric DQN) `__
- `Policy Gradients `__
@@ -75,6 +75,7 @@ Models and Preprocessors
* `Custom Models `__
* `Custom Preprocessors `__
* `Customizing Policy Graphs `__
+* `Variable-length / Parametric Action Spaces `__
* `Model-Based Rollouts `__
RLlib Concepts
diff --git a/doc/source/tune.rst b/doc/source/tune.rst
index 87f28531bee8..14c95fb0edcb 100644
--- a/doc/source/tune.rst
+++ b/doc/source/tune.rst
@@ -45,7 +45,7 @@ You'll need to first `install ray `__ to import Tune.
.. code-block:: bash
- pip install ray
+ pip install ray # also recommended: ray[debug]
Quick Start
diff --git a/java/runtime/src/main/java/org/ray/runtime/AbstractRayRuntime.java b/java/runtime/src/main/java/org/ray/runtime/AbstractRayRuntime.java
index d8de9a086407..10dc172fd4d9 100644
--- a/java/runtime/src/main/java/org/ray/runtime/AbstractRayRuntime.java
+++ b/java/runtime/src/main/java/org/ray/runtime/AbstractRayRuntime.java
@@ -75,7 +75,7 @@ public <T> RayObject<T> put(T obj) {
public <T> void put(UniqueId objectId, T obj) {
UniqueId taskId = workerContext.getCurrentTask().taskId;
- RayLog.core.info("Putting object {}, for task {} ", objectId, taskId);
+ RayLog.core.debug("Putting object {}, for task {} ", objectId, taskId);
objectStoreProxy.put(objectId, obj, null);
}
diff --git a/java/runtime/src/main/java/org/ray/runtime/raylet/RayletClientImpl.java b/java/runtime/src/main/java/org/ray/runtime/raylet/RayletClientImpl.java
index 9cf70c348209..91937ba14b1e 100644
--- a/java/runtime/src/main/java/org/ray/runtime/raylet/RayletClientImpl.java
+++ b/java/runtime/src/main/java/org/ray/runtime/raylet/RayletClientImpl.java
@@ -90,8 +90,8 @@ public TaskSpec getTask() {
@Override
public void fetchOrReconstruct(List<UniqueId> objectIds, boolean fetchOnly,
UniqueId currentTaskId) {
- if (RayLog.core.isInfoEnabled()) {
- RayLog.core.info("Blocked on objects for task {}, object IDs are {}",
+ if (RayLog.core.isDebugEnabled()) {
+ RayLog.core.debug("Blocked on objects for task {}, object IDs are {}",
UniqueIdUtil.computeTaskId(objectIds.get(0)), objectIds);
}
nativeFetchOrReconstruct(client, UniqueIdUtil.getIdBytes(objectIds),
@@ -172,7 +172,7 @@ private static ByteBuffer convertTaskSpecToFlatbuffer(TaskSpec task) {
final int parentTaskIdOffset = fbb.createString(task.parentTaskId.toByteBuffer());
final int parentCounter = task.parentCounter;
final int actorCreateIdOffset = fbb.createString(task.actorCreationId.toByteBuffer());
- final int actorCreateDummyIdOffset = fbb.createString(UniqueId.NIL.toByteBuffer());
+ final int actorCreateDummyIdOffset = fbb.createString(task.actorId.toByteBuffer());
final int actorIdOffset = fbb.createString(task.actorId.toByteBuffer());
final int actorHandleIdOffset = fbb.createString(task.actorHandleId.toByteBuffer());
final int actorCounter = task.actorCounter;
diff --git a/java/test/src/main/java/org/ray/api/test/StressTest.java b/java/test/src/main/java/org/ray/api/test/StressTest.java
new file mode 100644
index 000000000000..4fab74aed199
--- /dev/null
+++ b/java/test/src/main/java/org/ray/api/test/StressTest.java
@@ -0,0 +1,98 @@
+package org.ray.api.test;
+
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.ray.api.Ray;
+import org.ray.api.RayActor;
+import org.ray.api.RayObject;
+import org.ray.api.id.UniqueId;
+
+@RunWith(MyRunner.class)
+public class StressTest {
+
+ public static int echo(int x) {
+ return x;
+ }
+
+ @Test
+ public void testSubmittingTasks() {
+ for (int numIterations : ImmutableList.of(1, 10, 100, 1000)) {
+ int numTasks = 1000 / numIterations;
+ for (int i = 0; i < numIterations; i++) {
+ List<UniqueId> resultIds = new ArrayList<>();
+ for (int j = 0; j < numTasks; j++) {
+ resultIds.add(Ray.call(StressTest::echo, 1).getId());
+ }
+ for (Integer result : Ray.<Integer>get(resultIds)) {
+ Assert.assertEquals(result, Integer.valueOf(1));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testDependency() {
+ RayObject<Integer> x = Ray.call(StressTest::echo, 1);
+ for (int i = 0; i < 1000; i++) {
+ x = Ray.call(StressTest::echo, x);
+ }
+ Assert.assertEquals(x.get(), Integer.valueOf(1));
+ }
+
+ public static class Actor {
+
+ public int ping() {
+ return 1;
+ }
+ }
+
+ public static class Worker {
+
+ private RayActor<Actor> actor;
+
+ public Worker(RayActor<Actor> actor) {
+ this.actor = actor;
+ }
+
+ public int ping(int n) {
+ List<UniqueId> objectIds = new ArrayList<>();
+ for (int i = 0; i < n; i++) {
+ objectIds.add(Ray.call(Actor::ping, actor).getId());
+ }
+ int sum = 0;
+ for (Integer result : Ray.<Integer>get(objectIds)) {
+ sum += result;
+ }
+ return sum;
+ }
+ }
+
+ @Test
+ public void testSubmittingManyTasksToOneActor() {
+ RayActor<Actor> actor = Ray.createActor(Actor::new);
+ List<UniqueId> objectIds = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ RayActor<Worker> worker = Ray.createActor(Worker::new, actor);
+ objectIds.add(Ray.call(Worker::ping, worker, 100).getId());
+ }
+ for (Integer result : Ray.<Integer>get(objectIds)) {
+ Assert.assertEquals(result, Integer.valueOf(100));
+ }
+ }
+
+ @Test
+ public void testPuttingAndGettingManyObjects() {
+ Integer objectToPut = 1;
+ List<RayObject<Integer>> objects = new ArrayList<>();
+ for (int i = 0; i < 100_000; i++) {
+ objects.add(Ray.put(objectToPut));
+ }
+ for (RayObject<Integer> object : objects) {
+ Assert.assertEquals(object.get(), objectToPut);
+ }
+ }
+}
diff --git a/python/ray/__init__.py b/python/ray/__init__.py
index a507cdd2e7a2..ed024a107aa5 100644
--- a/python/ray/__init__.py
+++ b/python/ray/__init__.py
@@ -46,6 +46,9 @@
e.args += (helpful_message, )
raise
+modin_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "modin")
+sys.path.insert(0, modin_path)
+
from ray.raylet import ObjectID, _config # noqa: E402
from ray.profiling import profile # noqa: E402
from ray.worker import (error_info, init, connect, disconnect, get, put, wait,
@@ -62,7 +65,7 @@
from ray.actor import method # noqa: E402
# Ray version string.
-__version__ = "0.5.3"
+__version__ = "0.6.0"
__all__ = [
"error_info", "init", "connect", "disconnect", "get", "put", "wait",
diff --git a/python/ray/autoscaler/aws/example-full.yaml b/python/ray/autoscaler/aws/example-full.yaml
index 0d04e0dceaee..d74d45823c21 100644
--- a/python/ray/autoscaler/aws/example-full.yaml
+++ b/python/ray/autoscaler/aws/example-full.yaml
@@ -89,9 +89,9 @@ setup_commands:
# has your Ray repo pre-cloned. Then, you can replace the pip installs
# below with a git checkout (and possibly a recompile).
- echo 'export PATH="$HOME/anaconda3/envs/tensorflow_p36/bin:$PATH"' >> ~/.bashrc
- # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp27-cp27mu-manylinux1_x86_64.whl
- # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp35-cp35m-manylinux1_x86_64.whl
- - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp36-cp36m-manylinux1_x86_64.whl
+ # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp27-cp27mu-manylinux1_x86_64.whl
+ # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp35-cp35m-manylinux1_x86_64.whl
+ - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp36-cp36m-manylinux1_x86_64.whl
# Consider uncommenting these if you also want to run apt-get commands during setup
# - sudo pkill -9 apt-get || true
# - sudo pkill -9 dpkg || true
diff --git a/python/ray/autoscaler/gcp/example-full.yaml b/python/ray/autoscaler/gcp/example-full.yaml
index a3df6ad612c2..6afbb464fa6a 100644
--- a/python/ray/autoscaler/gcp/example-full.yaml
+++ b/python/ray/autoscaler/gcp/example-full.yaml
@@ -124,9 +124,9 @@ setup_commands:
pip install
google-api-python-client==1.6.7
cython==0.27.3
- # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp27-cp27mu-manylinux1_x86_64.whl
- # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp35-cp35m-manylinux1_x86_64.whl
- # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.3-cp36-cp36m-manylinux1_x86_64.whl
+ # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp27-cp27mu-manylinux1_x86_64.whl
+ # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp35-cp35m-manylinux1_x86_64.whl
+ # - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp36-cp36m-manylinux1_x86_64.whl
- >-
cd ~
&& git clone https://github.com/ray-project/ray || true
diff --git a/python/ray/experimental/sgd/sgd_worker.py b/python/ray/experimental/sgd/sgd_worker.py
index 3dd0eefca86d..0d4b45c7c8bc 100644
--- a/python/ray/experimental/sgd/sgd_worker.py
+++ b/python/ray/experimental/sgd/sgd_worker.py
@@ -205,9 +205,6 @@ def for_model(self, fn):
def compute_gradients(self):
start = time.time()
feed_dict = self._grad_feed_dict()
- # Aggregate feed dicts for each model on this worker.
- for model in self.models:
- feed_dict.update(model.get_feed_dict())
# We only need to fetch the first per_device_grad, since they are
# averaged across all devices by allreduce.
fetches = self.sess.run(
diff --git a/python/ray/gcs_utils.py b/python/ray/gcs_utils.py
index bbdbe04cf7fd..347f7ab9f806 100644
--- a/python/ray/gcs_utils.py
+++ b/python/ray/gcs_utils.py
@@ -11,6 +11,7 @@
from ray.core.generated.ErrorTableData import ErrorTableData
from ray.core.generated.ProfileTableData import ProfileTableData
from ray.core.generated.HeartbeatTableData import HeartbeatTableData
+from ray.core.generated.HeartbeatBatchTableData import HeartbeatBatchTableData
from ray.core.generated.DriverTableData import DriverTableData
from ray.core.generated.ObjectTableData import ObjectTableData
from ray.core.generated.ray.protocol.Task import Task
@@ -20,14 +21,16 @@
__all__ = [
"GcsTableEntry", "ClientTableData", "ErrorTableData", "HeartbeatTableData",
- "DriverTableData", "ProfileTableData", "ObjectTableData", "Task",
- "TablePrefix", "TablePubsub", "construct_error_message"
+ "HeartbeatBatchTableData", "DriverTableData", "ProfileTableData",
+ "ObjectTableData", "Task", "TablePrefix", "TablePubsub",
+ "construct_error_message"
]
FUNCTION_PREFIX = "RemoteFunction:"
# xray heartbeats
XRAY_HEARTBEAT_CHANNEL = str(TablePubsub.HEARTBEAT).encode("ascii")
+XRAY_HEARTBEAT_BATCH_CHANNEL = str(TablePubsub.HEARTBEAT_BATCH).encode("ascii")
# xray driver updates
XRAY_DRIVER_CHANNEL = str(TablePubsub.DRIVER).encode("ascii")
diff --git a/python/ray/memory_monitor.py b/python/ray/memory_monitor.py
index 23d6c12b5f9c..00cf86816dbf 100644
--- a/python/ray/memory_monitor.py
+++ b/python/ray/memory_monitor.py
@@ -56,8 +56,9 @@ def __init__(self, error_threshold=0.95, check_interval=1):
if not psutil:
logger.warning(
"WARNING: Not monitoring node memory since `psutil` is not "
- "installed. Install this with `pip install psutil` to enable "
- "debugging of memory-related crashes.")
+ "installed. Install this with `pip install psutil` "
+ "(or ray[debug]) to enable debugging of memory-related "
+ "crashes.")
def raise_if_low_memory(self):
if not psutil:
diff --git a/python/ray/monitor.py b/python/ray/monitor.py
index 625641790de9..a37f75de7cf1 100644
--- a/python/ray/monitor.py
+++ b/python/ray/monitor.py
@@ -50,11 +50,6 @@ def __init__(self,
# Setup subscriptions to the primary Redis server and the Redis shards.
self.primary_subscribe_client = self.redis.pubsub(
ignore_subscribe_messages=True)
- self.shard_subscribe_clients = []
- for redis_client in self.state.redis_clients:
- subscribe_client = redis_client.pubsub(
- ignore_subscribe_messages=True)
- self.shard_subscribe_clients.append(subscribe_client)
# Keep a mapping from local scheduler client ID to IP address to use
# for updating the load metrics.
self.local_scheduler_id_to_ip_map = {}
@@ -90,49 +85,50 @@ def __init__(self,
str(e)))
self.issue_gcs_flushes = False
- def subscribe(self, channel, primary=True):
- """Subscribe to the given channel.
+ def subscribe(self, channel):
+ """Subscribe to the given channel on the primary Redis shard.
Args:
channel (str): The channel to subscribe to.
- primary: If True, then we only subscribe to the primary Redis
- shard. Otherwise we subscribe to all of the other shards but
- not the primary.
Raises:
Exception: An exception is raised if the subscription fails.
"""
- if primary:
- self.primary_subscribe_client.subscribe(channel)
- else:
- for subscribe_client in self.shard_subscribe_clients:
- subscribe_client.subscribe(channel)
+ self.primary_subscribe_client.subscribe(channel)
- def xray_heartbeat_handler(self, unused_channel, data):
- """Handle an xray heartbeat message from Redis."""
+ def xray_heartbeat_batch_handler(self, unused_channel, data):
+ """Handle an xray heartbeat batch message from Redis."""
gcs_entries = ray.gcs_utils.GcsTableEntry.GetRootAsGcsTableEntry(
data, 0)
heartbeat_data = gcs_entries.Entries(0)
- message = ray.gcs_utils.HeartbeatTableData.GetRootAsHeartbeatTableData(
- heartbeat_data, 0)
- num_resources = message.ResourcesAvailableLabelLength()
- static_resources = {}
- dynamic_resources = {}
- for i in range(num_resources):
- dyn = message.ResourcesAvailableLabel(i)
- static = message.ResourcesTotalLabel(i)
- dynamic_resources[dyn] = message.ResourcesAvailableCapacity(i)
- static_resources[static] = message.ResourcesTotalCapacity(i)
-
- # Update the load metrics for this local scheduler.
- client_id = ray.utils.binary_to_hex(message.ClientId())
- ip = self.local_scheduler_id_to_ip_map.get(client_id)
- if ip:
- self.load_metrics.update(ip, static_resources, dynamic_resources)
- else:
- print("Warning: could not find ip for client {} in {}.".format(
- client_id, self.local_scheduler_id_to_ip_map))
+
+ message = (ray.gcs_utils.HeartbeatBatchTableData.
+ GetRootAsHeartbeatBatchTableData(heartbeat_data, 0))
+
+ for j in range(message.BatchLength()):
+ heartbeat_message = message.Batch(j)
+
+ num_resources = heartbeat_message.ResourcesAvailableLabelLength()
+ static_resources = {}
+ dynamic_resources = {}
+ for i in range(num_resources):
+ dyn = heartbeat_message.ResourcesAvailableLabel(i)
+ static = heartbeat_message.ResourcesTotalLabel(i)
+ dynamic_resources[dyn] = (
+ heartbeat_message.ResourcesAvailableCapacity(i))
+ static_resources[static] = (
+ heartbeat_message.ResourcesTotalCapacity(i))
+
+ # Update the load metrics for this local scheduler.
+ client_id = ray.utils.binary_to_hex(heartbeat_message.ClientId())
+ ip = self.local_scheduler_id_to_ip_map.get(client_id)
+ if ip:
+ self.load_metrics.update(ip, static_resources,
+ dynamic_resources)
+ else:
+ print("Warning: could not find ip for client {} in {}.".format(
+ client_id, self.local_scheduler_id_to_ip_map))
def _xray_clean_up_entries_for_driver(self, driver_id):
"""Remove this driver's object/task entries from redis.
@@ -222,8 +218,7 @@ def process_messages(self, max_messages=10000):
max_messages: The maximum number of messages to process before
returning.
"""
- subscribe_clients = (
- [self.primary_subscribe_client] + self.shard_subscribe_clients)
+ subscribe_clients = [self.primary_subscribe_client]
for subscribe_client in subscribe_clients:
for _ in range(max_messages):
message = subscribe_client.get_message()
@@ -237,9 +232,9 @@ def process_messages(self, max_messages=10000):
# Determine the appropriate message handler.
message_handler = None
- if channel == ray.gcs_utils.XRAY_HEARTBEAT_CHANNEL:
+ if channel == ray.gcs_utils.XRAY_HEARTBEAT_BATCH_CHANNEL:
# Similar functionality as local scheduler info channel
- message_handler = self.xray_heartbeat_handler
+ message_handler = self.xray_heartbeat_batch_handler
elif channel == ray.gcs_utils.XRAY_DRIVER_CHANNEL:
# Handles driver death.
message_handler = self.xray_driver_removed_handler
@@ -299,7 +294,7 @@ def run(self):
clients and cleaning up state accordingly.
"""
# Initialize the subscription channel.
- self.subscribe(ray.gcs_utils.XRAY_HEARTBEAT_CHANNEL, primary=False)
+ self.subscribe(ray.gcs_utils.XRAY_HEARTBEAT_BATCH_CHANNEL)
self.subscribe(ray.gcs_utils.XRAY_DRIVER_CHANNEL)
# TODO(rkn): If there were any dead clients at startup, we should clean
diff --git a/python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py b/python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py
index 6f079713abae..8aa60645aaeb 100644
--- a/python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py
+++ b/python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py
@@ -53,7 +53,8 @@ def __init__(self, observation_space, action_space, config):
self.model = ModelCatalog.get_model({
"obs": self.observations,
"prev_actions": prev_actions,
- "prev_rewards": prev_rewards
+ "prev_rewards": prev_rewards,
+ "is_training": self._get_is_training_placeholder(),
}, observation_space, logit_dim, self.config["model"])
action_dist = dist_class(self.model.outputs)
self.vf = self.model.value_function()
diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py
index 18adda82d178..b84154f5bbb1 100644
--- a/python/ray/rllib/agents/agent.py
+++ b/python/ray/rllib/agents/agent.py
@@ -2,12 +2,13 @@
from __future__ import division
from __future__ import print_function
+from datetime import datetime
import copy
-import os
import logging
+import os
import pickle
+import six
import tempfile
-from datetime import datetime
import tensorflow as tf
import ray
@@ -15,7 +16,7 @@
from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer
from ray.rllib.utils import FilterManager, deep_update, merge_dicts
-from ray.tune.registry import ENV_CREATOR, _global_registry
+from ray.tune.registry import ENV_CREATOR, register_env, _global_registry
from ray.tune.trainable import Trainable
from ray.tune.trial import Resources
from ray.tune.logger import UnifiedLogger
@@ -40,6 +41,7 @@
"on_episode_step": None, # arg: {"env": .., "episode": ...}
"on_episode_end": None, # arg: {"env": .., "episode": ...}
"on_sample_end": None, # arg: {"samples": .., "evaluator": ...}
+ "on_train_result": None, # arg: {"agent": ..., "result": ...}
},
# === Policy ===
@@ -61,6 +63,8 @@
# Whether to clip rewards prior to experience postprocessing. Setting to
# None means clip for Atari only.
"clip_rewards": None,
+ # Whether to np.clip() actions to the action space low/high range spec.
+ "clip_actions": True,
# Whether to use rllib or deepmind preprocessors by default
"preprocessor_pref": "deepmind",
@@ -226,6 +230,7 @@ def session_creator():
num_envs=config["num_envs_per_worker"],
observation_filter=config["observation_filter"],
clip_rewards=config["clip_rewards"],
+ clip_actions=config["clip_actions"],
env_config=config["env_config"],
model_config=config["model"],
policy_config=config,
@@ -274,7 +279,7 @@ def __init__(self, config=None, env=None, logger_creator=None):
self.global_vars = {"timestep": 0}
# Agents allow env ids to be passed directly to the constructor.
- self._env_id = env or config.get("env")
+ self._env_id = _register_if_needed(env or config.get("env"))
# Create a default logger creator if no logger_creator is specified
if logger_creator is None:
@@ -316,7 +321,13 @@ def train(self):
logger.debug("synchronized filters: {}".format(
self.local_evaluator.filters))
- return Trainable.train(self)
+ result = Trainable.train(self)
+ if self.config["callbacks"].get("on_train_result"):
+ self.config["callbacks"]["on_train_result"]({
+ "agent": self,
+ "result": result,
+ })
+ return result
def _setup(self, config):
env = self._env_id
@@ -385,13 +396,11 @@ def compute_action(self, observation, state=None, policy_id="default"):
observation, update=False)
if state:
return self.local_evaluator.for_policy(
- lambda p: p.compute_single_action(
- filtered_obs, state, is_training=False),
+ lambda p: p.compute_single_action(filtered_obs, state),
policy_id=policy_id)
return self.local_evaluator.for_policy(
- lambda p: p.compute_single_action(
- filtered_obs, state, is_training=False)[0],
- policy_id=policy_id)
+ lambda p: p.compute_single_action(filtered_obs, state)[0],
+ policy_id=policy_id)
def get_weights(self, policies=None):
"""Return a dictionary of policy ids to weights.
@@ -446,6 +455,15 @@ def _restore(self, checkpoint_path):
self.__setstate__(extra_data)
+def _register_if_needed(env_object):
+ if isinstance(env_object, six.string_types):
+ return env_object
+ elif isinstance(env_object, type):
+ name = env_object.__name__
+ register_env(name, lambda config: env_object(config))
+ return name
+
+
def get_agent_class(alg):
"""Returns the class of a known agent given its name."""
diff --git a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
index 738c4e9ac130..eb5f14c2d1c9 100644
--- a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
+++ b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
@@ -199,7 +199,9 @@ def __init__(self, observation_space, action_space, config):
self.stochastic = tf.placeholder(tf.bool, (), name="stochastic")
self.eps = tf.placeholder(tf.float32, (), name="eps")
self.cur_observations = tf.placeholder(
- tf.float32, shape=(None, ) + observation_space.shape)
+ tf.float32,
+ shape=(None, ) + observation_space.shape,
+ name="cur_obs")
# Actor: P (policy) network
with tf.variable_scope(P_SCOPE) as scope:
@@ -236,7 +238,11 @@ def __init__(self, observation_space, action_space, config):
# p network evaluation
with tf.variable_scope(P_SCOPE, reuse=True) as scope:
+ prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
self.p_t = self._build_p_network(self.obs_t, observation_space)
+ p_batchnorm_update_ops = list(
+ set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) -
+ prev_update_ops)
# target p network evaluation
with tf.variable_scope(P_TARGET_SCOPE) as scope:
@@ -257,6 +263,7 @@ def __init__(self, observation_space, action_space, config):
is_target=True)
# q network evaluation
+ prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
with tf.variable_scope(Q_SCOPE) as scope:
q_t, model = self._build_q_network(self.obs_t, observation_space,
self.act_t)
@@ -269,6 +276,8 @@ def __init__(self, observation_space, action_space, config):
twin_q_t, twin_model = self._build_q_network(
self.obs_t, observation_space, self.act_t)
self.twin_q_func_vars = _scope_vars(scope.name)
+ q_batchnorm_update_ops = list(
+ set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - prev_update_ops)
# target q network evaluation
with tf.variable_scope(Q_TARGET_SCOPE) as scope:
@@ -345,7 +354,8 @@ def __init__(self, observation_space, action_space, config):
obs_input=self.cur_observations,
action_sampler=self.output_actions,
loss=model.loss() + self.loss.total_loss,
- loss_inputs=self.loss_inputs)
+ loss_inputs=self.loss_inputs,
+ update_ops=q_batchnorm_update_ops + p_batchnorm_update_ops)
self.sess.run(tf.global_variables_initializer())
# Note that this encompasses both the policy and Q-value networks and
@@ -359,7 +369,8 @@ def __init__(self, observation_space, action_space, config):
def _build_q_network(self, obs, obs_space, actions):
q_net = QNetwork(
ModelCatalog.get_model({
- "obs": obs
+ "obs": obs,
+ "is_training": self._get_is_training_placeholder(),
}, obs_space, 1, self.config["model"]), actions,
self.config["critic_hiddens"],
self.config["critic_hidden_activation"])
@@ -368,7 +379,8 @@ def _build_q_network(self, obs, obs_space, actions):
def _build_p_network(self, obs, obs_space):
return PNetwork(
ModelCatalog.get_model({
- "obs": obs
+ "obs": obs,
+ "is_training": self._get_is_training_placeholder(),
}, obs_space, 1, self.config["model"]), self.dim_actions,
self.config["actor_hiddens"],
self.config["actor_hidden_activation"]).action_scores
diff --git a/python/ray/rllib/agents/dqn/dqn_policy_graph.py b/python/ray/rllib/agents/dqn/dqn_policy_graph.py
index 6125cd9d387b..c883ef25067d 100644
--- a/python/ray/rllib/agents/dqn/dqn_policy_graph.py
+++ b/python/ray/rllib/agents/dqn/dqn_policy_graph.py
@@ -30,16 +30,21 @@ def __init__(self,
sigma0=0.5):
self.model = model
with tf.variable_scope("action_value"):
- action_out = model.last_layer
- for i in range(len(hiddens)):
- if use_noisy:
- action_out = self.noisy_layer("hidden_%d" % i, action_out,
- hiddens[i], sigma0)
- else:
- action_out = layers.fully_connected(
- action_out,
- num_outputs=hiddens[i],
- activation_fn=tf.nn.relu)
+ if hiddens:
+ action_out = model.last_layer
+ for i in range(len(hiddens)):
+ if use_noisy:
+ action_out = self.noisy_layer(
+ "hidden_%d" % i, action_out, hiddens[i], sigma0)
+ else:
+ action_out = layers.fully_connected(
+ action_out,
+ num_outputs=hiddens[i],
+ activation_fn=tf.nn.relu)
+ else:
+ # Avoid postprocessing the outputs. This enables custom models
+ # to be used for parametric action DQN.
+ action_out = model.outputs
if use_noisy:
action_scores = self.noisy_layer(
"output",
@@ -47,11 +52,13 @@ def __init__(self,
num_actions * num_atoms,
sigma0,
non_linear=False)
- else:
+ elif hiddens:
action_scores = layers.fully_connected(
action_out,
num_outputs=num_actions * num_atoms,
activation_fn=None)
+ else:
+ action_scores = model.outputs
if num_atoms > 1:
# Distributional Q-learning uses a discrete support z
# to represent the action value distribution
@@ -107,7 +114,7 @@ def __init__(self,
self.logits = support_logits_per_action
self.dist = support_prob_per_action
else:
- action_scores_mean = tf.reduce_mean(action_scores, 1)
+ action_scores_mean = _reduce_mean_ignore_inf(action_scores, 1)
action_scores_centered = action_scores - tf.expand_dims(
action_scores_mean, 1)
self.value = state_score + action_scores_centered
@@ -176,11 +183,15 @@ class QValuePolicy(object):
def __init__(self, q_values, observations, num_actions, stochastic, eps):
deterministic_actions = tf.argmax(q_values, axis=1)
batch_size = tf.shape(observations)[0]
- random_actions = tf.random_uniform(
- tf.stack([batch_size]),
- minval=0,
- maxval=num_actions,
- dtype=tf.int64)
+
+ # Special case masked out actions (q_value ~= -inf) so that we don't
+ # even consider them for exploration.
+ random_valid_action_logits = tf.where(
+ tf.equal(q_values, tf.float32.min),
+ tf.ones_like(q_values) * tf.float32.min, tf.ones_like(q_values))
+ random_actions = tf.squeeze(
+ tf.multinomial(random_valid_action_logits, 1), axis=1)
+
chose_random = tf.random_uniform(
tf.stack([batch_size]), minval=0, maxval=1, dtype=tf.float32) < eps
stochastic_actions = tf.where(chose_random, random_actions,
@@ -295,8 +306,12 @@ def __init__(self, observation_space, action_space, config):
# q network evaluation
with tf.variable_scope(Q_SCOPE, reuse=True):
+ prev_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
q_t, q_logits_t, q_dist_t, model = self._build_q_network(
self.obs_t, observation_space)
+ q_batchnorm_update_ops = list(
+ set(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) -
+ prev_update_ops)
         # target q network evaluation
with tf.variable_scope(Q_TARGET_SCOPE) as scope:
@@ -361,15 +376,17 @@ def __init__(self, observation_space, action_space, config):
obs_input=self.cur_observations,
action_sampler=self.output_actions,
loss=model.loss() + self.loss.loss,
- loss_inputs=self.loss_inputs)
+ loss_inputs=self.loss_inputs,
+ update_ops=q_batchnorm_update_ops)
self.sess.run(tf.global_variables_initializer())
def _build_q_network(self, obs, space):
qnet = QNetwork(
ModelCatalog.get_model({
- "obs": obs
- }, space, 1, self.config["model"]), self.num_actions,
- self.config["dueling"], self.config["hiddens"],
+ "obs": obs,
+ "is_training": self._get_is_training_placeholder(),
+ }, space, self.num_actions, self.config["model"]),
+ self.num_actions, self.config["dueling"], self.config["hiddens"],
self.config["noisy"], self.config["num_atoms"],
self.config["v_min"], self.config["v_max"], self.config["sigma0"])
return qnet.value, qnet.logits, qnet.dist, qnet.model
@@ -507,6 +524,14 @@ def _postprocess_dqn(policy_graph, sample_batch):
return batch
+def _reduce_mean_ignore_inf(x, axis):
+ """Same as tf.reduce_mean() but ignores -inf values."""
+ mask = tf.not_equal(x, tf.float32.min)
+ x_zeroed = tf.where(mask, x, tf.zeros_like(x))
+ return (tf.reduce_sum(x_zeroed, axis) / tf.reduce_sum(
+ tf.cast(mask, tf.float32), axis))
+
+
def _huber_loss(x, delta=1.0):
"""Reference: https://en.wikipedia.org/wiki/Huber_loss"""
return tf.where(
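
Likewise, a NumPy analogue of `_reduce_mean_ignore_inf` under the same masking convention (a sketch, not the TF helper itself):

```python
import numpy as np

FLOAT32_MIN = np.finfo(np.float32).min

def reduce_mean_ignore_inf(x, axis):
    # Mean over the entries that are not masked out with float32.min.
    mask = x != FLOAT32_MIN
    x_zeroed = np.where(mask, x, 0.0)
    return x_zeroed.sum(axis) / mask.sum(axis)

x = np.array([[2.0, FLOAT32_MIN, 4.0]], dtype=np.float32)
print(reduce_mean_ignore_inf(x, 1))  # [3.], a plain mean would be ~ -1.1e38
```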
diff --git a/python/ray/rllib/agents/impala/vtrace_policy_graph.py b/python/ray/rllib/agents/impala/vtrace_policy_graph.py
index 3d9e4214b7c7..cfa2f1373aae 100644
--- a/python/ray/rllib/agents/impala/vtrace_policy_graph.py
+++ b/python/ray/rllib/agents/impala/vtrace_policy_graph.py
@@ -133,6 +133,7 @@ def __init__(self,
"obs": observations,
"prev_actions": prev_actions,
"prev_rewards": prev_rewards,
+ "is_training": self._get_is_training_placeholder(),
},
observation_space,
logit_dim,
diff --git a/python/ray/rllib/agents/pg/pg_policy_graph.py b/python/ray/rllib/agents/pg/pg_policy_graph.py
index 8cbb3a588b49..2a342c117fb3 100644
--- a/python/ray/rllib/agents/pg/pg_policy_graph.py
+++ b/python/ray/rllib/agents/pg/pg_policy_graph.py
@@ -35,7 +35,8 @@ def __init__(self, obs_space, action_space, config):
self.model = ModelCatalog.get_model({
"obs": obs,
"prev_actions": prev_actions,
- "prev_rewards": prev_rewards
+ "prev_rewards": prev_rewards,
+ "is_training": self._get_is_training_placeholder(),
}, obs_space, self.logit_dim, self.config["model"])
action_dist = dist_class(self.model.outputs) # logit for each action
diff --git a/python/ray/rllib/agents/ppo/ppo.py b/python/ray/rllib/agents/ppo/ppo.py
index eb556877c5a7..d5e50832f451 100644
--- a/python/ray/rllib/agents/ppo/ppo.py
+++ b/python/ray/rllib/agents/ppo/ppo.py
@@ -24,7 +24,7 @@
"sample_batch_size": 200,
# Number of timesteps collected for each SGD round
"train_batch_size": 4000,
- # Total SGD batch size across all devices for SGD (multi-gpu only)
+ # Total SGD batch size across all devices for SGD
"sgd_minibatch_size": 128,
# Number of SGD iterations in each outer loop
"num_sgd_iter": 30,
@@ -49,7 +49,8 @@
"batch_mode": "truncate_episodes",
# Which observation filter to apply to the observation
"observation_filter": "MeanStdFilter",
- # Use the sync samples optimizer instead of the multi-gpu one
+ # Uses the sync samples optimizer instead of the multi-gpu one. This does
+ # not support minibatches.
"simple_optimizer": False,
})
# __sphinx_doc_end__
@@ -110,6 +111,11 @@ def _validate_config(self):
and not self.config["simple_optimizer"]):
logger.warn("forcing simple_optimizer=True in multi-agent mode")
self.config["simple_optimizer"] = True
+ if self.config["observation_filter"] != "NoFilter":
+ # TODO(ekl): consider setting the default to be NoFilter
+ logger.warn(
+ "By default, observations will be normalized with {}".format(
+ self.config["observation_filter"]))
def _train(self):
prev_steps = self.optimizer.num_steps_sampled
diff --git a/python/ray/rllib/agents/ppo/ppo_policy_graph.py b/python/ray/rllib/agents/ppo/ppo_policy_graph.py
index f43a336253be..3762f16f9084 100644
--- a/python/ray/rllib/agents/ppo/ppo_policy_graph.py
+++ b/python/ray/rllib/agents/ppo/ppo_policy_graph.py
@@ -158,7 +158,8 @@ def __init__(self,
{
"obs": obs_ph,
"prev_actions": prev_actions_ph,
- "prev_rewards": prev_rewards_ph
+ "prev_rewards": prev_rewards_ph,
+ "is_training": self._get_is_training_placeholder(),
},
observation_space,
logit_dim,
@@ -191,7 +192,8 @@ def __init__(self,
self.value_function = ModelCatalog.get_model({
"obs": obs_ph,
"prev_actions": prev_actions_ph,
- "prev_rewards": prev_rewards_ph
+ "prev_rewards": prev_rewards_ph,
+ "is_training": self._get_is_training_placeholder(),
}, observation_space, 1, vf_config).outputs
self.value_function = tf.reshape(self.value_function, [-1])
else:
diff --git a/python/ray/rllib/evaluation/policy_evaluator.py b/python/ray/rllib/evaluation/policy_evaluator.py
index db5f7ee887b2..33d5ee219ca1 100644
--- a/python/ray/rllib/evaluation/policy_evaluator.py
+++ b/python/ray/rllib/evaluation/policy_evaluator.py
@@ -100,6 +100,7 @@ def __init__(self,
num_envs=1,
observation_filter="NoFilter",
clip_rewards=None,
+ clip_actions=True,
env_config=None,
model_config=None,
policy_config=None,
@@ -155,6 +156,8 @@ def __init__(self,
clip_rewards (bool): Whether to clip rewards to [-1, 1] prior to
experience postprocessing. Setting to None means clip for Atari
only.
+ clip_actions (bool): Whether to clip action values to the range
+ specified by the policy action space.
env_config (dict): Config to pass to the env creator.
model_config (dict): Config to use when creating the policy model.
policy_config (dict): Config to pass to the policy. In the
@@ -289,7 +292,8 @@ def make_env(vector_index):
self.callbacks,
horizon=episode_horizon,
pack=pack_episodes,
- tf_sess=self.tf_sess)
+ tf_sess=self.tf_sess,
+ clip_actions=clip_actions)
self.sampler.start()
else:
self.sampler = SyncSampler(
@@ -302,7 +306,8 @@ def make_env(vector_index):
self.callbacks,
horizon=episode_horizon,
pack=pack_episodes,
- tf_sess=self.tf_sess)
+ tf_sess=self.tf_sess,
+ clip_actions=clip_actions)
logger.debug("Created evaluator with env {} ({}), policies {}".format(
self.async_env, self.env, self.policy_map))
diff --git a/python/ray/rllib/evaluation/policy_graph.py b/python/ray/rllib/evaluation/policy_graph.py
index 9de59d269a03..c19da286b0b9 100644
--- a/python/ray/rllib/evaluation/policy_graph.py
+++ b/python/ray/rllib/evaluation/policy_graph.py
@@ -42,7 +42,6 @@ def compute_actions(self,
state_batches,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episodes=None):
"""Compute actions for the current policy.
@@ -51,7 +50,6 @@ def compute_actions(self,
state_batches (list): list of RNN state input batches, if any
prev_action_batch (np.ndarray): batch of previous action values
prev_reward_batch (np.ndarray): batch of previous rewards
- is_training (bool): whether we are training the policy
episodes (list): MultiAgentEpisode for each obs in obs_batch.
This provides access to all of the internal episode state,
which may be useful for model-based or multiagent algorithms.
@@ -71,7 +69,6 @@ def compute_single_action(self,
state,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episode=None):
"""Unbatched version of compute_actions.
@@ -80,7 +77,6 @@ def compute_single_action(self,
state_batches (list): list of RNN state inputs, if any
prev_action_batch (np.ndarray): batch of previous action values
prev_reward_batch (np.ndarray): batch of previous rewards
- is_training (bool): whether we are training the policy
episode (MultiAgentEpisode): this provides access to all of the
internal episode state, which may be useful for model-based or
multi-agent algorithms.
@@ -92,7 +88,7 @@ def compute_single_action(self,
"""
[action], state_out, info = self.compute_actions(
- [obs], [[s] for s in state], is_training, episodes=[episode])
+ [obs], [[s] for s in state], episodes=[episode])
return action, [s[0] for s in state_out], \
{k: v[0] for k, v in info.items()}
diff --git a/python/ray/rllib/evaluation/sampler.py b/python/ray/rllib/evaluation/sampler.py
index 2fd2fc4e272a..0bda18bc0361 100644
--- a/python/ray/rllib/evaluation/sampler.py
+++ b/python/ray/rllib/evaluation/sampler.py
@@ -2,6 +2,7 @@
from __future__ import division
from __future__ import print_function
+import gym
from collections import defaultdict, namedtuple
import logging
import numpy as np
@@ -47,7 +48,8 @@ def __init__(self,
callbacks,
horizon=None,
pack=False,
- tf_sess=None):
+ tf_sess=None,
+ clip_actions=True):
self.async_vector_env = AsyncVectorEnv.wrap_async(env)
self.unroll_length = unroll_length
self.horizon = horizon
@@ -58,7 +60,8 @@ def __init__(self,
self.rollout_provider = _env_runner(
self.async_vector_env, self.extra_batches.put, self.policies,
self.policy_mapping_fn, self.unroll_length, self.horizon,
- self._obs_filters, clip_rewards, pack, callbacks, tf_sess)
+ self._obs_filters, clip_rewards, clip_actions, pack, callbacks,
+ tf_sess)
self.metrics_queue = queue.Queue()
def get_data(self):
@@ -104,7 +107,8 @@ def __init__(self,
callbacks,
horizon=None,
pack=False,
- tf_sess=None):
+ tf_sess=None,
+ clip_actions=True):
for _, f in obs_filters.items():
assert getattr(f, "is_concurrent", False), \
"Observation Filter must support concurrent updates."
@@ -123,6 +127,7 @@ def __init__(self,
self.pack = pack
self.tf_sess = tf_sess
self.callbacks = callbacks
+ self.clip_actions = clip_actions
def run(self):
try:
@@ -135,8 +140,8 @@ def _run(self):
rollout_provider = _env_runner(
self.async_vector_env, self.extra_batches.put, self.policies,
self.policy_mapping_fn, self.unroll_length, self.horizon,
- self._obs_filters, self.clip_rewards, self.pack, self.callbacks,
- self.tf_sess)
+ self._obs_filters, self.clip_rewards, self.clip_actions, self.pack,
+ self.callbacks, self.tf_sess)
while True:
# The timeout variable exists because apparently, if one worker
# dies, the other workers won't die with it, unless the timeout is
@@ -197,6 +202,7 @@ def _env_runner(async_vector_env,
horizon,
obs_filters,
clip_rewards,
+ clip_actions,
pack,
callbacks,
tf_sess=None):
@@ -217,6 +223,7 @@ def _env_runner(async_vector_env,
clip_rewards (bool): Whether to clip rewards before postprocessing.
pack (bool): Whether to pack multiple episodes into each batch. This
guarantees batches will be exactly `unroll_length` in size.
+ clip_actions (bool): Whether to clip actions to the space range.
callbacks (dict): User callbacks to run on episode events.
tf_sess (Session|None): Optional tensorflow session to use for batching
TF policy evaluations.
@@ -272,7 +279,7 @@ def new_episode():
# Do batched policy eval
eval_results = _do_policy_eval(tf_sess, to_eval, policies,
- active_episodes)
+ active_episodes, clip_actions)
# Process results and update episode state
actions_to_send = _process_policy_eval_results(
@@ -413,7 +420,7 @@ def _process_observations(async_vector_env, policies, batch_builder_pool,
return active_envs, to_eval, outputs
-def _do_policy_eval(tf_sess, to_eval, policies, active_episodes):
+def _do_policy_eval(tf_sess, to_eval, policies, active_episodes, clip_actions):
"""Call compute actions on observation batches to get next actions.
Returns:
@@ -436,20 +443,25 @@ def _do_policy_eval(tf_sess, to_eval, policies, active_episodes):
builder, [t.obs for t in eval_data],
rnn_in_cols,
prev_action_batch=[t.prev_action for t in eval_data],
- prev_reward_batch=[t.prev_reward for t in eval_data],
- is_training=True)
+ prev_reward_batch=[t.prev_reward for t in eval_data])
else:
eval_results[policy_id] = policy.compute_actions(
[t.obs for t in eval_data],
rnn_in_cols,
prev_action_batch=[t.prev_action for t in eval_data],
prev_reward_batch=[t.prev_reward for t in eval_data],
- is_training=True,
episodes=[active_episodes[t.env_id] for t in eval_data])
if builder:
for k, v in pending_fetches.items():
eval_results[k] = builder.get(v)
+ if clip_actions:
+ for policy_id, results in eval_results.items():
+ policy = _get_or_raise(policies, policy_id)
+ actions, rnn_out_cols, pi_info_cols = results
+ eval_results[policy_id] = (_clip_actions(
+ actions, policy.action_space), rnn_out_cols, pi_info_cols)
+
return eval_results
@@ -518,6 +530,31 @@ def _fetch_atari_metrics(async_vector_env):
return atari_out
+def _clip_actions(actions, space):
+ """Called to clip actions to the specified range of this policy.
+
+ Arguments:
+ actions: Batch of actions or TupleActions.
+ space: Action space the actions should be present in.
+
+ Returns:
+ Clipped batch of actions.
+ """
+
+ if isinstance(space, gym.spaces.Box):
+ return np.clip(actions, space.low, space.high)
+ elif isinstance(space, gym.spaces.Tuple):
+ if not isinstance(actions, TupleActions):
+ raise ValueError("Expected tuple space for actions {}: {}".format(
+ actions, space))
+ out = []
+ for a, s in zip(actions.batches, space.spaces):
+ out.append(_clip_actions(a, s))
+ return TupleActions(out)
+ else:
+ return actions
+
+
def _unbatch_tuple_actions(action_batch):
# convert list of batches -> batch of lists
if isinstance(action_batch, TupleActions):
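
For a Box space, the clipping added above is a componentwise `np.clip` against the space bounds; Tuple spaces recurse per component, and other spaces pass through unchanged. A standalone sketch of the Box case:

```python
import gym
import numpy as np

space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2, ), dtype=np.float32)
actions = np.array([[1.7, -0.3], [-2.4, 0.9]], dtype=np.float32)
print(np.clip(actions, space.low, space.high))
# [[ 1.  -0.3]
#  [-1.   0.9]]
```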
diff --git a/python/ray/rllib/evaluation/tf_policy_graph.py b/python/ray/rllib/evaluation/tf_policy_graph.py
index 40e540013fef..95e7a5d66bcb 100644
--- a/python/ray/rllib/evaluation/tf_policy_graph.py
+++ b/python/ray/rllib/evaluation/tf_policy_graph.py
@@ -30,7 +30,7 @@ class TFPolicyGraph(PolicyGraph):
Examples:
>>> policy = TFPolicyGraphSubclass(
- sess, obs_input, action_sampler, loss, loss_inputs, is_training)
+ sess, obs_input, action_sampler, loss, loss_inputs)
>>> print(policy.compute_actions([1, 0, 2]))
(array([0, 1, 1]), [], {})
@@ -53,7 +53,8 @@ def __init__(self,
prev_reward_input=None,
seq_lens=None,
max_seq_len=20,
- batch_divisibility_req=1):
+ batch_divisibility_req=1,
+ update_ops=None):
"""Initialize the policy graph.
Arguments:
@@ -82,6 +83,9 @@ def __init__(self,
batch_divisibility_req (int): pad all agent experiences batches to
multiples of this value. This only has an effect if not using
a LSTM model.
+ update_ops (list): override the batchnorm update ops to run when
+ applying gradients. Otherwise we run all update ops found in
+ the current variable scope.
"""
self.observation_space = observation_space
@@ -94,7 +98,7 @@ def __init__(self,
self._loss = loss
self._loss_inputs = loss_inputs
self._loss_input_dict = dict(self._loss_inputs)
- self._is_training = tf.placeholder_with_default(True, ())
+ self._is_training = self._get_is_training_placeholder()
self._state_inputs = state_inputs or []
self._state_outputs = state_outputs or []
for i, ph in enumerate(self._state_inputs):
@@ -108,14 +112,24 @@ def __init__(self,
for (g, v) in self.gradients(self._optimizer)
if g is not None]
self._grads = [g for (g, v) in self._grads_and_vars]
- # specify global_step for TD3 which needs to count the num updates
- self._apply_op = self._optimizer.apply_gradients(
- self._grads_and_vars,
- global_step=tf.train.get_or_create_global_step())
-
self._variables = ray.experimental.TensorFlowVariables(
self._loss, self._sess)
+ # gather update ops for any batch norm layers
+ if update_ops:
+ self._update_ops = update_ops
+ else:
+ self._update_ops = tf.get_collection(
+ tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
+ if self._update_ops:
+ logger.debug("Update ops to run on apply gradient: {}".format(
+ self._update_ops))
+ with tf.control_dependencies(self._update_ops):
+ # specify global_step for TD3 which needs to count the num updates
+ self._apply_op = self._optimizer.apply_gradients(
+ self._grads_and_vars,
+ global_step=tf.train.get_or_create_global_step())
+
if len(self._state_inputs) != len(self._state_outputs):
raise ValueError(
"Number of state input and output tensors must match, got: "
@@ -138,7 +152,6 @@ def build_compute_actions(self,
state_batches=None,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episodes=None):
state_batches = state_batches or []
assert len(self._state_inputs) == len(state_batches), \
@@ -151,7 +164,7 @@ def build_compute_actions(self,
builder.add_feed_dict({self._prev_action_input: prev_action_batch})
if self._prev_reward_input is not None and prev_reward_batch:
builder.add_feed_dict({self._prev_reward_input: prev_reward_batch})
- builder.add_feed_dict({self._is_training: is_training})
+ builder.add_feed_dict({self._is_training: False})
builder.add_feed_dict(dict(zip(self._state_inputs, state_batches)))
fetches = builder.add_fetches([self._sampler] + self._state_outputs +
[self.extra_compute_action_fetches()])
@@ -162,12 +175,11 @@ def compute_actions(self,
state_batches=None,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episodes=None):
builder = TFRunBuilder(self._sess, "compute_actions")
fetches = self.build_compute_actions(builder, obs_batch, state_batches,
prev_action_batch,
- prev_reward_batch, is_training)
+ prev_reward_batch)
return builder.get(fetches)
def _get_loss_inputs_dict(self, batch):
@@ -287,6 +299,15 @@ def gradients(self, optimizer):
def loss_inputs(self):
return self._loss_inputs
+ def _get_is_training_placeholder(self):
+ """Get the placeholder for _is_training, i.e., for batch norm layers.
+
+ This can be called safely before __init__ has run.
+ """
+ if not hasattr(self, "_is_training"):
+ self._is_training = tf.placeholder_with_default(False, ())
+ return self._is_training
+
class LearningRateSchedule(object):
"""Mixin for TFPolicyGraph that adds a learning rate schedule."""
diff --git a/python/ray/rllib/evaluation/torch_policy_graph.py b/python/ray/rllib/evaluation/torch_policy_graph.py
index cb990c36f8bf..a762927bab44 100644
--- a/python/ray/rllib/evaluation/torch_policy_graph.py
+++ b/python/ray/rllib/evaluation/torch_policy_graph.py
@@ -72,7 +72,6 @@ def compute_actions(self,
state_batches=None,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episodes=None):
if state_batches:
raise NotImplementedError("Torch RNN support")
diff --git a/python/ray/rllib/examples/batch_norm_model.py b/python/ray/rllib/examples/batch_norm_model.py
new file mode 100644
index 000000000000..abd4b53666a2
--- /dev/null
+++ b/python/ray/rllib/examples/batch_norm_model.py
@@ -0,0 +1,64 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+"""Example of using a custom model with batch norm."""
+
+import argparse
+
+import tensorflow as tf
+import tensorflow.contrib.slim as slim
+
+import ray
+from ray.rllib.models import Model, ModelCatalog
+from ray.rllib.models.misc import normc_initializer
+from ray.tune import run_experiments
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--num-iters", type=int, default=200)
+parser.add_argument("--run", type=str, default="PPO")
+
+
+class BatchNormModel(Model):
+ def _build_layers_v2(self, input_dict, num_outputs, options):
+ last_layer = input_dict["obs"]
+ hiddens = [256, 256]
+ for i, size in enumerate(hiddens):
+ label = "fc{}".format(i)
+ last_layer = slim.fully_connected(
+ last_layer,
+ size,
+ weights_initializer=normc_initializer(1.0),
+ activation_fn=tf.nn.tanh,
+ scope=label)
+ # Add a batch norm layer
+ last_layer = tf.layers.batch_normalization(
+ last_layer, training=input_dict["is_training"])
+ output = slim.fully_connected(
+ last_layer,
+ num_outputs,
+ weights_initializer=normc_initializer(0.01),
+ activation_fn=None,
+ scope="fc_out")
+ return output, last_layer
+
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+ ray.init()
+
+ ModelCatalog.register_custom_model("bn_model", BatchNormModel)
+ run_experiments({
+ "batch_norm_demo": {
+ "run": args.run,
+ "env": "Pendulum-v0" if args.run == "DDPG" else "CartPole-v0",
+ "stop": {
+ "training_iteration": args.num_iters
+ },
+ "config": {
+ "model": {
+ "custom_model": "bn_model",
+ },
+ "num_workers": 0,
+ },
+ },
+ })
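
As a usage note, the `is_training` tensor consumed by `tf.layers.batch_normalization` here is the placeholder that `TFPolicyGraph._get_is_training_placeholder()` (see the `tf_policy_graph.py` changes above) now feeds into every model's `input_dict`; assuming Ray and TensorFlow are installed, the example can be run directly, e.g. `python batch_norm_model.py --run PPO`.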
diff --git a/python/ray/rllib/examples/carla/a3c_lane_keep.py b/python/ray/rllib/examples/carla/a3c_lane_keep.py
deleted file mode 100644
index 9629808ba4c7..000000000000
--- a/python/ray/rllib/examples/carla/a3c_lane_keep.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ray
-from ray.tune import register_env, run_experiments
-
-from env import CarlaEnv, ENV_CONFIG
-from models import register_carla_model
-from scenarios import LANE_KEEP
-
-env_name = "carla_env"
-env_config = ENV_CONFIG.copy()
-env_config.update({
- "verbose": False,
- "x_res": 80,
- "y_res": 80,
- "use_depth_camera": False,
- "discrete_actions": False,
- "server_map": "/Game/Maps/Town02",
- "reward_function": "lane_keep",
- "enable_planner": False,
- "scenarios": [LANE_KEEP],
-})
-
-register_env(env_name, lambda env_config: CarlaEnv(env_config))
-register_carla_model()
-
-ray.init()
-run_experiments({
- "carla-a3c": {
- "run": "A3C",
- "env": "carla_env",
- "config": {
- "env_config": env_config,
- "model": {
- "custom_model": "carla",
- "custom_options": {
- "image_shape": [80, 80, 6],
- },
- "conv_filters": [
- [16, [8, 8], 4],
- [32, [4, 4], 2],
- [512, [10, 10], 1],
- ],
- },
- "gamma": 0.8,
- "num_workers": 1,
- },
- },
-})
diff --git a/python/ray/rllib/examples/carla/dqn_lane_keep.py b/python/ray/rllib/examples/carla/dqn_lane_keep.py
deleted file mode 100644
index 84fed98cd5f9..000000000000
--- a/python/ray/rllib/examples/carla/dqn_lane_keep.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ray
-from ray.tune import register_env, run_experiments
-
-from env import CarlaEnv, ENV_CONFIG
-from models import register_carla_model
-from scenarios import LANE_KEEP
-
-env_name = "carla_env"
-env_config = ENV_CONFIG.copy()
-env_config.update({
- "verbose": False,
- "x_res": 80,
- "y_res": 80,
- "use_depth_camera": False,
- "discrete_actions": True,
- "server_map": "/Game/Maps/Town02",
- "reward_function": "lane_keep",
- "enable_planner": False,
- "scenarios": [LANE_KEEP],
-})
-
-register_env(env_name, lambda env_config: CarlaEnv(env_config))
-register_carla_model()
-
-ray.init()
-run_experiments({
- "carla-dqn": {
- "run": "DQN",
- "env": "carla_env",
- "config": {
- "env_config": env_config,
- "model": {
- "custom_model": "carla",
- "custom_options": {
- "image_shape": [80, 80, 6],
- },
- "conv_filters": [
- [16, [8, 8], 4],
- [32, [4, 4], 2],
- [512, [10, 10], 1],
- ],
- },
- "timesteps_per_iteration": 100,
- "learning_starts": 1000,
- "schedule_max_timesteps": 100000,
- "gamma": 0.8,
- },
- },
-})
diff --git a/python/ray/rllib/examples/carla/ppo_lane_keep.py b/python/ray/rllib/examples/carla/ppo_lane_keep.py
deleted file mode 100644
index ac0f6ff8aff0..000000000000
--- a/python/ray/rllib/examples/carla/ppo_lane_keep.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ray
-from ray.tune import register_env, run_experiments
-
-from env import CarlaEnv, ENV_CONFIG
-from models import register_carla_model
-from scenarios import LANE_KEEP
-
-env_name = "carla_env"
-env_config = ENV_CONFIG.copy()
-env_config.update({
- "verbose": False,
- "x_res": 80,
- "y_res": 80,
- "use_depth_camera": False,
- "discrete_actions": False,
- "server_map": "/Game/Maps/Town02",
- "reward_function": "lane_keep",
- "enable_planner": False,
- "scenarios": [LANE_KEEP],
-})
-
-register_env(env_name, lambda env_config: CarlaEnv(env_config))
-register_carla_model()
-
-ray.init()
-run_experiments({
- "carla-ppo": {
- "run": "PPO",
- "env": "carla_env",
- "config": {
- "env_config": env_config,
- "model": {
- "custom_model": "carla",
- "custom_options": {
- "image_shape": [80, 80, 6],
- },
- "conv_filters": [
- [16, [8, 8], 4],
- [32, [4, 4], 2],
- [512, [10, 10], 1],
- ],
- },
- "num_workers": 1,
- "timesteps_per_batch": 2000,
- "min_steps_per_task": 100,
- "lambda": 0.95,
- "clip_param": 0.2,
- "num_sgd_iter": 20,
- "sgd_stepsize": 0.0001,
- "sgd_batchsize": 32,
- "devices": ["/gpu:0"],
- "tf_session_args": {
- "gpu_options": {
- "allow_growth": True
- }
- }
- },
- },
-})
diff --git a/python/ray/rllib/examples/carla/train_a3c.py b/python/ray/rllib/examples/carla/train_a3c.py
index 2c12cd8245cf..8fbcfbc576d1 100644
--- a/python/ray/rllib/examples/carla/train_a3c.py
+++ b/python/ray/rllib/examples/carla/train_a3c.py
@@ -3,13 +3,12 @@
from __future__ import print_function
import ray
-from ray.tune import grid_search, register_env, run_experiments
+from ray.tune import grid_search, run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_STRAIGHT
-env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
@@ -23,7 +22,6 @@
"scenarios": TOWN2_STRAIGHT,
})
-register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
redis_address = ray.services.get_node_ip_address() + ":6379"
@@ -31,7 +29,7 @@
run_experiments({
"carla-a3c": {
"run": "A3C",
- "env": "carla_env",
+ "env": CarlaEnv,
"config": {
"env_config": env_config,
"use_gpu_for_workers": True,
diff --git a/python/ray/rllib/examples/carla/train_dqn.py b/python/ray/rllib/examples/carla/train_dqn.py
index fa2dba1053aa..27aa65444d38 100644
--- a/python/ray/rllib/examples/carla/train_dqn.py
+++ b/python/ray/rllib/examples/carla/train_dqn.py
@@ -3,13 +3,12 @@
from __future__ import print_function
import ray
-from ray.tune import register_env, run_experiments
+from ray.tune import run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_ONE_CURVE
-env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
@@ -21,7 +20,6 @@
"scenarios": TOWN2_ONE_CURVE,
})
-register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
ray.init()
@@ -35,7 +33,7 @@ def shape_out(spec):
run_experiments({
"carla-dqn": {
"run": "DQN",
- "env": "carla_env",
+ "env": CarlaEnv,
"config": {
"env_config": env_config,
"model": {
diff --git a/python/ray/rllib/examples/carla/train_ppo.py b/python/ray/rllib/examples/carla/train_ppo.py
index a9339ca79481..6c49240142c2 100644
--- a/python/ray/rllib/examples/carla/train_ppo.py
+++ b/python/ray/rllib/examples/carla/train_ppo.py
@@ -3,13 +3,12 @@
from __future__ import print_function
import ray
-from ray.tune import register_env, run_experiments
+from ray.tune import run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_STRAIGHT
-env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
@@ -20,14 +19,13 @@
"server_map": "/Game/Maps/Town02",
"scenarios": TOWN2_STRAIGHT,
})
-register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
ray.init(redirect_output=True)
run_experiments({
"carla": {
"run": "PPO",
- "env": "carla_env",
+ "env": CarlaEnv,
"config": {
"env_config": env_config,
"model": {
diff --git a/python/ray/rllib/examples/custom_env.py b/python/ray/rllib/examples/custom_env.py
index 66c0288081f9..0d96eef6acb6 100644
--- a/python/ray/rllib/examples/custom_env.py
+++ b/python/ray/rllib/examples/custom_env.py
@@ -11,7 +11,6 @@
import ray
from ray.tune import run_experiments
-from ray.tune.registry import register_env
class SimpleCorridor(gym.Env):
@@ -42,13 +41,13 @@ def step(self, action):
if __name__ == "__main__":
- env_creator_name = "corridor"
- register_env(env_creator_name, lambda config: SimpleCorridor(config))
+ # Can also register the env creator function explicitly with:
+ # register_env("corridor", lambda config: SimpleCorridor(config))
ray.init()
run_experiments({
"demo": {
"run": "PPO",
- "env": "corridor",
+ "env": SimpleCorridor, # or "corridor" if registered above
"config": {
"env_config": {
"corridor_length": 5,
diff --git a/python/ray/rllib/examples/custom_metrics_and_callbacks.py b/python/ray/rllib/examples/custom_metrics_and_callbacks.py
index eec7bffb571f..c92ae8783748 100644
--- a/python/ray/rllib/examples/custom_metrics_and_callbacks.py
+++ b/python/ray/rllib/examples/custom_metrics_and_callbacks.py
@@ -35,6 +35,13 @@ def on_sample_end(info):
print("returned sample batch of size {}".format(info["samples"].count))
+def on_train_result(info):
+ print("agent.train() result: {} -> {} episodes".format(
+ info["agent"], info["result"]["episodes_this_iter"]))
+ # you can mutate the result dict to add new fields to return
+ info["result"]["callback_ok"] = True
+
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--num-iters", type=int, default=2000)
@@ -54,6 +61,7 @@ def on_sample_end(info):
"on_episode_step": tune.function(on_episode_step),
"on_episode_end": tune.function(on_episode_end),
"on_sample_end": tune.function(on_sample_end),
+ "on_train_result": tune.function(on_train_result),
},
},
}
@@ -64,3 +72,4 @@ def on_sample_end(info):
print(custom_metrics)
assert "mean_pole_angle" in custom_metrics
assert type(custom_metrics["mean_pole_angle"]) is float
+ assert "callback_ok" in trials[0].last_result
diff --git a/python/ray/rllib/examples/parametric_action_cartpole.py b/python/ray/rllib/examples/parametric_action_cartpole.py
new file mode 100644
index 000000000000..a1438f0a2412
--- /dev/null
+++ b/python/ray/rllib/examples/parametric_action_cartpole.py
@@ -0,0 +1,196 @@
+"""Example of handling variable length and/or parametric action spaces.
+
+This is a toy example of the action-embedding based approach for handling large
+discrete action spaces (potentially infinite in size), similar to how
+OpenAI Five works:
+
+ https://neuro.cs.ut.ee/the-use-of-embeddings-in-openai-five/
+
+This currently works with RLlib's policy gradient style algorithms
+(e.g., PG, PPO, IMPALA, A2C) and also DQN.
+
+Note that since the model outputs now include "-inf" tf.float32.min
+values, not all algorithm options are supported at the moment. For example,
+algorithms might crash if they don't properly ignore the -inf action scores.
+Working configurations are given below.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import random
+import numpy as np
+import gym
+from gym.spaces import Box, Discrete, Dict
+import tensorflow as tf
+import tensorflow.contrib.slim as slim
+
+import ray
+from ray.rllib.models import Model, ModelCatalog
+from ray.rllib.models.misc import normc_initializer
+from ray.tune import run_experiments
+from ray.tune.registry import register_env
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--stop", type=int, default=200)
+parser.add_argument("--run", type=str, default="PPO")
+
+
+class ParametricActionCartpole(gym.Env):
+ """Parametric action version of CartPole.
+
+ In this env there are only ever two valid actions, but we pretend there are
+ actually up to `max_avail_actions` actions that can be taken, and the two
+ valid actions are randomly hidden among this set.
+
+ At each step, we emit a dict of:
+ - the actual cart observation
+ - a mask of valid actions (e.g., [0, 0, 1, 0, 0, 1] for 6 max avail)
+ - the list of action embeddings (w/ zeroes for invalid actions) (e.g.,
+ [[0, 0],
+ [0, 0],
+ [-0.2322, -0.2569],
+ [0, 0],
+ [0, 0],
+ [0.7878, 1.2297]] for max_avail_actions=6)
+
+    In a real environment, the action embeddings would be larger than two
+ units of course, and also there would be a variable number of valid actions
+ per step instead of always [LEFT, RIGHT].
+ """
+
+ def __init__(self, max_avail_actions):
+ # Use simple random 2-unit action embeddings for [LEFT, RIGHT]
+ self.left_action_embed = np.random.randn(2)
+ self.right_action_embed = np.random.randn(2)
+ self.action_space = Discrete(max_avail_actions)
+ self.wrapped = gym.make("CartPole-v0")
+ self.observation_space = Dict({
+ "action_mask": Box(0, 1, shape=(max_avail_actions, )),
+ "avail_actions": Box(-1, 1, shape=(max_avail_actions, 2)),
+ "cart": self.wrapped.observation_space,
+ })
+
+ def update_avail_actions(self):
+ self.action_assignments = [[0, 0]] * self.action_space.n
+ self.action_mask = [0] * self.action_space.n
+ self.left_idx, self.right_idx = random.sample(
+ range(self.action_space.n), 2)
+ self.action_assignments[self.left_idx] = self.left_action_embed
+ self.action_assignments[self.right_idx] = self.right_action_embed
+ self.action_mask[self.left_idx] = 1
+ self.action_mask[self.right_idx] = 1
+
+ def reset(self):
+ self.update_avail_actions()
+ return {
+ "action_mask": self.action_mask,
+ "avail_actions": self.action_assignments,
+ "cart": self.wrapped.reset(),
+ }
+
+ def step(self, action):
+ if action == self.left_idx:
+ actual_action = 0
+ elif action == self.right_idx:
+ actual_action = 1
+ else:
+ raise ValueError(
+ "Chosen action was not one of the non-zero action embeddings",
+ action, self.action_assignments, self.action_mask,
+ self.left_idx, self.right_idx)
+ orig_obs, rew, done, info = self.wrapped.step(actual_action)
+ self.update_avail_actions()
+ obs = {
+ "action_mask": self.action_mask,
+ "avail_actions": self.action_assignments,
+ "cart": orig_obs,
+ }
+ return obs, rew, done, info
+
+
+class ParametricActionsModel(Model):
+ """Parametric action model that handles the dot product and masking.
+
+ This assumes the outputs are logits for a single Categorical action dist.
+ Getting this to work with a more complex output (e.g., if the action space
+ is a tuple of several distributions) is also possible but left as an
+ exercise to the reader.
+ """
+
+ def _build_layers_v2(self, input_dict, num_outputs, options):
+ # Extract the available actions tensor from the observation.
+ avail_actions = input_dict["obs"]["avail_actions"]
+ action_mask = input_dict["obs"]["action_mask"]
+ action_embed_size = avail_actions.shape[2].value
+ if num_outputs != avail_actions.shape[1].value:
+ raise ValueError(
+ "This model assumes num outputs is equal to max avail actions",
+ num_outputs, avail_actions)
+
+ # Standard FC net component.
+ last_layer = input_dict["obs"]["cart"]
+ hiddens = [256, 256]
+ for i, size in enumerate(hiddens):
+ label = "fc{}".format(i)
+ last_layer = slim.fully_connected(
+ last_layer,
+ size,
+ weights_initializer=normc_initializer(1.0),
+ activation_fn=tf.nn.tanh,
+ scope=label)
+ output = slim.fully_connected(
+ last_layer,
+ action_embed_size,
+ weights_initializer=normc_initializer(0.01),
+ activation_fn=None,
+ scope="fc_out")
+
+ # Expand the model output to [BATCH, 1, EMBED_SIZE]. Note that the
+ # avail actions tensor is of shape [BATCH, MAX_ACTIONS, EMBED_SIZE].
+ intent_vector = tf.expand_dims(output, 1)
+
+ # Batch dot product => shape of logits is [BATCH, MAX_ACTIONS].
+ action_logits = tf.reduce_sum(avail_actions * intent_vector, axis=2)
+
+ # Mask out invalid actions (use tf.float32.min for stability)
+ inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
+ masked_logits = inf_mask + action_logits
+
+ return masked_logits, last_layer
+
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+ ray.init()
+
+ ModelCatalog.register_custom_model("pa_model", ParametricActionsModel)
+ register_env("pa_cartpole", lambda _: ParametricActionCartpole(10))
+ if args.run == "PPO":
+ cfg = {
+ "observation_filter": "NoFilter", # don't filter the action list
+ "vf_share_layers": True, # don't create duplicate value model
+ }
+ elif args.run == "DQN":
+ cfg = {
+ "hiddens": [], # don't postprocess the action scores
+ }
+ else:
+ cfg = {}
+ run_experiments({
+ "parametric_cartpole": {
+ "run": args.run,
+ "env": "pa_cartpole",
+ "stop": {
+ "episode_reward_mean": args.stop,
+ },
+ "config": dict({
+ "model": {
+ "custom_model": "pa_model",
+ },
+ "num_workers": 0,
+ }, **cfg),
+ },
+ })
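
The key step in `_build_layers_v2` above is adding `log(action_mask)`, clamped to `tf.float32.min`, to the dot-product logits: masked actions get an effectively zero probability while valid actions are left untouched. A NumPy sketch of just that step:

```python
import numpy as np

FLOAT32_MIN = np.finfo(np.float32).min

action_mask = np.array([0., 0., 1., 0., 1.], dtype=np.float32)
action_logits = np.array([0.3, -0.1, 1.2, 0.8, -0.5], dtype=np.float32)

with np.errstate(divide="ignore"):  # log(0) = -inf is intentional here
    inf_mask = np.maximum(np.log(action_mask), FLOAT32_MIN)
masked_logits = action_logits + inf_mask
print(masked_logits)  # masked entries sit near float32.min, valid ones unchanged
```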
diff --git a/python/ray/rllib/models/action_dist.py b/python/ray/rllib/models/action_dist.py
index 75a43deeb789..76d45e244151 100644
--- a/python/ray/rllib/models/action_dist.py
+++ b/python/ray/rllib/models/action_dist.py
@@ -95,19 +95,10 @@ class DiagGaussian(ActionDistribution):
second half the gaussian standard deviations.
"""
- def __init__(self, inputs, low=None, high=None):
+ def __init__(self, inputs):
ActionDistribution.__init__(self, inputs)
mean, log_std = tf.split(inputs, 2, axis=1)
self.mean = mean
- self.low = low
- self.high = high
-
- # Squash to range if specified. We use a sigmoid here this to avoid the
- # mean drifting too far past the bounds and causing nan outputs.
- # https://github.com/ray-project/ray/issues/1862
- if low is not None:
- self.mean = low + tf.sigmoid(self.mean) * (high - low)
-
self.log_std = log_std
self.std = tf.exp(log_std)
@@ -131,10 +122,7 @@ def entropy(self):
reduction_indices=[1])
def sample(self):
- out = self.mean + self.std * tf.random_normal(tf.shape(self.mean))
- if self.low is not None:
- out = tf.clip_by_value(out, self.low, self.high)
- return out
+ return self.mean + self.std * tf.random_normal(tf.shape(self.mean))
class Deterministic(ActionDistribution):
@@ -147,34 +135,6 @@ def sample(self):
return self.inputs
-def squash_to_range(dist_cls, low, high):
- """Squashes an action distribution to a range in (low, high).
-
- Arguments:
- dist_cls (class): ActionDistribution class to wrap.
- low (float|array): Scalar value or array of values.
- high (float|array): Scalar value or array of values.
- """
-
- class SquashToRangeWrapper(dist_cls):
- def __init__(self, inputs):
- dist_cls.__init__(self, inputs, low=low, high=high)
-
- def logp(self, x):
- return dist_cls.logp(self, x)
-
- def kl(self, other):
- return dist_cls.kl(self, other)
-
- def entropy(self):
- return dist_cls.entropy(self)
-
- def sample(self):
- return dist_cls.sample(self)
-
- return SquashToRangeWrapper
-
-
class MultiActionDistribution(ActionDistribution):
"""Action distribution that operates for list of actions.
diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py
index 8f0b8ac82540..f9e8af2829f8 100644
--- a/python/ray/rllib/models/catalog.py
+++ b/python/ray/rllib/models/catalog.py
@@ -15,8 +15,7 @@
from ray.rllib.env.external_env import ExternalEnv
from ray.rllib.env.vector_env import VectorEnv
from ray.rllib.models.action_dist import (
- Categorical, Deterministic, DiagGaussian, MultiActionDistribution,
- squash_to_range)
+ Categorical, Deterministic, DiagGaussian, MultiActionDistribution)
from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.models.visionnet import VisionNetwork
@@ -38,7 +37,7 @@
"fcnet_hiddens": [256, 256],
# For control envs, documented in ray.rllib.models.Model
"free_log_std": False,
- # Whether to squash the action output to space range
+ # (deprecated) Whether to use sigmoid to squash actions to space range
"squash_to_range": False,
# == LSTM ==
@@ -114,8 +113,9 @@ def get_action_dist(action_space, config, dist_type=None):
if dist_type is None:
dist = DiagGaussian
if config.get("squash_to_range"):
- dist = squash_to_range(dist, action_space.low,
- action_space.high)
+ raise ValueError(
+ "The squash_to_range option is deprecated. See the "
+ "clip_actions agent option instead.")
return dist, action_space.shape[0] * 2
elif dist_type == "deterministic":
return Deterministic, action_space.shape[0]
@@ -217,7 +217,7 @@ def _get_model(input_dict, obs_space, num_outputs, options, state_in,
seq_lens):
if options.get("custom_model"):
model = options["custom_model"]
- logger.info("Using custom model {}".format(model))
+ logger.debug("Using custom model {}".format(model))
return _global_registry.get(RLLIB_MODEL, model)(
input_dict,
obs_space,
diff --git a/python/ray/rllib/models/model.py b/python/ray/rllib/models/model.py
index d5147168c2fb..561b636dc863 100644
--- a/python/ray/rllib/models/model.py
+++ b/python/ray/rllib/models/model.py
@@ -23,7 +23,7 @@ class Model(object):
Attributes:
input_dict (dict): Dictionary of input tensors, including "obs",
- "prev_action", "prev_reward".
+ "prev_action", "prev_reward", "is_training".
outputs (Tensor): The output vector of this model, of shape
[BATCH_SIZE, num_outputs].
last_layer (Tensor): The feature layer right before the model output,
@@ -108,7 +108,7 @@ def _build_layers_v2(self, input_dict, num_outputs, options):
Arguments:
input_dict (dict): Dictionary of input tensors, including "obs",
- "prev_action", "prev_reward".
+ "prev_action", "prev_reward", "is_training".
num_outputs (int): Output tensor must be of size
[BATCH_SIZE, num_outputs].
options (dict): Model options.
@@ -124,6 +124,7 @@ def _build_layers_v2(self, input_dict, num_outputs, options):
>>> print(input_dict)
{'prev_actions': ,
'prev_rewards': ,
+ 'is_training': ,
'obs': OrderedDict([
('sensors', OrderedDict([
('front_cam', [
diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py
index 074fda29b96a..a4af708b7915 100644
--- a/python/ray/rllib/models/preprocessors.py
+++ b/python/ray/rllib/models/preprocessors.py
@@ -2,6 +2,7 @@
from __future__ import division
from __future__ import print_function
+from collections import OrderedDict
import cv2
import logging
import numpy as np
@@ -164,6 +165,8 @@ def _init_shape(self, obs_space, options):
return (size, )
def transform(self, observation):
+ if not isinstance(observation, OrderedDict):
+ observation = OrderedDict(sorted(list(observation.items())))
assert len(observation) == len(self.preprocessors), \
(len(observation), len(self.preprocessors))
return np.concatenate([
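
The fallback above matters because gym `Dict` spaces keep their subspaces in key-sorted order while an env may return a plain `dict`; normalizing to a sorted `OrderedDict` keeps the flattened layout stable. A quick illustration, using the keys from the parametric CartPole example above:

```python
from collections import OrderedDict

obs = {"cart": [0.1, 0.2], "action_mask": [0, 1], "avail_actions": [[0, 0]]}
ordered = OrderedDict(sorted(obs.items()))
print(list(ordered.keys()))  # ['action_mask', 'avail_actions', 'cart']
```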
diff --git a/python/ray/rllib/models/visionnet.py b/python/ray/rllib/models/visionnet.py
index 4105af7dd367..1d856e42cec4 100644
--- a/python/ray/rllib/models/visionnet.py
+++ b/python/ray/rllib/models/visionnet.py
@@ -16,7 +16,7 @@ def _build_layers_v2(self, input_dict, num_outputs, options):
inputs = input_dict["obs"]
filters = options.get("conv_filters")
if not filters:
- filters = get_filter_config(options)
+ filters = get_filter_config(inputs)
activation = get_activation_fn(options.get("conv_activation"))
@@ -47,7 +47,7 @@ def _build_layers_v2(self, input_dict, num_outputs, options):
return flatten(fc2), flatten(fc1)
-def get_filter_config(options):
+def get_filter_config(inputs):
filters_84x84 = [
[16, [8, 8], 4],
[32, [4, 4], 2],
@@ -58,12 +58,15 @@ def get_filter_config(options):
[32, [4, 4], 2],
[256, [11, 11], 1],
]
- dim = options.get("dim")
- if dim == 84:
+ shape = inputs.shape.as_list()[1:]
+ if len(shape) == 3 and shape[:2] == [84, 84]:
return filters_84x84
- elif dim == 42:
+ elif len(shape) == 3 and shape[:2] == [42, 42]:
return filters_42x42
else:
raise ValueError(
- "No default configuration for image size={}".format(dim) +
- ", you must specify `conv_filters` manually as a model option.")
+ "No default configuration for obs input {}".format(inputs) +
+ ", you must specify `conv_filters` manually as a model option. "
+ "Default configurations are only available for inputs of size "
+ "[?, 42, 42, K] and [?, 84, 84, K]. You may alternatively want "
+ "to use a custom model or preprocessor.")
diff --git a/python/ray/rllib/optimizers/multi_gpu_impl.py b/python/ray/rllib/optimizers/multi_gpu_impl.py
index 1affe8df395e..c548b20cc022 100644
--- a/python/ray/rllib/optimizers/multi_gpu_impl.py
+++ b/python/ray/rllib/optimizers/multi_gpu_impl.py
@@ -3,12 +3,15 @@
from __future__ import print_function
from collections import namedtuple
+import logging
import tensorflow as tf
# Variable scope in which created variables will be placed under
TOWER_SCOPE_NAME = "tower"
+logger = logging.getLogger(__name__)
+
class LocalSyncParallelOptimizer(object):
"""Optimizer that runs in parallel across multiple local devices.
@@ -63,6 +66,8 @@ def __init__(self,
# First initialize the shared loss network
with tf.name_scope(TOWER_SCOPE_NAME):
self._shared_loss = build_graph(self.loss_inputs)
+ shared_ops = tf.get_collection(
+ tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
# Then setup the per-device loss graphs that use the shared weights
self._batch_index = tf.placeholder(tf.int32, name="batch_index")
@@ -95,7 +100,20 @@ def __init__(self,
clipped, _ = tf.clip_by_global_norm(clipped, grad_norm_clipping)
for i, (grad, var) in enumerate(avg):
avg[i] = (clipped[i], var)
- self._train_op = self.optimizer.apply_gradients(avg)
+
+ # gather update ops for any batch norm layers. TODO(ekl) here we will
+ # use all the ops found which won't work for DQN / DDPG, but those
+        # aren't supported with multi-gpu right now anyway.
+ self._update_ops = tf.get_collection(
+ tf.GraphKeys.UPDATE_OPS, scope=tf.get_variable_scope().name)
+ for op in shared_ops:
+ self._update_ops.remove(op) # only care about tower update ops
+ if self._update_ops:
+ logger.debug("Update ops to run on apply gradient: {}".format(
+ self._update_ops))
+
+ with tf.control_dependencies(self._update_ops):
+ self._train_op = self.optimizer.apply_gradients(avg)
def load_data(self, sess, inputs, state_inputs):
"""Bulk loads the specified inputs into device memory.
diff --git a/python/ray/rllib/optimizers/multi_gpu_optimizer.py b/python/ray/rllib/optimizers/multi_gpu_optimizer.py
index 7e01ee9041dc..771acb5ac72c 100644
--- a/python/ray/rllib/optimizers/multi_gpu_optimizer.py
+++ b/python/ray/rllib/optimizers/multi_gpu_optimizer.py
@@ -3,6 +3,7 @@
from __future__ import print_function
import logging
+import math
import numpy as np
from collections import defaultdict
import tensorflow as tf
@@ -44,7 +45,9 @@ def _init(self,
if not num_gpus:
self.devices = ["/cpu:0"]
else:
- self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)]
+ self.devices = [
+ "/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
+ ]
self.batch_size = int(sgd_batch_size / len(self.devices)) * len(
self.devices)
assert self.batch_size % len(self.devices) == 0
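
With `math.ceil`, a fractional `num_gpus` (such as the `num_gpus: 0.2` settings in the tuned examples below) still maps to at least one device string. A tiny sketch of the device-list logic:

```python
import math

def gpu_devices(num_gpus):
    # Mirrors the device selection above: 0 -> CPU, fractional -> ceil.
    if not num_gpus:
        return ["/cpu:0"]
    return ["/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))]

print(gpu_devices(0))    # ['/cpu:0']
print(gpu_devices(0.2))  # ['/gpu:0']
print(gpu_devices(2))    # ['/gpu:0', '/gpu:1']
```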
diff --git a/python/ray/rllib/test/test_catalog.py b/python/ray/rllib/test/test_catalog.py
index 852a02fc4d1e..efa1aba0e2f0 100644
--- a/python/ray/rllib/test/test_catalog.py
+++ b/python/ray/rllib/test/test_catalog.py
@@ -72,13 +72,13 @@ def testDefaultModels(self):
with tf.variable_scope("test1"):
p1 = ModelCatalog.get_model({
- "obs": np.zeros((10, 3), dtype=np.float32)
+ "obs": tf.zeros((10, 3), dtype=tf.float32)
}, Box(0, 1, shape=(3, ), dtype=np.float32), 5, {})
self.assertEqual(type(p1), FullyConnectedNetwork)
with tf.variable_scope("test2"):
p2 = ModelCatalog.get_model({
- "obs": np.zeros((10, 84, 84, 3), dtype=np.float32)
+ "obs": tf.zeros((10, 84, 84, 3), dtype=tf.float32)
}, Box(0, 1, shape=(84, 84, 3), dtype=np.float32), 5, {})
self.assertEqual(type(p2), VisionNetwork)
diff --git a/python/ray/rllib/test/test_multi_agent_env.py b/python/ray/rllib/test/test_multi_agent_env.py
index 5b4099b3c71f..5712390c05c6 100644
--- a/python/ray/rllib/test/test_multi_agent_env.py
+++ b/python/ray/rllib/test/test_multi_agent_env.py
@@ -323,7 +323,6 @@ def compute_actions(self,
state_batches,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episodes=None):
return [0] * len(obs_batch), [[h] * len(obs_batch)], {}
@@ -348,7 +347,6 @@ def compute_actions(self,
state_batches,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episodes=None):
# Pretend we did a model-based rollout and want to return
# the extra trajectory.
diff --git a/python/ray/rllib/test/test_policy_evaluator.py b/python/ray/rllib/test/test_policy_evaluator.py
index 7b4d6c8b5ae0..cf319a7e922b 100644
--- a/python/ray/rllib/test/test_policy_evaluator.py
+++ b/python/ray/rllib/test/test_policy_evaluator.py
@@ -25,7 +25,6 @@ def compute_actions(self,
state_batches,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episodes=None):
return [0] * len(obs_batch), [], {}
@@ -43,7 +42,6 @@ def compute_actions(self,
state_batches,
prev_action_batch=None,
prev_reward_batch=None,
- is_training=False,
episodes=None):
raise Exception("intentional error")
diff --git a/python/ray/rllib/test/test_supported_spaces.py b/python/ray/rllib/test/test_supported_spaces.py
index b98a006bca3b..7a5e45ef3aa4 100644
--- a/python/ray/rllib/test/test_supported_spaces.py
+++ b/python/ray/rllib/test/test_supported_spaces.py
@@ -120,12 +120,15 @@ def testAll(self):
stats,
check_bounds=True)
check_support("DQN", {"timesteps_per_iteration": 1}, stats)
- check_support("A3C", {
- "num_workers": 1,
- "optimizer": {
- "grads_per_step": 1
- }
- }, stats)
+ check_support(
+ "A3C", {
+ "num_workers": 1,
+ "optimizer": {
+ "grads_per_step": 1
+ }
+ },
+ stats,
+ check_bounds=True)
check_support(
"PPO", {
"num_workers": 1,
@@ -133,9 +136,6 @@ def testAll(self):
"train_batch_size": 10,
"sample_batch_size": 10,
"sgd_minibatch_size": 1,
- "model": {
- "squash_to_range": True
- },
},
stats,
check_bounds=True)
@@ -153,7 +153,13 @@ def testAll(self):
"num_rollouts": 1,
"rollouts_used": 1
}, stats)
- check_support("PG", {"num_workers": 1, "optimizer": {}}, stats)
+ check_support(
+ "PG", {
+ "num_workers": 1,
+ "optimizer": {}
+ },
+ stats,
+ check_bounds=True)
num_unexpected_errors = 0
for (alg, a_name, o_name), stat in sorted(stats.items()):
if stat not in ["ok", "unsupported"]:
diff --git a/python/ray/rllib/tuned_examples/atari-dist-dqn.yaml b/python/ray/rllib/tuned_examples/atari-dist-dqn.yaml
index 57cd5635d78b..d351e403f2e2 100644
--- a/python/ray/rllib/tuned_examples/atari-dist-dqn.yaml
+++ b/python/ray/rllib/tuned_examples/atari-dist-dqn.yaml
@@ -27,5 +27,5 @@ basic-dqn:
prioritized_replay_alpha: 0.5
beta_annealing_fraction: 1.0
final_prioritized_replay_beta: 1.0
- num_gpus: 1
+ num_gpus: 0.2
timesteps_per_iteration: 10000
diff --git a/python/ray/rllib/tuned_examples/atari-dqn.yaml b/python/ray/rllib/tuned_examples/atari-dqn.yaml
index 264ddfd27b41..b8731bb054ef 100644
--- a/python/ray/rllib/tuned_examples/atari-dqn.yaml
+++ b/python/ray/rllib/tuned_examples/atari-dqn.yaml
@@ -1,4 +1,4 @@
-# Runs on a single g3.16xl node
+# Runs on a single g3.4xl node
# See https://github.com/ray-project/rl-experiments for results
atari-basic-dqn:
env:
@@ -29,5 +29,5 @@ atari-basic-dqn:
prioritized_replay_alpha: 0.5
beta_annealing_fraction: 1.0
final_prioritized_replay_beta: 1.0
- num_gpus: 1
+ num_gpus: 0.2
timesteps_per_iteration: 10000
diff --git a/python/ray/rllib/tuned_examples/atari-duel-ddqn.yaml b/python/ray/rllib/tuned_examples/atari-duel-ddqn.yaml
index be59d15ba807..b5a13162b61e 100644
--- a/python/ray/rllib/tuned_examples/atari-duel-ddqn.yaml
+++ b/python/ray/rllib/tuned_examples/atari-duel-ddqn.yaml
@@ -1,3 +1,5 @@
+# Runs on a single g3.4xl node
+# See https://github.com/ray-project/rl-experiments for results
dueling-ddqn:
env:
grid_search:
@@ -27,5 +29,5 @@ dueling-ddqn:
prioritized_replay_alpha: 0.5
beta_annealing_fraction: 1.0
final_prioritized_replay_beta: 1.0
- num_gpus: 1
+ num_gpus: 0.2
timesteps_per_iteration: 10000
diff --git a/python/ray/rllib/tuned_examples/pong-impala-fast.yaml b/python/ray/rllib/tuned_examples/pong-impala-fast.yaml
index 3466b63ea1c4..3c29f4e0c08e 100644
--- a/python/ray/rllib/tuned_examples/pong-impala-fast.yaml
+++ b/python/ray/rllib/tuned_examples/pong-impala-fast.yaml
@@ -9,7 +9,7 @@ pong-impala-fast:
config:
sample_batch_size: 50
train_batch_size: 1000
- num_workers: 256
+ num_workers: 128
num_envs_per_worker: 5
broadcast_interval: 5
max_sample_requests_in_flight_per_worker: 1
diff --git a/python/ray/rllib/tuned_examples/pong-ppo.yaml b/python/ray/rllib/tuned_examples/pong-ppo.yaml
index 1447481643fe..d7e273cc6e2b 100644
--- a/python/ray/rllib/tuned_examples/pong-ppo.yaml
+++ b/python/ray/rllib/tuned_examples/pong-ppo.yaml
@@ -1,17 +1,26 @@
-# On a Tesla K80 GPU, this achieves the maximum reward in about 1-1.5 hours.
+# On a single GPU, this achieves maximum reward in ~15-20 minutes.
#
-# $ python train.py -f tuned_examples/pong-ppo.yaml --ray-num-gpus=1
+# $ python train.py -f tuned_examples/pong-ppo.yaml
#
-# - PPO_PongDeterministic-v4_0: TERMINATED [pid=16387], 4984 s, 1117981 ts, 21 rew
-# - PPO_PongDeterministic-v4_0: TERMINATED [pid=83606], 4592 s, 1068671 ts, 21 rew
-#
-pong-deterministic-ppo:
- env: PongDeterministic-v4
+pong-ppo:
+ env: PongNoFrameskip-v4
run: PPO
- stop:
- episode_reward_mean: 21
config:
- gamma: 0.99
- num_workers: 4
- num_sgd_iter: 20
+ lambda: 0.95
+ kl_coeff: 0.5
+ clip_rewards: True
+ clip_param: 0.1
+ vf_clip_param: 10.0
+ entropy_coeff: 0.01
+ train_batch_size: 5000
+ sample_batch_size: 20
+ sgd_minibatch_size: 500
+ num_sgd_iter: 10
+ num_workers: 32
+ num_envs_per_worker: 5
+ batch_mode: truncate_episodes
+ observation_filter: NoFilter
+ vf_share_layers: true
num_gpus: 1
+ model:
+ dim: 42
diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py
index d78f2f9c1dd1..60b05e4226d2 100644
--- a/python/ray/scripts/scripts.py
+++ b/python/ray/scripts/scripts.py
@@ -591,9 +591,21 @@ def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
cmd = " ".join(["python", target] + list(script_args))
exec_cluster(cluster_config_file, cmd, screen, tmux, stop, False,
cluster_name, port_forward)
- if tmux:
- logger.info("Use `ray attach {} --tmux` "
- "to check on command status.".format(cluster_config_file))
+
+ if tmux or screen:
+ attach_command_parts = ["ray attach", cluster_config_file]
+ if cluster_name is not None:
+ attach_command_parts.append(
+ "--cluster-name={}".format(cluster_name))
+ if tmux:
+ attach_command_parts.append("--tmux")
+ elif screen:
+ attach_command_parts.append("--screen")
+
+ attach_command = " ".join(attach_command_parts)
+ attach_info = "Use `{}` to check on command status.".format(
+ attach_command)
+ logger.info(attach_info)
@cli.command()
@@ -627,11 +639,24 @@ def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
def exec_cmd(cluster_config_file, cmd, screen, tmux, stop, start, cluster_name,
port_forward):
assert not (screen and tmux), "Can specify only one of `screen` or `tmux`."
+
exec_cluster(cluster_config_file, cmd, screen, tmux, stop, start,
cluster_name, port_forward)
- if tmux:
- logger.info("Use `ray attach {} --tmux` "
- "to check on command status.".format(cluster_config_file))
+
+ if tmux or screen:
+ attach_command_parts = ["ray attach", cluster_config_file]
+ if cluster_name is not None:
+ attach_command_parts.append(
+ "--cluster-name={}".format(cluster_name))
+ if tmux:
+ attach_command_parts.append("--tmux")
+ elif screen:
+ attach_command_parts.append("--screen")
+
+ attach_command = " ".join(attach_command_parts)
+ attach_info = "Use `{}` to check on command status.".format(
+ attach_command)
+ logger.info(attach_info)
@cli.command()
@@ -651,7 +676,7 @@ def stack():
COMMAND = """
pyspy=`which py-spy`
if [ ! -e "$pyspy" ]; then
- echo "ERROR: Please 'pip install py-spy' first"
+ echo "ERROR: Please 'pip install py-spy' (or ray[debug]) first"
exit 1
fi
# Set IFS to iterate over lines instead of over words.
@@ -674,16 +699,15 @@ def stack():
cli.add_command(start)
cli.add_command(stop)
-cli.add_command(create_or_update)
cli.add_command(create_or_update, name="up")
cli.add_command(attach)
cli.add_command(exec_cmd, name="exec")
-cli.add_command(rsync_down)
-cli.add_command(rsync_up)
+cli.add_command(rsync_down, name="rsync_down")
+cli.add_command(rsync_up, name="rsync_up")
cli.add_command(submit)
cli.add_command(teardown)
cli.add_command(teardown, name="down")
-cli.add_command(get_head_ip)
+cli.add_command(get_head_ip, name="get_head_ip")
cli.add_command(stack)
diff --git a/python/ray/test/test_modin.py b/python/ray/test/test_modin.py
new file mode 100644
index 000000000000..83c11895ec7b
--- /dev/null
+++ b/python/ray/test/test_modin.py
@@ -0,0 +1,12 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import ray # noqa F401
+
+
+def test_modin_import():
+ import modin.pandas as pd
+ frame_data = [1, 2, 3, 4, 5, 6, 7, 8]
+ frame = pd.DataFrame(frame_data)
+ assert frame.sum().squeeze() == sum(frame_data)
diff --git a/python/ray/tune/logger.py b/python/ray/tune/logger.py
index 87f7e026d892..a7de6d5f96ba 100644
--- a/python/ray/tune/logger.py
+++ b/python/ray/tune/logger.py
@@ -97,7 +97,12 @@ class _JsonLogger(Logger):
def _init(self):
config_out = os.path.join(self.logdir, "params.json")
with open(config_out, "w") as f:
- json.dump(self.config, f, sort_keys=True, cls=_SafeFallbackEncoder)
+ json.dump(
+ self.config,
+ f,
+ indent=2,
+ sort_keys=True,
+ cls=_SafeFallbackEncoder)
local_file = os.path.join(self.logdir, "result.json")
self.local_out = open(local_file, "a")
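
The only change here is passing indent=2, so params.json is pretty-printed instead of written on a single line. A quick illustration with a toy config (the keys are made up):

# Effect of indent=2 on the params.json written above (toy config).
import json

config = {"env": "CartPole-v0", "lr": 0.01}
print(json.dumps(config, sort_keys=True))            # {"env": "CartPole-v0", "lr": 0.01}
print(json.dumps(config, sort_keys=True, indent=2))  # one key per line, nested indentation
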
diff --git a/python/ray/tune/ray_trial_executor.py b/python/ray/tune/ray_trial_executor.py
index 19cff512144c..451f5638da6c 100644
--- a/python/ray/tune/ray_trial_executor.py
+++ b/python/ray/tune/ray_trial_executor.py
@@ -135,7 +135,7 @@ def start_trial(self, trial, checkpoint=None, raise_on_failure=False):
self._stop_trial(trial, error=True, error_msg=error_msg)
try:
self._start_trial(trial, checkpoint)
- except Exception as exc:
+ except Exception:
logger.exception("Error starting runner, aborting!")
error_msg = traceback.format_exc()
self._stop_trial(trial, error=True, error_msg=error_msg)
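
The unused `as exc` binding is dropped because logger.exception already records the active traceback, so the exception object itself is never needed. A minimal standalone sketch:

# Minimal sketch: logger.exception attaches the current traceback, so the
# exception object does not need to be captured.
import logging

logging.basicConfig()
logger = logging.getLogger(__name__)

try:
    raise RuntimeError("runner failed to start")
except Exception:
    logger.exception("Error starting runner, aborting!")  # traceback included
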
diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py
index e555e9512101..74449aa3feda 100644
--- a/python/ray/tune/trial.py
+++ b/python/ray/tune/trial.py
@@ -318,8 +318,10 @@ def __repr__(self):
def __str__(self):
"""Combines ``env`` with ``trainable_name`` and ``experiment_tag``."""
if "env" in self.config:
- identifier = "{}_{}".format(self.trainable_name,
- self.config["env"])
+ env = self.config["env"]
+ if isinstance(env, type):
+ env = env.__name__
+ identifier = "{}_{}".format(self.trainable_name, env)
else:
identifier = self.trainable_name
if self.experiment_tag:
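
config["env"] can be either a registered string or an environment class, and the new branch uses the class name so the trial identifier stays readable. A small sketch of the same logic, using a placeholder ToyEnv class:

# Sketch of the identifier logic above; ToyEnv is a placeholder class.
class ToyEnv(object):
    pass


def trial_identifier(trainable_name, config):
    env = config.get("env")
    if env is None:
        return trainable_name
    if isinstance(env, type):
        env = env.__name__
    return "{}_{}".format(trainable_name, env)


print(trial_identifier("PPO", {"env": "CartPole-v0"}))  # PPO_CartPole-v0
print(trial_identifier("PPO", {"env": ToyEnv}))         # PPO_ToyEnv
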
diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py
index 548c68c51cc0..3adcde22da79 100644
--- a/python/ray/tune/trial_runner.py
+++ b/python/ray/tune/trial_runner.py
@@ -299,7 +299,8 @@ def _memory_debug_string(self):
return "Memory usage on this node: {}/{} GB{}".format(
round(used_gb, 1), round(total_gb, 1), warn)
except ImportError:
- return "Unknown memory usage (`pip install psutil` to resolve)"
+ return ("Unknown memory usage. Please run `pip install psutil` "
+ "(or ray[debug]) to resolve.")
def has_resources(self, resources):
"""Returns whether this runner has at least the specified resources."""
@@ -358,15 +359,13 @@ def _process_events(self):
error_msg = traceback.format_exc()
if trial.status == Trial.RUNNING:
if trial.should_recover():
- self.try_recover(trial, error_msg)
+ self._try_recover(trial, error_msg)
else:
- self.trial_executor.stop_trial(
- trial,
- error=error_msg is not None,
- error_msg=error_msg)
self._scheduler_alg.on_trial_error(self, trial)
self._search_alg.on_trial_complete(
trial.trial_id, error=True)
+ self.trial_executor.stop_trial(
+ trial, error=True, error_msg=error_msg)
def _checkpoint_if_needed(self, trial):
"""Checkpoints trial based off trial.last_result."""
@@ -376,7 +375,7 @@ def _checkpoint_if_needed(self, trial):
self.trial_executor.save(trial, storage=Checkpoint.DISK)
self.trial_executor.try_checkpoint_metadata(trial)
- def try_recover(self, trial, error_msg):
+ def _try_recover(self, trial, error_msg):
"""Tries to recover trial.
Notifies SearchAlgorithm and Scheduler if failure to recover.
diff --git a/python/ray/tune/tune.py b/python/ray/tune/tune.py
index 86a4d2ba79d5..fa773572c110 100644
--- a/python/ray/tune/tune.py
+++ b/python/ray/tune/tune.py
@@ -9,7 +9,6 @@
from ray.tune.error import TuneError
from ray.tune.suggest import BasicVariantGenerator
from ray.tune.trial import Trial, DEBUG_PRINT_INTERVAL
-from ray.tune.result import DEFAULT_RESULTS_DIR
from ray.tune.log_sync import wait_for_log_sync
from ray.tune.trial_runner import TrialRunner
from ray.tune.schedulers import (HyperBandScheduler, AsyncHyperBandScheduler,
@@ -101,6 +100,8 @@ def run_experiments(experiments=None,
if search_alg is None:
search_alg = BasicVariantGenerator()
+ search_alg.add_configurations(experiments)
+
runner = TrialRunner(
search_alg,
scheduler=scheduler,
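
run_experiments now hands the experiment specs to the search algorithm before constructing the TrialRunner, so trial generation is owned by the SearchAlgorithm (BasicVariantGenerator by default). A sketch of the resulting call pattern; the experiment spec shown here is illustrative only:

# Illustrative call pattern after this change; the experiment spec is made up.
from ray.tune.suggest import BasicVariantGenerator

experiments = {"my_exp": {"run": "PPO", "config": {"env": "CartPole-v0"}}}
search_alg = BasicVariantGenerator()
search_alg.add_configurations(experiments)  # now done inside run_experiments()
# runner = TrialRunner(search_alg, scheduler=scheduler, ...)
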
diff --git a/python/ray/worker.py b/python/ray/worker.py
index f68bb42886f0..c3c01f4859fc 100644
--- a/python/ray/worker.py
+++ b/python/ray/worker.py
@@ -12,7 +12,6 @@
import numpy as np
import os
import redis
-import setproctitle
import signal
import sys
import threading
@@ -73,6 +72,15 @@
# using logging.basicConfig in its entry/init points.
logger = logging.getLogger(__name__)
+try:
+ import setproctitle
+except ImportError:
+ setproctitle = None
+ logger.warning(
+ "WARNING: Not updating worker name since `setproctitle` is not "
+ "installed. Install this with `pip install setproctitle` "
+ "(or ray[debug]) to enable monitoring of worker processes.")
+
class RayTaskError(Exception):
"""An object used internally to represent a task that threw an exception.
@@ -1916,7 +1924,8 @@ def connect(info,
# Initialize some fields.
if mode is WORKER_MODE:
worker.worker_id = random_string()
- setproctitle.setproctitle("ray_worker")
+ if setproctitle:
+ setproctitle.setproctitle("ray_worker")
else:
# This is the code path of driver mode.
if driver_id is None:
@@ -2163,9 +2172,11 @@ def disconnect(worker=global_worker):
@contextmanager
def _changeproctitle(title, next_title):
- setproctitle.setproctitle(title)
+ if setproctitle:
+ setproctitle.setproctitle(title)
yield
- setproctitle.setproctitle(next_title)
+ if setproctitle:
+ setproctitle.setproctitle(next_title)
def _try_to_compute_deterministic_class_id(cls, depth=5):
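
setproctitle is now optional (part of the new ray[debug] extra added in setup.py below), so worker.py degrades gracefully when it is missing: the import is attempted once, a warning is logged, and every call site checks for None. A minimal sketch of the same pattern:

# Minimal sketch of the optional-import pattern used in worker.py.
import logging

logger = logging.getLogger(__name__)

try:
    import setproctitle
except ImportError:
    setproctitle = None
    logger.warning("setproctitle is not installed; worker process names will "
                   "not be set. Install it with `pip install setproctitle` "
                   "(or ray[debug]).")


def set_process_title(title):
    # No-op when the optional dependency is unavailable.
    if setproctitle:
        setproctitle.setproctitle(title)
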
diff --git a/python/setup.py b/python/setup.py
index 7198de2329bf..c92ffa65b481 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -64,7 +64,10 @@
optional_ray_files += ray_autoscaler_files
-extras = {"rllib": ["pyyaml", "gym[atari]", "opencv-python", "lz4", "scipy"]}
+extras = {
+ "rllib": ["pyyaml", "gym[atari]", "opencv-python", "lz4", "scipy"],
+ "debug": ["psutil", "setproctitle", "py-spy"],
+}
class build_ext(_build_ext.build_ext):
@@ -139,7 +142,6 @@ def find_version(*filepath):
"pytest",
"pyyaml",
"redis",
- "setproctitle",
# The six module is required by pyarrow.
"six >= 1.0.0",
"flatbuffers",
diff --git a/src/ray/object_manager/object_directory.cc b/src/ray/object_manager/object_directory.cc
index fce8b11349cd..ab20d27b66c6 100644
--- a/src/ray/object_manager/object_directory.cc
+++ b/src/ray/object_manager/object_directory.cc
@@ -192,6 +192,10 @@ ray::Status ObjectDirectory::LookupLocations(const ObjectID &object_id,
return status;
}
+ray::ClientID ObjectDirectory::GetLocalClientID() {
+ return gcs_client_->client_table().GetLocalClientId();
+}
+
std::string ObjectDirectory::DebugString() const {
std::stringstream result;
result << "ObjectDirectory:";
diff --git a/src/ray/object_manager/object_directory.h b/src/ray/object_manager/object_directory.h
index 7d6f1171079d..e36c4c41604e 100644
--- a/src/ray/object_manager/object_directory.h
+++ b/src/ray/object_manager/object_directory.h
@@ -104,6 +104,11 @@ class ObjectDirectoryInterface {
virtual ray::Status ReportObjectRemoved(const ObjectID &object_id,
const ClientID &client_id) = 0;
+ /// Get the local client ID.
+ ///
+ /// \return The local ClientID.
+ virtual ray::ClientID GetLocalClientID() = 0;
+
/// Returns debug string for class.
///
/// \return string.
@@ -145,6 +150,8 @@ class ObjectDirectory : public ObjectDirectoryInterface {
ray::Status ReportObjectRemoved(const ObjectID &object_id,
const ClientID &client_id) override;
+ ray::ClientID GetLocalClientID() override;
+
std::string DebugString() const override;
/// ObjectDirectory should not be copied.
diff --git a/src/ray/object_manager/object_manager.cc b/src/ray/object_manager/object_manager.cc
index e9edf5f842d7..a3cc87c7f17c 100644
--- a/src/ray/object_manager/object_manager.cc
+++ b/src/ray/object_manager/object_manager.cc
@@ -52,7 +52,7 @@ ObjectManager::ObjectManager(asio::io_service &main_service,
gen_(std::chrono::high_resolution_clock::now().time_since_epoch().count()) {
RAY_CHECK(config_.max_sends > 0);
RAY_CHECK(config_.max_receives > 0);
- // TODO(hme) Client ID is never set with this constructor.
+ client_id_ = object_directory_->GetLocalClientID();
main_service_ = &main_service;
store_notification_.SubscribeObjAdded(
[this](const object_manager::protocol::ObjectInfoT &object_info) {
@@ -628,9 +628,13 @@ void ObjectManager::SubscribeRemainingWaitObjects(const UniqueID &wait_id) {
const ObjectID &subscribe_object_id) {
if (!client_ids.empty()) {
auto object_id_wait_state = active_wait_requests_.find(wait_id);
- // We never expect to handle a subscription notification for a wait that has
- // already completed.
- RAY_CHECK(object_id_wait_state != active_wait_requests_.end());
+ if (object_id_wait_state == active_wait_requests_.end()) {
+ // Depending on the timing of calls to the object directory, we
+ // may get a subscription notification after the wait call has
+ // already completed. If so, then don't process the
+ // notification.
+ return;
+ }
auto &wait_state = object_id_wait_state->second;
RAY_CHECK(wait_state.remaining.erase(subscribe_object_id));
wait_state.found.insert(subscribe_object_id);
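
The object manager no longer treats a late subscription callback as fatal: if the wait has already completed, the notification is simply ignored. A hedged Python model of that bookkeeping (names are illustrative; this is not the C++ API):

# Python model of the wait bookkeeping above; names are illustrative only.
active_wait_requests = {}  # wait_id -> {"remaining": set(), "found": set()}


def on_locations_found(wait_id, object_id):
    wait_state = active_wait_requests.get(wait_id)
    if wait_state is None:
        # The wait already completed; a late notification is ignored
        # instead of being treated as a fatal error.
        return
    wait_state["remaining"].discard(object_id)
    wait_state["found"].add(object_id)
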
diff --git a/src/ray/raylet/actor_registration.cc b/src/ray/raylet/actor_registration.cc
index 3f4a67c1d21c..7ea95e656642 100644
--- a/src/ray/raylet/actor_registration.cc
+++ b/src/ray/raylet/actor_registration.cc
@@ -10,7 +10,6 @@ namespace raylet {
ActorRegistration::ActorRegistration(const ActorTableDataT &actor_table_data)
: actor_table_data_(actor_table_data),
- alive_(true),
execution_dependency_(ObjectID::nil()),
frontier_() {}
@@ -44,16 +43,7 @@ bool ActorRegistration::IsAlive() const {
return actor_table_data_.state == ActorState::ALIVE;
}
-std::string ActorRegistration::DebugString() const {
- std::stringstream result;
- if (alive_) {
- result << "alive";
- } else {
- result << "dead";
- }
- result << ", num handles: " << frontier_.size();
- return result.str();
-}
+int ActorRegistration::NumHandles() const { return frontier_.size(); }
} // namespace raylet
diff --git a/src/ray/raylet/actor_registration.h b/src/ray/raylet/actor_registration.h
index faa05eb2686c..4cf9b110afe1 100644
--- a/src/ray/raylet/actor_registration.h
+++ b/src/ray/raylet/actor_registration.h
@@ -94,17 +94,15 @@ class ActorRegistration {
/// \return True if the local actor is alive and false if it is dead.
bool IsAlive() const;
- /// Returns debug string for class.
+ /// Returns the number of handles to this actor entry.
///
- /// \return string.
- std::string DebugString() const;
+ /// \return int.
+ int NumHandles() const;
private:
/// Information from the global actor table about this actor, including the
/// node manager location.
ActorTableDataT actor_table_data_;
- /// True if the actor is alive and false otherwise.
- bool alive_;
/// The object representing the state following the actor's most recently
/// executed task. The next task to execute on the actor should be marked as
/// execution-dependent on this object.
diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc
index a0b7526e8713..9110f0c87881 100644
--- a/src/ray/raylet/node_manager.cc
+++ b/src/ray/raylet/node_manager.cc
@@ -541,6 +541,13 @@ void NodeManager::HandleActorStateTransition(const ActorID &actor_id,
<< " already removed from the lineage cache. This is most "
"likely due to reconstruction.";
}
+ // Maintain the invariant that if a task is in the
+ // MethodsWaitingForActorCreation queue, then it is subscribed to its
+ // respective actor creation task and that task only. Since the actor
+ // location is now known, we can remove the task from the queue and
+ // forget its dependency on the actor creation task.
+ RAY_CHECK(task_dependency_manager_.UnsubscribeDependencies(
+ method.GetTaskSpecification().TaskId()));
// The task's uncommitted lineage was already added to the local lineage
// cache upon the initial submission, so it's okay to resubmit it with an
// empty lineage this time.
@@ -1154,6 +1161,15 @@ void NodeManager::SubmitTask(const Task &task, const Lineage &uncommitted_lineag
// Keep the task queued until we discover the actor's location.
// (See design_docs/task_states.rst for the state transition diagram.)
local_queues_.QueueMethodsWaitingForActorCreation({task});
+ // The actor has not yet been created and may have failed. To make sure
+ // that the actor is eventually recreated, we maintain the invariant that
+ // if a task is in the MethodsWaitingForActorCreation queue, then it is
+ // subscribed to its respective actor creation task and that task only.
+ // Once the actor has been created and this method is removed from the
+ // waiting queue, the caller must make the corresponding call to
+ // UnsubscribeDependencies.
+ task_dependency_manager_.SubscribeDependencies(spec.TaskId(),
+ {spec.ActorCreationDummyObjectId()});
// Mark the task as pending. It will be canceled once we discover the
// actor's location and either execute the task ourselves or forward it
// to another node.
@@ -1431,7 +1447,8 @@ void NodeManager::FinishAssignedTask(Worker &worker) {
// Publish the actor creation event to all other nodes so that methods for
// the actor will be forwarded directly to this node.
- RAY_CHECK(actor_registry_.find(actor_id) == actor_registry_.end());
+ RAY_CHECK(actor_registry_.find(actor_id) == actor_registry_.end())
+ << "Created an actor that already exists";
auto actor_data = std::make_shared<ActorTableDataT>();
actor_data->actor_id = actor_id.binary();
actor_data->actor_creation_dummy_object_id =
@@ -1447,6 +1464,10 @@ void NodeManager::FinishAssignedTask(Worker &worker) {
// index in the log should succeed.
auto failure_callback = [](gcs::AsyncGcsClient *client, const ActorID &id,
const ActorTableDataT &data) {
+ // TODO(swang): Instead of making this a fatal check, we could just kill
+ // the duplicate actor process. If we do this, we must make sure to
+ // either resubmit the tasks that went to the duplicate actor, or wait
+ // for success before handling the actor state transition to ALIVE.
RAY_LOG(FATAL) << "Failed to update state to ALIVE for actor " << id;
};
RAY_CHECK_OK(gcs_client_->actor_table().AppendAt(
@@ -1764,14 +1785,27 @@ std::string NodeManager::DebugString() const {
result << "\n" << reconstruction_policy_.DebugString();
result << "\n" << task_dependency_manager_.DebugString();
result << "\n" << lineage_cache_.DebugString();
+ result << "\nActorRegistry:";
+ int live_actors = 0;
+ int dead_actors = 0;
+ int max_num_handles = 0;
+ for (auto &pair : actor_registry_) {
+ if (pair.second.IsAlive()) {
+ live_actors += 1;
+ } else {
+ dead_actors += 1;
+ }
+ if (pair.second.NumHandles() > max_num_handles) {
+ max_num_handles = pair.second.NumHandles();
+ }
+ }
+ result << "\n- num live actors: " << live_actors;
+ result << "\n- num dead actors: " << dead_actors;
+ result << "\n- max num handles: " << max_num_handles;
result << "\nRemoteConnections:";
for (auto &pair : remote_server_connections_) {
result << "\n" << pair.first.hex() << ": " << pair.second->DebugString();
}
- result << "\nActorRegistry:";
- for (auto &pair : actor_registry_) {
- result << "\n" << pair.first.hex() << ": " << pair.second.DebugString();
- }
result << "\nDebugString() time ms: " << (current_time_ms() - now_ms);
return result.str();
}
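
The node manager changes revolve around one invariant: a task queued in MethodsWaitingForActorCreation is subscribed to exactly one dependency, its actor-creation dummy object, and that subscription is removed as soon as the actor's location is learned. A hedged Python model of the invariant (illustrative only, not the raylet API):

# Python model of the subscribe/unsubscribe invariant described above
# (illustrative only, not the raylet API).
subscriptions = {}  # task_id -> set of object ids the task is subscribed to


def queue_method_waiting_for_actor(task_id, actor_creation_dummy_object_id):
    # SubmitTask: subscribe the waiting method to its creation task only.
    subscriptions[task_id] = {actor_creation_dummy_object_id}


def on_actor_location_known(task_id):
    # HandleActorStateTransition: the matching unsubscribe must succeed,
    # mirroring the RAY_CHECK on UnsubscribeDependencies.
    assert subscriptions.pop(task_id, None) is not None
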
diff --git a/src/ray/raylet/reconstruction_policy_test.cc b/src/ray/raylet/reconstruction_policy_test.cc
index 4062511ae3dd..9f1499c31664 100644
--- a/src/ray/raylet/reconstruction_policy_test.cc
+++ b/src/ray/raylet/reconstruction_policy_test.cc
@@ -39,6 +39,7 @@ class MockObjectDirectory : public ObjectDirectoryInterface {
std::string DebugString() const { return ""; }
MOCK_METHOD0(RegisterBackend, void(void));
+ MOCK_METHOD0(GetLocalClientID, ray::ClientID());
MOCK_CONST_METHOD1(LookupRemoteConnectionInfo, void(RemoteConnectionInfo &));
MOCK_CONST_METHOD0(LookupAllRemoteConnections, std::vector<RemoteConnectionInfo>());
MOCK_METHOD3(SubscribeObjectLocations,
diff --git a/test/component_failures_test.py b/test/component_failures_test.py
index b9d257962120..fd09a17599cf 100644
--- a/test/component_failures_test.py
+++ b/test/component_failures_test.py
@@ -5,11 +5,14 @@
import os
import json
import signal
+import sys
import time
+import numpy as np
import pytest
import ray
+from ray.test.cluster_utils import Cluster
from ray.test.test_utils import run_string_as_driver_nonblocking
@@ -33,6 +36,26 @@ def shutdown_only():
ray.shutdown()
+@pytest.fixture
+def ray_start_cluster():
+ node_args = {
+ "resources": dict(CPU=8),
+ "_internal_config": json.dumps({
+ "initial_reconstruction_timeout_milliseconds": 1000,
+ "num_heartbeats_timeout": 10
+ })
+ }
+ # Start a head node and 4 worker nodes, each with 8 CPUs.
+ g = Cluster(initialize_head=True, connect=True, head_node_args=node_args)
+ workers = []
+ for _ in range(4):
+ workers.append(g.add_node(**node_args))
+ g.wait_for_nodes()
+ yield g
+ ray.shutdown()
+ g.shutdown()
+
+
# This test checks that when a worker dies in the middle of a get, the plasma
# store and raylet will not die.
@pytest.mark.skipif(
@@ -347,6 +370,51 @@ def test_plasma_store_failed():
ray.shutdown()
+def test_actor_creation_node_failure(ray_start_cluster):
+ # TODO(swang): Refactor test_raylet_failed, etc to reuse the below code.
+ cluster = ray_start_cluster
+
+ @ray.remote
+ class Child(object):
+ def __init__(self, death_probability):
+ self.death_probability = death_probability
+
+ def ping(self):
+ # Exit process with some probability.
+ exit_chance = np.random.rand()
+ if exit_chance < self.death_probability:
+ sys.exit(-1)
+
+ num_children = 100
+ # Children actors will die about half the time.
+ death_probability = 0.5
+
+ children = [Child.remote(death_probability) for _ in range(num_children)]
+ while len(cluster.list_all_nodes()) > 1:
+ for j in range(3):
+ # Submit some tasks on the actors. About half of the actors will
+ # fail.
+ children_out = [child.ping.remote() for child in children]
+ # Wait a while for all the tasks to complete. This should trigger
+ # reconstruction for any actor creation tasks that were forwarded
+ # to nodes that then failed.
+ ready, _ = ray.wait(
+ children_out,
+ num_returns=len(children_out),
+ timeout=5 * 60 * 1000)
+ assert len(ready) == len(children_out)
+
+ # Replace any actors that died.
+ for i, out in enumerate(children_out):
+ try:
+ ray.get(out)
+ except ray.worker.RayGetError:
+ children[i] = Child.remote(death_probability)
+ # Remove a node. Any actor creation tasks that were forwarded to this
+ # node must be reconstructed.
+ cluster.remove_node(cluster.list_all_nodes()[-1])
+
+
@pytest.mark.skipif(
os.environ.get("RAY_USE_NEW_GCS") == "on",
reason="Hanging with new GCS API.")
diff --git a/test/failure_test.py b/test/failure_test.py
index 7895cbf1ac96..027ed38d6411 100644
--- a/test/failure_test.py
+++ b/test/failure_test.py
@@ -500,10 +500,10 @@ def test_warning_monitor_died(shutdown_only):
# addition to the monitor.
fake_id = 20 * b"\x00"
malformed_message = "asdf"
- redis_client = ray.worker.global_state.redis_clients[0]
+ redis_client = ray.worker.global_worker.redis_client
redis_client.execute_command(
- "RAY.TABLE_ADD", ray.gcs_utils.TablePrefix.HEARTBEAT,
- ray.gcs_utils.TablePubsub.HEARTBEAT, fake_id, malformed_message)
+ "RAY.TABLE_ADD", ray.gcs_utils.TablePrefix.HEARTBEAT_BATCH,
+ ray.gcs_utils.TablePubsub.HEARTBEAT_BATCH, fake_id, malformed_message)
wait_for_errors(ray_constants.MONITOR_DIED_ERROR, 1)
diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh
index 93f00a0eed72..9b8d9295eae3 100755
--- a/test/jenkins_tests/run_multi_node_tests.sh
+++ b/test/jenkins_tests/run_multi_node_tests.sh
@@ -53,6 +53,14 @@ docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
--stop '{"training_iteration": 2}' \
--config '{"simple_optimizer": true, "num_sgd_iter": 2, "model": {"use_lstm": true}}'
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+ python /ray/python/ray/rllib/train.py \
+ --env CartPole-v1 \
+ --run PPO \
+ --stop '{"training_iteration": 2}' \
+ --config '{"num_gpus": 0.1}' \
+ --ray-num-gpus 1
+
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
python /ray/python/ray/rllib/train.py \
--env CartPole-v1 \
@@ -249,9 +257,30 @@ docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
python /ray/python/ray/rllib/test/test_external_env.py
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+ python /ray/python/ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50
+
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+ python /ray/python/ray/rllib/examples/parametric_action_cartpole.py --run=PPO --stop=50
+
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+ python /ray/python/ray/rllib/examples/parametric_action_cartpole.py --run=DQN --stop=50
+
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
python /ray/python/ray/rllib/test/test_lstm.py
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+ python /ray/python/ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PPO
+
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+ python /ray/python/ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PG
+
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+ python /ray/python/ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DQN
+
+docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
+ python /ray/python/ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DDPG
+
docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \
python /ray/python/ray/rllib/test/test_multi_agent_env.py
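
The new CartPole/PPO job above exercises fractional GPU scheduling: the cluster advertises one GPU (--ray-num-gpus 1) while the trainer requests only "num_gpus": 0.1, so several such workloads can share a single device. A small hedged sketch of the underlying Ray primitive:

# Sketch of fractional GPU requests in Ray, which the PPO smoke test above
# relies on ("num_gpus": 0.1 against a single declared GPU).
import ray

ray.init(num_gpus=1)


@ray.remote(num_gpus=0.1)
def gpu_slice_task():
    # The single declared GPU admits up to ten of these tasks at once.
    return ray.get_gpu_ids()


print(ray.get([gpu_slice_task.remote() for _ in range(10)]))
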
diff --git a/test/stress_tests/run_stress_tests.sh b/test/stress_tests/run_stress_tests.sh
new file mode 100755
index 000000000000..ba0886037c9e
--- /dev/null
+++ b/test/stress_tests/run_stress_tests.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+# Cause the script to exit if a single command fails.
+set -e
+
+# Show explicitly which commands are currently running.
+set -x
+
+ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
+
+# Start a large cluster using the autoscaler.
+ray up -y $ROOT_DIR/stress_testing_config.yaml
+
+# Run a bunch of stress tests.
+ray submit $ROOT_DIR/stress_testing_config.yaml test_many_tasks_and_transfers.py
+ray submit $ROOT_DIR/stress_testing_config.yaml test_dead_actors.py
+
+# Tear down the cluster.
+ray down -y $ROOT_DIR/stress_testing_config.yaml
diff --git a/test/stress_tests/stress_testing_config.yaml b/test/stress_tests/stress_testing_config.yaml
new file mode 100644
index 000000000000..6ef5285035e8
--- /dev/null
+++ b/test/stress_tests/stress_testing_config.yaml
@@ -0,0 +1,115 @@
+# A unique identifier for the head node and workers of this cluster.
+cluster_name: stress-testing
+
+# The minimum number of worker nodes to launch in addition to the head
+# node. This number should be >= 0.
+min_workers: 100
+
+# The maximum number of worker nodes to launch in addition to the head
+# node. This takes precedence over min_workers.
+max_workers: 100
+
+# The autoscaler will scale up the cluster to this target fraction of resource
+# usage. For example, if a cluster of 10 nodes is 100% busy and
+# target_utilization is 0.8, it would resize the cluster to 13. This fraction
+# can be decreased to increase the aggressiveness of upscaling.
+# This value must be less than 1.0 for scaling to happen.
+target_utilization_fraction: 0.8
+
+# If a node is idle for this many minutes, it will be removed.
+idle_timeout_minutes: 5
+
+# Cloud-provider specific configuration.
+provider:
+ type: aws
+ region: us-west-2
+ availability_zone: us-west-2a
+
+# How Ray will authenticate with newly launched nodes.
+auth:
+ ssh_user: ubuntu
+# By default Ray creates a new private keypair, but you can also use your own.
+# If you do so, make sure to also set "KeyName" in the head and worker node
+# configurations below.
+# ssh_private_key: /path/to/your/key.pem
+
+# Provider-specific config for the head node, e.g. instance type. By default
+# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
+# For more documentation on available fields, see:
+# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
+head_node:
+ InstanceType: m5.12xlarge
+ ImageId: ami-0def3275 # Default Ubuntu 16.04 AMI.
+
+ # Set primary volume to 50 GiB
+ BlockDeviceMappings:
+ - DeviceName: /dev/sda1
+ Ebs:
+ VolumeSize: 50
+
+ # Additional options in the boto docs.
+
+# Provider-specific config for worker nodes, e.g. instance type. By default
+# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
+# For more documentation on available fields, see:
+# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
+worker_nodes:
+ InstanceType: m5.large
+ ImageId: ami-0def3275 # Default Ubuntu 16.04 AMI.
+
+ # Set primary volume to 50 GiB
+ BlockDeviceMappings:
+ - DeviceName: /dev/sda1
+ Ebs:
+ VolumeSize: 50
+
+ # Run workers on spot by default. Comment this out to use on-demand.
+ InstanceMarketOptions:
+ MarketType: spot
+ # Additional options can be found in the boto docs, e.g.
+ # SpotOptions:
+ # MaxPrice: MAX_HOURLY_PRICE
+
+ # Additional options in the boto docs.
+
+# Files or directories to copy to the head and worker nodes. The format is a
+# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
+file_mounts: {
+# "/path1/on/remote/machine": "/path1/on/local/machine",
+# "/path2/on/remote/machine": "/path2/on/local/machine",
+}
+
+# List of shell commands to run to set up nodes.
+setup_commands:
+ # Consider uncommenting these if you run into dpkg locking issues
+ # - sudo pkill -9 apt-get || true
+ # - sudo pkill -9 dpkg || true
+ # - sudo dpkg --configure -a
+ # Install basics.
+ - sudo apt-get update
+ - sudo apt-get install -y cmake pkg-config build-essential autoconf curl libtool unzip flex bison python
+ # Install Anaconda.
+ - wget https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh || true
+ - bash Anaconda3-5.0.1-Linux-x86_64.sh -b -p $HOME/anaconda3 || true
+ - echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> ~/.bashrc
+ # # Build Ray.
+ # - git clone https://github.com/ray-project/ray || true
+ - pip install boto3==1.4.8 cython==0.27.3
+ # - cd ray/python; git checkout master; git pull; pip install -e . --verbose
+ - pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.6.0-cp36-cp36m-manylinux1_x86_64.whl
+
+# Custom commands that will be run on the head node after common setup.
+head_setup_commands: []
+
+# Custom commands that will be run on worker nodes after common setup.
+worker_setup_commands: []
+
+# Command to start ray on the head node. You don't need to change this.
+head_start_ray_commands:
+ - ray stop
+ - ulimit -n 65536; ray start --head --num-redis-shards=5 --redis-port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml
+
+# Command to start ray on worker nodes. You don't need to change this.
+worker_start_ray_commands:
+ - ray stop
+ - ulimit -n 65536; ray start --redis-address=$RAY_HEAD_IP:6379 --num-gpus=100
diff --git a/test/stress_tests/test_dead_actors.py b/test/stress_tests/test_dead_actors.py
new file mode 100644
index 000000000000..72b801142635
--- /dev/null
+++ b/test/stress_tests/test_dead_actors.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import numpy as np
+import sys
+
+import ray
+
+logger = logging.getLogger(__name__)
+
+ray.init(redis_address="localhost:6379")
+
+
+@ray.remote
+class Child(object):
+ def __init__(self, death_probability):
+ self.death_probability = death_probability
+
+ def ping(self):
+ # Exit process with some probability.
+ exit_chance = np.random.rand()
+ if exit_chance > self.death_probability:
+ sys.exit(-1)
+
+
+@ray.remote
+class Parent(object):
+ def __init__(self, num_children, death_probability):
+ self.death_probability = death_probability
+ self.children = [
+ Child.remote(death_probability) for _ in range(num_children)
+ ]
+
+ def ping(self, num_pings):
+ children_outputs = []
+ for _ in range(num_pings):
+ children_outputs += [
+ child.ping.remote() for child in self.children
+ ]
+ try:
+ ray.get(children_outputs)
+ except Exception:
+ # Replace the children if one of them died.
+ self.__init__(len(self.children), self.death_probability)
+
+ def kill(self):
+ # Clean up children.
+ ray.get([child.__ray_terminate__.remote() for child in self.children])
+
+
+num_parents = 10
+num_children = 10
+death_probability = 0.95
+
+parents = [
+ Parent.remote(num_children, death_probability) for _ in range(num_parents)
+]
+for i in range(100):
+ ray.get([parent.ping.remote(10) for parent in parents])
+
+ # Kill a parent actor with some probability.
+ exit_chance = np.random.rand()
+ if exit_chance > death_probability:
+ parent_index = np.random.randint(len(parents))
+ parents[parent_index].kill.remote()
+ parents[parent_index] = Parent.remote(num_children, death_probability)
+
+ logger.info("Finished trial {}".format(i))
diff --git a/test/stress_tests/test_many_tasks_and_transfers.py b/test/stress_tests/test_many_tasks_and_transfers.py
new file mode 100644
index 000000000000..87b8239a08fe
--- /dev/null
+++ b/test/stress_tests/test_many_tasks_and_transfers.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import logging
+import time
+
+import ray
+
+logger = logging.getLogger(__name__)
+
+ray.init(redis_address="localhost:6379")
+
+# These numbers need to match the values in the autoscaler config file.
+num_remote_nodes = 100
+head_node_cpus = 2
+num_remote_cpus = num_remote_nodes * head_node_cpus
+
+# Wait until the expected number of nodes have joined the cluster.
+while True:
+ if len(ray.global_state.client_table()) >= num_remote_nodes + 1:
+ break
+logger.info("Nodes have all joined. There are {} resources."
+ .format(ray.global_state.cluster_resources()))
+
+
+# Require 1 GPU to force the tasks to be on remote machines.
+@ray.remote(num_gpus=1)
+def f(size, *xs):
+ return np.ones(size, dtype=np.uint8)
+
+
+# Require 1 GPU to force the actors to be on remote machines.
+@ray.remote(num_cpus=1, num_gpus=1)
+class Actor(object):
+ def method(self, size, *xs):
+ return np.ones(size, dtype=np.uint8)
+
+
+# Launch a bunch of tasks.
+start_time = time.time()
+logger.info("Submitting many tasks.")
+for i in range(10):
+ logger.info("Iteration {}".format(i))
+ ray.get([f.remote(0) for _ in range(100000)])
+logger.info("Finished after {} seconds.".format(time.time() - start_time))
+
+# Launch a bunch of tasks, each with a bunch of dependencies.
+start_time = time.time()
+logger.info("Submitting tasks with many dependencies.")
+x_ids = []
+for i in range(5):
+ logger.info("Iteration {}".format(i))
+ x_ids = [f.remote(0, *x_ids) for _ in range(10000)]
+ray.get(x_ids)
+logger.info("Finished after {} seconds.".format(time.time() - start_time))
+
+# Create a bunch of actors.
+start_time = time.time()
+logger.info("Creating {} actors.".format(num_remote_cpus))
+actors = [Actor.remote() for _ in range(num_remote_cpus)]
+logger.info("Finished after {} seconds.".format(time.time() - start_time))
+
+# Submit a bunch of small tasks to each actor.
+start_time = time.time()
+logger.info("Submitting many small actor tasks.")
+x_ids = []
+for _ in range(100000):
+ x_ids = [a.method.remote(0) for a in actors]
+ray.get(x_ids)
+logger.info("Finished after {} seconds.".format(time.time() - start_time))
+
+# Submit a bunch of actor tasks with all-to-all communication.
+start_time = time.time()
+logger.info("Submitting actor tasks with all-to-all communication.")
+x_ids = []
+for _ in range(50):
+ for size_exponent in [0, 1, 2, 3, 4, 5, 6]:
+ x_ids = [a.method.remote(10**size_exponent, *x_ids) for a in actors]
+ray.get(x_ids)
+logger.info("Finished after {} seconds.".format(time.time() - start_time))
diff --git a/thirdparty/scripts/build_modin.sh b/thirdparty/scripts/build_modin.sh
new file mode 100755
index 000000000000..96563fdb2106
--- /dev/null
+++ b/thirdparty/scripts/build_modin.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+set -x
+
+# Cause the script to exit if a single command fails.
+set -e
+
+if [[ -z "$1" ]]; then
+ PYTHON_EXECUTABLE=`which python`
+else
+ PYTHON_EXECUTABLE=$1
+fi
+
+PYTHON_VERSION="$($PYTHON_EXECUTABLE -c 'import sys; print(sys.version_info[0])')"
+
+TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)/../
+MODIN_VERSION=0.2.4
+MODIN_WHEELS_FNAME="modin-$MODIN_VERSION-py$PYTHON_VERSION-none-any.whl"
+MODIN_WHEELS_URL="https://github.com/modin-project/modin/releases/download/v$MODIN_VERSION/"
+
+pushd $TP_DIR/../python/ray/
+rm -rf modin
+mkdir modin
+pushd modin
+curl -kL "$MODIN_WHEELS_URL$MODIN_WHEELS_FNAME" -o "$MODIN_WHEELS_FNAME"
+unzip "$MODIN_WHEELS_FNAME"
+rm "$MODIN_WHEELS_FNAME"
+popd
+popd
diff --git a/thirdparty/scripts/setup.sh b/thirdparty/scripts/setup.sh
index 27f1ef0e3ed5..da283bd3b2bb 100755
--- a/thirdparty/scripts/setup.sh
+++ b/thirdparty/scripts/setup.sh
@@ -67,3 +67,8 @@ bash "$TP_SCRIPT_DIR/build_ui.sh"
# rDSN (optional)
##############################################
# bash "$TP_SCRIPT_DIR/build_rdsn.sh"
+
+##############################################
+# modin
+##############################################
+bash "$TP_SCRIPT_DIR/build_modin.sh" $PYTHON_EXECUTABLE