[RLlib] Add ObservationPreprocessor (ConnectorV2) and cleanup ConnectorV2 docstrings. (#47077)
ray-project · Aug 12, 2024 · c603541 · c603541
1 parent fd6a8bf
commit c603541
Show file tree

Hide file tree

Showing 19 changed files with 454 additions and 8 deletions.
diff --git a/rllib/connectors/common/add_observations_from_episodes_to_batch.py b/rllib/connectors/common/add_observations_from_episodes_to_batch.py
@@ -7,11 +7,40 @@
 from ray.rllib.core.rl_module.rl_module import RLModule
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
 
 
+@PublicAPI(stability="alpha")
 class AddObservationsFromEpisodesToBatch(ConnectorV2):
     """Gets the last observation from a running episode and adds it to the batch.
 
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline` is set to
+    False.
+
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+
+    This ConnectorV2:
     - Operates on a list of Episode objects.
     - Gets the most recent observation(s) from all the given episodes and adds them
     to the batch under construction (as a list of individual observations).

diff --git a/rllib/connectors/common/add_states_from_episodes_to_batch.py b/rllib/connectors/common/add_states_from_episodes_to_batch.py
@@ -15,11 +15,39 @@
 from ray.rllib.utils.numpy import convert_to_numpy
 from ray.rllib.utils.spaces.space_utils import batch, BatchedNdArray
 from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
 
 
+@PublicAPI(stability="alpha")
 class AddStatesFromEpisodesToBatch(ConnectorV2):
     """Gets last STATE_OUT from running episode and adds it as STATE_IN to the batch.
 
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline` is set to
+    False.
+
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+
     If the RLModule is stateful, the episodes' STATE_OUTS will be extracted
     and restructured under a new STATE_IN key.
     As a Learner connector, the resulting STATE_IN batch has the shape (B', ...).
@@ -32,6 +60,7 @@ class AddStatesFromEpisodesToBatch(ConnectorV2):
     reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be
     zero-padded, if necessary.
 
+    This ConnectorV2:
     - Operates on a list of Episode objects.
     - Gets the most recent STATE_OUT from all the given episodes and adds them under
     the STATE_IN key to the batch under construction.

diff --git a/rllib/connectors/common/agent_to_module_mapping.py b/rllib/connectors/common/agent_to_module_mapping.py
@@ -8,11 +8,39 @@
 from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.typing import EpisodeType, ModuleID
+from ray.util.annotations import PublicAPI
 
 
+@PublicAPI(stability="alpha")
 class AgentToModuleMapping(ConnectorV2):
     """ConnectorV2 that performs mapping of data from AgentID based to ModuleID based.
 
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline` is set to
+    False.
+
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+
     This connector piece is only used by RLlib (as a default connector piece) in a
     multi-agent setup.
 

diff --git a/rllib/connectors/common/batch_individual_items.py b/rllib/connectors/common/batch_individual_items.py
@@ -10,9 +10,93 @@
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.spaces.space_utils import batch
 from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
 
 
+@PublicAPI(stability="alpha")
 class BatchIndividualItems(ConnectorV2):
+    """Batches individual data-items (in lists) into tensors (with batch dimension).
+
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline` is set to
+    False.
+
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+
+    This ConnectorV2:
+    - Operates only on the input `data`, NOT the incoming list of episode objects
+    (ignored).
+    - In the single-agent case, `data` must already be a dict, structured as follows by
+    prior connector pieces of the same pipeline:
+    [col0] -> {[(eps_id,)]: [list of individual batch items]}
+    - In the multi-agent case, `data` must already be a dict, structured as follows by
+    prior connector pieces of the same pipeline (in particular the
+    `AgentToModuleMapping` piece):
+    [module_id] -> [col0] -> [list of individual batch items]
+    - Translates the above data under the different columns (e.g. "obs") into final
+    (batched) structures. For the single-agent case, the output `data` looks like this:
+    [col0] -> [possibly complex struct of batches (at the leaves)].
+    For the multi-agent case, the output `data` looks like this:
+    [module_id] -> [col0] -> [possibly complex struct of batches (at the leaves)].
+
+    .. testcode::
+
+        from ray.rllib.connectors.common import BatchIndividualItems
+        from ray.rllib.utils.test_utils import check
+
+        single_agent_data = {
+            "obs": {
+                # Note that at this stage, next-obs is not part of the data anymore ..
+                ("MA-EPS0",): [0, 1],
+                ("MA-EPS1",): [2, 3],
+            },
+            "actions": {
+                # .. so we have as many actions per episode as we have observations.
+                ("MA-EPS0",): [4, 5],
+                ("MA-EPS1",): [6, 7],
+            },
+        }
+
+        # Create our (single-agent) connector piece.
+        connector = BatchIndividualItems()
+
+        # Call the connector (and thereby batch the individual items).
+        output_data = connector(
+            rl_module=None,  # This particular connector works without an RLModule.
+            data=single_agent_data,
+            episodes=[],  # This particular connector works without a list of episodes.
+            explore=True,
+            shared_data={},
+        )
+
+        # `data` should now be batched (episode IDs should have been removed from the
+        # struct).
+        check(
+            output_data,
+            {"obs": [0, 1, 2, 3], "actions": [4, 5, 6, 7]},
+        )
+    """
+
     def __init__(
         self,
         input_observation_space: Optional[gym.Space] = None,
@@ -52,7 +136,7 @@ def __call__(
             # Multi-agent case: This connector piece should only be used after(!)
             # the AgentToModuleMapping connector has already been applied, leading
             # to a batch structure of:
-            # [module_id] -> [col0] -> [list of items]
+            # [module_id] -> [col0] -> [list of individual batch items]
             if is_multi_rl_module and column in rl_module:
                 # Case, in which a column has already been properly batched before this
                 # connector piece is called.

diff --git a/rllib/connectors/common/module_to_agent_unmapping.py b/rllib/connectors/common/module_to_agent_unmapping.py
@@ -6,8 +6,10 @@
 from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
 
 
+@PublicAPI(stability="alpha")
 class ModuleToAgentUnmapping(ConnectorV2):
     """Performs flipping of `data` from ModuleID- to AgentID based mapping.
 

diff --git a/rllib/connectors/common/numpy_to_tensor.py b/rllib/connectors/common/numpy_to_tensor.py
@@ -10,13 +10,45 @@
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.torch_utils import convert_to_torch_tensor
 from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
 
 
+@PublicAPI(stability="alpha")
 class NumpyToTensor(ConnectorV2):
     """Converts numpy arrays across the entire input data into (framework) tensors.
 
     The framework information is received via the provided `rl_module` arg in the
-    `__call__`.
+    `__call__()` method.
+
+    Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
+    are added automatically by RLlib into every env-to-module/Learner connector
+    pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
+    `config.add_default_connectors_to_learner_pipeline` is set to
+    False.
+
+    The default env-to-module connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+    The default Learner connector pipeline is:
+    [
+        [0 or more user defined ConnectorV2 pieces],
+        AddObservationsFromEpisodesToBatch,
+        AddColumnsFromEpisodesToTrainBatch,
+        AddStatesFromEpisodesToBatch,
+        AgentToModuleMapping,  # only in multi-agent setups!
+        BatchIndividualItems,
+        NumpyToTensor,
+    ]
+
+    This ConnectorV2:
+    - Loops through the input `data` and converts all found numpy arrays into
+    framework-specific tensors (possibly on a GPU).
     """
 
     def __init__(
@@ -37,6 +69,8 @@ def __init__(
             pin_mempory: Whether to pin memory when creating (torch) tensors.
                 If None (default), pins memory if `as_learner_connector` is True,
                 otherwise doesn't pin memory.
+            device: An optional device to move the resulting tensors to. If not
+                provided, all data will be left on the CPU.
             **kwargs:
         """
         super().__init__(
@@ -68,7 +102,6 @@ def __call__(
             is_single_agent = True
             data = {DEFAULT_MODULE_ID: data}
 
-        # TODO (sven): Support specifying a device (e.g. GPU).
         for module_id, module_data in data.copy().items():
             infos = module_data.pop(Columns.INFOS, None)
             if rl_module.framework == "torch":

diff --git a/rllib/connectors/common/tensor_to_numpy.py b/rllib/connectors/common/tensor_to_numpy.py
@@ -5,8 +5,10 @@
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.numpy import convert_to_numpy
 from ray.rllib.utils.typing import EpisodeType
+from ray.util.annotations import PublicAPI
 
 
+@PublicAPI(stability="alpha")
 class TensorToNumpy(ConnectorV2):
     """Converts (framework) tensors across the entire input data into numpy arrays."""
 

diff --git a/rllib/connectors/env_to_module/mean_std_filter.py b/rllib/connectors/env_to_module/mean_std_filter.py
@@ -8,11 +8,10 @@
 from ray.rllib.connectors.connector_v2 import ConnectorV2
 from ray.rllib.core.rl_module.rl_module import RLModule
 from ray.rllib.utils.annotations import override
-from ray.rllib.utils.filter import MeanStdFilter as _MeanStdFilter
+from ray.rllib.utils.filter import MeanStdFilter as _MeanStdFilter, RunningStat
 from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space
 from ray.rllib.utils.typing import AgentID, EpisodeType, StateDict
 from ray.util.annotations import PublicAPI
-from ray.rllib.utils.filter import RunningStat
 
 
 @PublicAPI(stability="alpha")