Skip to content

Commit

Permalink
[RLlib] Add ObservationPreprocessor (ConnectorV2) and cleanup Conne…
Browse files Browse the repository at this point in the history
…ctorV2 docstrings. (#47077)
  • Loading branch information
sven1977 authored Aug 12, 2024
1 parent fd6a8bf commit c603541
Show file tree
Hide file tree
Showing 19 changed files with 454 additions and 8 deletions.
29 changes: 29 additions & 0 deletions rllib/connectors/common/add_observations_from_episodes_to_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,40 @@
from ray.rllib.core.rl_module.rl_module import RLModule
from ray.rllib.utils.annotations import override
from ray.rllib.utils.typing import EpisodeType
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class AddObservationsFromEpisodesToBatch(ConnectorV2):
"""Gets the last observation from a running episode and adds it to the batch.
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
are added automatically by RLlib into every env-to-module/Learner connector
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
`config.add_default_connectors_to_learner_pipeline` are set to
False.
The default env-to-module connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
The default Learner connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddColumnsFromEpisodesToTrainBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
This ConnectorV2:
- Operates on a list of Episode objects.
- Gets the most recent observation(s) from all the given episodes and adds them
to the batch under construction (as a list of individual observations).
Expand Down
29 changes: 29 additions & 0 deletions rllib/connectors/common/add_states_from_episodes_to_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,39 @@
from ray.rllib.utils.numpy import convert_to_numpy
from ray.rllib.utils.spaces.space_utils import batch, BatchedNdArray
from ray.rllib.utils.typing import EpisodeType
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class AddStatesFromEpisodesToBatch(ConnectorV2):
"""Gets last STATE_OUT from running episode and adds it as STATE_IN to the batch.
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
are added automatically by RLlib into every env-to-module/Learner connector
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
`config.add_default_connectors_to_learner_pipeline` are set to
False.
The default env-to-module connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
The default Learner connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddColumnsFromEpisodesToTrainBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
If the RLModule is stateful, the episodes' STATE_OUTS will be extracted
and restructured under a new STATE_IN key.
As a Learner connector, the resulting STATE_IN batch has the shape (B', ...).
Expand All @@ -32,6 +60,7 @@ class AddStatesFromEpisodesToBatch(ConnectorV2):
reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be
zero-padded, if necessary.
This ConnectorV2:
- Operates on a list of Episode objects.
- Gets the most recent STATE_OUT from all the given episodes and adds them under
the STATE_IN key to the batch under construction.
Expand Down
28 changes: 28 additions & 0 deletions rllib/connectors/common/agent_to_module_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,39 @@
from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
from ray.rllib.utils.annotations import override
from ray.rllib.utils.typing import EpisodeType, ModuleID
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class AgentToModuleMapping(ConnectorV2):
"""ConnectorV2 that performs mapping of data from AgentID based to ModuleID based.
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
are added automatically by RLlib into every env-to-module/Learner connector
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
`config.add_default_connectors_to_learner_pipeline` are set to
False.
The default env-to-module connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
The default Learner connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddColumnsFromEpisodesToTrainBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
This connector piece is only used by RLlib (as a default connector piece) in a
multi-agent setup.
Expand Down
86 changes: 85 additions & 1 deletion rllib/connectors/common/batch_individual_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,93 @@
from ray.rllib.utils.annotations import override
from ray.rllib.utils.spaces.space_utils import batch
from ray.rllib.utils.typing import EpisodeType
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class BatchIndividualItems(ConnectorV2):
"""Batches individual data-items (in lists) into tensors (with batch dimension).
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
are added automatically by RLlib into every env-to-module/Learner connector
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
`config.add_default_connectors_to_learner_pipeline` are set to
False.
The default env-to-module connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
The default Learner connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddColumnsFromEpisodesToTrainBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
This ConnectorV2:
- Operates only on the input `data`, NOT the incoming list of episode objects
(ignored).
- In the single-agent case, `data` must already be a dict, structured as follows by
prior connector pieces of the same pipeline:
[col0] -> {[(eps_id,)]: [list of individual batch items]}
- In the multi-agent case, `data` must already be a dict, structured as follows by
prior connector pieces of the same pipeline (in particular the
`AgentToModuleMapping` piece):
[module_id] -> [col0] -> [list of individual batch items]
- Translates the above data under the different columns (e.g. "obs") into final
(batched) structures. For the single-agent case, the output `data` looks like this:
[col0] -> [possibly complex struct of batches (at the leaves)].
For the multi-agent case, the output `data` looks like this:
[module_id] -> [col0] -> [possibly complex struct of batches (at the leaves)].
.. testcode::
from ray.rllib.connectors.common import BatchIndividualItems
from ray.rllib.utils.test_utils import check
single_agent_data = {
"obs": {
# Note that at this stage, next-obs is not part of the data anymore ..
("MA-EPS0",): [0, 1],
("MA-EPS1",): [2, 3],
},
"actions": {
# .. so we have as many actions per episode as we have observations.
("MA-EPS0",): [4, 5],
("MA-EPS1",): [6, 7],
},
}
# Create our (single-agent) connector piece.
connector = BatchIndividualItems()
# Call the connector (and thereby batch the individual items).
output_data = connector(
rl_module=None, # This particular connector works without an RLModule.
data=single_agent_data,
episodes=[], # This particular connector works without a list of episodes.
explore=True,
shared_data={},
)
# `data` should now be batched (episode IDs should have been removed from the
# struct).
check(
output_data,
{"obs": [0, 1, 2, 3], "actions": [4, 5, 6, 7]},
)
"""

def __init__(
self,
input_observation_space: Optional[gym.Space] = None,
Expand Down Expand Up @@ -52,7 +136,7 @@ def __call__(
# Multi-agent case: This connector piece should only be used after(!)
# the AgentToModuleMapping connector has already been applied, leading
# to a batch structure of:
# [module_id] -> [col0] -> [list of items]
# [module_id] -> [col0] -> [list of individual batch items]
if is_multi_rl_module and column in rl_module:
# Case, in which a column has already been properly batched before this
# connector piece is called.
Expand Down
2 changes: 2 additions & 0 deletions rllib/connectors/common/module_to_agent_unmapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
from ray.rllib.utils.annotations import override
from ray.rllib.utils.typing import EpisodeType
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class ModuleToAgentUnmapping(ConnectorV2):
"""Performs flipping of `data` from ModuleID- to AgentID based mapping.
Expand Down
37 changes: 35 additions & 2 deletions rllib/connectors/common/numpy_to_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,45 @@
from ray.rllib.utils.annotations import override
from ray.rllib.utils.torch_utils import convert_to_torch_tensor
from ray.rllib.utils.typing import EpisodeType
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class NumpyToTensor(ConnectorV2):
"""Converts numpy arrays across the entire input data into (framework) tensors.
The framework information is received via the provided `rl_module` arg in the
`__call__`.
`__call__()` method.
Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that
are added automatically by RLlib into every env-to-module/Learner connector
pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or
`config.add_default_connectors_to_learner_pipeline` are set to
False.
The default env-to-module connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
The default Learner connector pipeline is:
[
[0 or more user defined ConnectorV2 pieces],
AddObservationsFromEpisodesToBatch,
AddColumnsFromEpisodesToTrainBatch,
AddStatesFromEpisodesToBatch,
AgentToModuleMapping, # only in multi-agent setups!
BatchIndividualItems,
NumpyToTensor,
]
This ConnectorV2:
- Loops through the input `data` and converts all found numpy arrays into
framework-specific tensors (possibly on a GPU).
"""

def __init__(
Expand All @@ -37,6 +69,8 @@ def __init__(
pin_mempory: Whether to pin memory when creating (torch) tensors.
If None (default), pins memory if `as_learner_connector` is True,
otherwise doesn't pin memory.
device: An optional device to move the resulting tensors to. If not
provided, all data will be left on the CPU.
**kwargs:
"""
super().__init__(
Expand Down Expand Up @@ -68,7 +102,6 @@ def __call__(
is_single_agent = True
data = {DEFAULT_MODULE_ID: data}

# TODO (sven): Support specifying a device (e.g. GPU).
for module_id, module_data in data.copy().items():
infos = module_data.pop(Columns.INFOS, None)
if rl_module.framework == "torch":
Expand Down
2 changes: 2 additions & 0 deletions rllib/connectors/common/tensor_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from ray.rllib.utils.annotations import override
from ray.rllib.utils.numpy import convert_to_numpy
from ray.rllib.utils.typing import EpisodeType
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class TensorToNumpy(ConnectorV2):
"""Converts (framework) tensors across the entire input data into numpy arrays."""

Expand Down
3 changes: 1 addition & 2 deletions rllib/connectors/env_to_module/mean_std_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
from ray.rllib.connectors.connector_v2 import ConnectorV2
from ray.rllib.core.rl_module.rl_module import RLModule
from ray.rllib.utils.annotations import override
from ray.rllib.utils.filter import MeanStdFilter as _MeanStdFilter
from ray.rllib.utils.filter import MeanStdFilter as _MeanStdFilter, RunningStat
from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space
from ray.rllib.utils.typing import AgentID, EpisodeType, StateDict
from ray.util.annotations import PublicAPI
from ray.rllib.utils.filter import RunningStat


@PublicAPI(stability="alpha")
Expand Down
Loading

0 comments on commit c603541

Please sign in to comment.