[RLlib] IMPALA on new API stack: Add Pong-v5 release test. (ray-project#45803)

Signed-off-by: JP-sDEV <jon.pablo80@gmail.com>
sven1977 authored and JP-sDEV committed Nov 14, 2024
1 parent c84260a commit 02be250
Showing 10 changed files with 239 additions and 110 deletions.
37 changes: 35 additions & 2 deletions release/release_tests.yaml
@@ -2728,6 +2728,39 @@
       cluster:
         cluster_compute: 1gpu_4cpus_gce.yaml

+# --------------------------
+# IMPALA
+# --------------------------
+- name: rllib_learning_tests_impala_ppo_torch
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  stable: true
+
+  frequency: nightly
+  team: rllib
+  cluster:
+    byod:
+      type: gpu
+      post_build_script: byod_rllib_test.sh
+      runtime_env:
+        - RLLIB_TEST_NO_JAX_IMPORT=1
+        - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ray/.mujoco/mujoco210/bin
+    cluster_compute: 2gpus_64cpus.yaml
+
+  run:
+    timeout: 1800
+    script: python learning_tests/tuned_examples/impala/pong_impala.py --enable-new-api-stack --num-learners=0 --num-env-runners=46 --stop-reward=19.5 --as-release-test
+
+  alert: default
+
+  variations:
+    - __suffix__: aws
+    - __suffix__: gce
+      env: gce
+      frequency: manual
+      cluster:
+        cluster_compute: 2gpus_64cpus_gce.yaml
+
 # --------------------------
 # PPO
@@ -2747,7 +2780,7 @@
       runtime_env:
         - RLLIB_TEST_NO_JAX_IMPORT=1
         - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ray/.mujoco/mujoco210/bin
-    cluster_compute: 8gpus_96cpus.yaml
+    cluster_compute: 4gpus_96cpus.yaml

   run:
     timeout: 1200
@@ -2761,7 +2794,7 @@
       env: gce
       frequency: manual
       cluster:
-        cluster_compute: 8gpus_96cpus_gce.yaml
+        cluster_compute: 4gpus_96cpus_gce.yaml


 # --------------------------
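Note on the new test above: the script line drives the tuned example learning_tests/tuned_examples/impala/pong_impala.py with --enable-new-api-stack, --num-learners=0 (a single local Learner on the driver rather than remote learner actors), 46 env-runner actors, and a stop reward of 19.5. A minimal sketch of what such a tuned example boils down to follows; assumptions not taken from this diff include the IMPALAConfig builder methods, the "ALE/Pong-v5" env ID resolving via an installed ale-py, and the result-dict keys (older Ray versions also spell the class ImpalaConfig):

    # Hedged sketch -- not the actual pong_impala.py.
    from ray.rllib.algorithms.impala import IMPALAConfig

    config = (
        IMPALAConfig()
        # --enable-new-api-stack: RLModule/Learner + EnvRunner/ConnectorV2.
        .api_stack(
            enable_rl_module_and_learner=True,
            enable_env_runner_and_connector_v2=True,
        )
        .environment("ALE/Pong-v5")
        # --num-env-runners=46: 46 sampling actors feed the Learner.
        .env_runners(num_env_runners=46)
    )

    algo = config.build()
    while True:
        results = algo.train()
        # --stop-reward=19.5: stop once the mean episode return hits the target.
        if results["env_runners"]["episode_return_mean"] >= 19.5:
            break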
17 changes: 17 additions & 0 deletions release/rllib_tests/4gpus_96cpus.yaml
@@ -0,0 +1,17 @@
+cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
+region: us-west-2
+
+max_workers: 0
+
+head_node_type:
+  name: head_node
+  instance_type: g6.24xlarge
+
+worker_node_types: []
+
+aws:
+  BlockDeviceMappings:
+    - DeviceName: /dev/sda1
+      Ebs:
+        DeleteOnTermination: true
+        VolumeSize: 500
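For context (AWS instance specs, not part of the diff): g6.24xlarge provides 4 NVIDIA L4 GPUs and 96 vCPUs, which is what the file name 4gpus_96cpus.yaml encodes; with max_workers: 0, the entire test runs on the head node.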
27 changes: 27 additions & 0 deletions release/rllib_tests/4gpus_96cpus_gce.yaml
@@ -0,0 +1,27 @@
+cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
+region: us-west1
+allowed_azs:
+  - us-west1-b
+
+max_workers: 0
+
+head_node_type:
+  name: head_node
+  instance_type: n1-standard-96-nvidia-t4-16gb-4
+
+worker_node_types: []
+
+gcp_advanced_configurations_json:
+  instance_properties:
+    disks:
+      - boot: true
+        auto_delete: true
+        initialize_params:
+          disk_size_gb: 500
+
+#aws:
+#  BlockDeviceMappings:
+#    - DeviceName: /dev/sda1
+#      Ebs:
+#        DeleteOnTermination: true
+#        VolumeSize: 500
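The GCE counterpart encodes the same shape in its instance type name: n1-standard-96 (96 vCPUs) with 4 NVIDIA T4 GPUs (16 GB each). The trailing commented-out aws: block is inert in a GCE config and appears to be carried over from the AWS template.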
6 changes: 3 additions & 3 deletions rllib/algorithms/algorithm_config.py
@@ -3992,7 +3992,7 @@ def get_multi_rl_module_spec(
         # Default is multi-agent and user wants to override it -> Don't use the
         # default.
         else:
-            # Use has given an override RLModuleSpec -> Use this to
+            # User provided an override RLModuleSpec -> Use this to
             # construct the individual RLModules within the MultiRLModuleSpec.
             if single_agent_rl_module_spec is not None:
                 pass
@@ -4007,7 +4007,7 @@
                 single_agent_rl_module_spec = (
                     current_rl_module_spec.rl_module_specs
                 )
-            # The currently setup multi-agent spec has NO
+            # The currently set up multi-agent spec has NO
             # RLModuleSpec in it -> Error (there is no way we can
             # infer this information from anywhere at this point).
             else:
@@ -4017,7 +4017,7 @@
"`RLModuleSpec`s to compile the individual "
"RLModules' specs! Use "
"`AlgorithmConfig.get_multi_rl_module_spec("
"policy_dict=.., single_agent_rl_module_spec=..)`."
"policy_dict=.., rl_module_spec=..)`."
)

single_agent_rl_module_spec.inference_only = inference_only
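The last hunk fixes the error message to match the renamed rl_module_spec argument. For orientation, the kind of multi-agent spec this method compiles looks roughly as follows (a hedged sketch: the rl_module_specs field name is visible in the diff above, but the import paths and constructor signatures are assumptions that may differ by Ray version):

    import gymnasium as gym
    from ray.rllib.core.rl_module.rl_module import RLModuleSpec
    from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec

    obs_space = gym.spaces.Box(-1.0, 1.0, (4,))
    act_space = gym.spaces.Discrete(2)

    # One RLModuleSpec per ModuleID -- this is what
    # `current_rl_module_spec.rl_module_specs` holds in the code above.
    multi_spec = MultiRLModuleSpec(
        rl_module_specs={
            "policy_1": RLModuleSpec(observation_space=obs_space, action_space=act_space),
            "policy_2": RLModuleSpec(observation_space=obs_space, action_space=act_space),
        },
    )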
19 changes: 14 additions & 5 deletions rllib/algorithms/marwil/marwil_learner.py
@@ -1,9 +1,9 @@
-from typing import Dict
+from typing import Dict, Optional

 from ray.rllib.core.learner.learner import Learner
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.lambda_defaultdict import LambdaDefaultDict
-from ray.rllib.utils.typing import ModuleID, TensorType
+from ray.rllib.utils.typing import ModuleID, ShouldModuleBeUpdatedFn, TensorType

 LEARNER_RESULTS_MOVING_AVG_SQD_ADV_NORM_KEY = "moving_avg_sqd_adv_norm"
 LEARNER_RESULTS_VF_EXPLAINED_VAR_KEY = "vf_explained_variance"
@@ -29,6 +29,15 @@ def build(self) -> None:
         )

     @override(Learner)
-    def remove_module(self, module_id: ModuleID) -> None:
-        super().remove_module(module_id)
-        self.moving_avg_sqd_adv_norms_per_module.pop(module_id)
+    def remove_module(
+        self,
+        module_id: ModuleID,
+        *,
+        new_should_module_be_updated: Optional[ShouldModuleBeUpdatedFn] = None,
+    ) -> None:
+        super().remove_module(
+            module_id,
+            new_should_module_be_updated=new_should_module_be_updated,
+        )
+        # In case of BC (beta==0.0 and this property never being used), pop with a default of None.
+        self.moving_avg_sqd_adv_norms_per_module.pop(module_id, None)
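The widened signature keeps MARWILLearner compatible with the base Learner.remove_module(), which now forwards a new_should_module_be_updated setting describing which remaining modules should still be trained. A hypothetical call (the list-of-ModuleIDs form is an assumption based on the ShouldModuleBeUpdatedFn alias; learner and the ModuleIDs are made up):

    # Remove "policy_2"; afterwards only "policy_1" keeps receiving updates.
    learner.remove_module(
        "policy_2",
        new_should_module_be_updated=["policy_1"],
    )

The pop(module_id, None) default matters for BC, where beta == 0.0 and the moving-average advantage norm for the module was never created.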
11 changes: 4 additions & 7 deletions rllib/connectors/common/batch_individual_items.py
@@ -190,12 +190,9 @@ def __call__(
                 # Only record structure for OBS column.
                 if column == Columns.OBS:
                     shared_data["memorized_map_structure"] = memorized_map_structure
-            # Multi-agent case: This should already be covered above.
-            # This connector piece should only be used after(!)
-            # the AgentToModuleMapping connector has already been applied, leading
-            # to a batch structure of:
-            # [module_id] -> [col0] -> [list of items]
-            else:
-                raise NotImplementedError
+            # Multi-agent case: But Module ID not found in our RLModule -> Ignore this
+            # `module_id` entirely.
+            # else:
+            #     pass

         return batch
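The deleted comment described the batch layout this connector expects once AgentToModuleMapping has run, which is still useful context; with hypothetical ModuleIDs and values it looks like:

    # [module_id] -> [column] -> [list of individual items]
    batch = {
        "module_1": {"obs": [0.1, 0.2, 0.3], "actions": [0, 1, 1]},
        "module_2": {"obs": [0.4], "actions": [2]},
    }

The behavioral change is that a module_id not present in the RLModule is now skipped silently instead of raising NotImplementedError.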
4 changes: 2 additions & 2 deletions rllib/core/learner/learner.py
@@ -1341,8 +1341,8 @@ def _update_from_batch_or_episodes(
         # a) Either also pass given batches through the learner connector (even if
         #    episodes is None). (preferred solution)
         # b) Get rid of the option to pass in a batch altogether.
-        if episodes is None:
-            batch = self._convert_batch_type(batch)
+        # if episodes is None:
+        #     batch = self._convert_batch_type(batch)

         # Check the MultiAgentBatch, whether our RLModule contains all ModuleIDs
         # found in this batch. If not, throw an error.
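In effect, a batch passed in directly (with episodes=None) is no longer converted via _convert_batch_type() at this point; per the TODO above, the preferred direction is to route such batches through the learner connector as well.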