[RLlib] IMPALA on new API stack: Add Pong-v5 release test. (ray-project#45803)

Signed-off-by: JP-sDEV <jon.pablo80@gmail.com>
sven1977 authored and JP-sDEV committed Nov 14, 2024
1 parent c84260a commit 02be250
Showing 10 changed files with 239 additions and 110 deletions.
37 changes: 35 additions & 2 deletions release/release_tests.yaml
@@ -2728,6 +2728,39 @@
       cluster:
         cluster_compute: 1gpu_4cpus_gce.yaml

+# --------------------------
+# IMPALA
+# --------------------------
+- name: rllib_learning_tests_impala_ppo_torch
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  stable: true
+
+  frequency: nightly
+  team: rllib
+  cluster:
+    byod:
+      type: gpu
+      post_build_script: byod_rllib_test.sh
+      runtime_env:
+        - RLLIB_TEST_NO_JAX_IMPORT=1
+        - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ray/.mujoco/mujoco210/bin
+    cluster_compute: 2gpus_64cpus.yaml
+
+  run:
+    timeout: 1800
+    script: python learning_tests/tuned_examples/impala/pong_impala.py --enable-new-api-stack --num-learners=0 --num-env-runners=46 --stop-reward=19.5 --as-release-test
+
+  alert: default
+
+  variations:
+    - __suffix__: aws
+    - __suffix__: gce
+      env: gce
+      frequency: manual
+      cluster:
+        cluster_compute: 2gpus_64cpus_gce.yaml
+
 # --------------------------
 # PPO
@@ -2747,7 +2780,7 @@
       runtime_env:
         - RLLIB_TEST_NO_JAX_IMPORT=1
         - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ray/.mujoco/mujoco210/bin
-    cluster_compute: 8gpus_96cpus.yaml
+    cluster_compute: 4gpus_96cpus.yaml

   run:
     timeout: 1200
@@ -2761,7 +2794,7 @@
       env: gce
       frequency: manual
       cluster:
-        cluster_compute: 8gpus_96cpus_gce.yaml
+        cluster_compute: 4gpus_96cpus_gce.yaml


 # --------------------------
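Note on the new test above: the script line drives the tuned example learning_tests/tuned_examples/impala/pong_impala.py with --enable-new-api-stack, --num-learners=0 (a single local Learner on the driver rather than remote learner actors), 46 env-runner actors, and a stop reward of 19.5. A minimal sketch of what such a tuned example boils down to follows; assumptions not taken from this diff include the IMPALAConfig builder methods, the "ALE/Pong-v5" env ID resolving via an installed ale-py, and the result-dict keys (older Ray versions also spell the class ImpalaConfig):

    # Hedged sketch -- not the actual pong_impala.py.
    from ray.rllib.algorithms.impala import IMPALAConfig

    config = (
        IMPALAConfig()
        # --enable-new-api-stack: RLModule/Learner + EnvRunner/ConnectorV2.
        .api_stack(
            enable_rl_module_and_learner=True,
            enable_env_runner_and_connector_v2=True,
        )
        .environment("ALE/Pong-v5")
        # --num-env-runners=46: 46 sampling actors feed the Learner.
        .env_runners(num_env_runners=46)
    )

    algo = config.build()
    while True:
        results = algo.train()
        # --stop-reward=19.5: stop once the mean episode return hits the target.
        if results["env_runners"]["episode_return_mean"] >= 19.5:
            break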
17 changes: 17 additions & 0 deletions release/rllib_tests/4gpus_96cpus.yaml
@@ -0,0 +1,17 @@
+cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
+region: us-west-2
+
+max_workers: 0
+
+head_node_type:
+  name: head_node
+  instance_type: g6.24xlarge
+
+worker_node_types: []
+
+aws:
+  BlockDeviceMappings:
+    - DeviceName: /dev/sda1
+      Ebs:
+        DeleteOnTermination: true
+        VolumeSize: 500
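For context (AWS instance specs, not part of the diff): g6.24xlarge provides 4 NVIDIA L4 GPUs and 96 vCPUs, which is what the file name 4gpus_96cpus.yaml encodes; with max_workers: 0, the entire test runs on the head node.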
27 changes: 27 additions & 0 deletions release/rllib_tests/4gpus_96cpus_gce.yaml
@@ -0,0 +1,27 @@
+cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
+region: us-west1
+allowed_azs:
+  - us-west1-b
+
+max_workers: 0
+
+head_node_type:
+  name: head_node
+  instance_type: n1-standard-96-nvidia-t4-16gb-4
+
+worker_node_types: []
+
+gcp_advanced_configurations_json:
+  instance_properties:
+    disks:
+      - boot: true
+        auto_delete: true
+        initialize_params:
+          disk_size_gb: 500
+
+#aws:
+#  BlockDeviceMappings:
+#    - DeviceName: /dev/sda1
+#      Ebs:
+#        DeleteOnTermination: true
+#        VolumeSize: 500
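The GCE counterpart encodes the same shape in its instance type name: n1-standard-96 (96 vCPUs) with 4 NVIDIA T4 GPUs (16 GB each). The trailing commented-out aws: block is inert in a GCE config and appears to be carried over from the AWS template.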
6 changes: 3 additions & 3 deletions rllib/algorithms/algorithm_config.py
@@ -3992,7 +3992,7 @@ def get_multi_rl_module_spec(
         # Default is multi-agent and user wants to override it -> Don't use the
         # default.
         else:
-            # Use has given an override RLModuleSpec -> Use this to
+            # User provided an override RLModuleSpec -> Use this to
             # construct the individual RLModules within the MultiRLModuleSpec.
             if single_agent_rl_module_spec is not None:
                 pass
@@ -4007,7 +4007,7 @@
                 single_agent_rl_module_spec = (
                     current_rl_module_spec.rl_module_specs
                 )
-            # The currently setup multi-agent spec has NO
+            # The currently set up multi-agent spec has NO
             # RLModuleSpec in it -> Error (there is no way we can
             # infer this information from anywhere at this point).
             else:
@@ -4017,7 +4017,7 @@
"`RLModuleSpec`s to compile the individual "
"RLModules' specs! Use "
"`AlgorithmConfig.get_multi_rl_module_spec("
"policy_dict=.., single_agent_rl_module_spec=..)`."
"policy_dict=.., rl_module_spec=..)`."
)

single_agent_rl_module_spec.inference_only = inference_only
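The last hunk fixes the error message to match the renamed rl_module_spec argument. For orientation, the kind of multi-agent spec this method compiles looks roughly as follows (a hedged sketch: the rl_module_specs field name is visible in the diff above, but the import paths and constructor signatures are assumptions that may differ by Ray version):

    import gymnasium as gym
    from ray.rllib.core.rl_module.rl_module import RLModuleSpec
    from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec

    obs_space = gym.spaces.Box(-1.0, 1.0, (4,))
    act_space = gym.spaces.Discrete(2)

    # One RLModuleSpec per ModuleID -- this is what
    # `current_rl_module_spec.rl_module_specs` holds in the code above.
    multi_spec = MultiRLModuleSpec(
        rl_module_specs={
            "policy_1": RLModuleSpec(observation_space=obs_space, action_space=act_space),
            "policy_2": RLModuleSpec(observation_space=obs_space, action_space=act_space),
        },
    )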
19 changes: 14 additions & 5 deletions rllib/algorithms/marwil/marwil_learner.py
@@ -1,9 +1,9 @@
-from typing import Dict
+from typing import Dict, Optional

 from ray.rllib.core.learner.learner import Learner
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.lambda_defaultdict import LambdaDefaultDict
-from ray.rllib.utils.typing import ModuleID, TensorType
+from ray.rllib.utils.typing import ModuleID, ShouldModuleBeUpdatedFn, TensorType

 LEARNER_RESULTS_MOVING_AVG_SQD_ADV_NORM_KEY = "moving_avg_sqd_adv_norm"
 LEARNER_RESULTS_VF_EXPLAINED_VAR_KEY = "vf_explained_variance"
@@ -29,6 +29,15 @@ def build(self) -> None:
         )

     @override(Learner)
-    def remove_module(self, module_id: ModuleID) -> None:
-        super().remove_module(module_id)
-        self.moving_avg_sqd_adv_norms_per_module.pop(module_id)
+    def remove_module(
+        self,
+        module_id: ModuleID,
+        *,
+        new_should_module_be_updated: Optional[ShouldModuleBeUpdatedFn] = None,
+    ) -> None:
+        super().remove_module(
+            module_id,
+            new_should_module_be_updated=new_should_module_be_updated,
+        )
+        # In case of BC (beta==0.0 and this property never being used), pop with a default of None.
+        self.moving_avg_sqd_adv_norms_per_module.pop(module_id, None)
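The widened signature keeps MARWILLearner compatible with the base Learner.remove_module(), which now forwards a new_should_module_be_updated setting describing which remaining modules should still be trained. A hypothetical call (the list-of-ModuleIDs form is an assumption based on the ShouldModuleBeUpdatedFn alias; learner and the ModuleIDs are made up):

    # Remove "policy_2"; afterwards only "policy_1" keeps receiving updates.
    learner.remove_module(
        "policy_2",
        new_should_module_be_updated=["policy_1"],
    )

The pop(module_id, None) default matters for BC, where beta == 0.0 and the moving-average advantage norm for the module was never created.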
11 changes: 4 additions & 7 deletions rllib/connectors/common/batch_individual_items.py
@@ -190,12 +190,9 @@ def __call__(
                 # Only record structure for OBS column.
                 if column == Columns.OBS:
                     shared_data["memorized_map_structure"] = memorized_map_structure
-            # Multi-agent case: This should already be covered above.
-            # This connector piece should only be used after(!)
-            # the AgentToModuleMapping connector has already been applied, leading
-            # to a batch structure of:
-            # [module_id] -> [col0] -> [list of items]
-            else:
-                raise NotImplementedError
+            # Multi-agent case: But Module ID not found in our RLModule -> Ignore this
+            # `module_id` entirely.
+            # else:
+            #     pass

         return batch
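The deleted comment described the batch layout this connector expects once AgentToModuleMapping has run, which is still useful context; with hypothetical ModuleIDs and values it looks like:

    # [module_id] -> [column] -> [list of individual items]
    batch = {
        "module_1": {"obs": [0.1, 0.2, 0.3], "actions": [0, 1, 1]},
        "module_2": {"obs": [0.4], "actions": [2]},
    }

The behavioral change is that a module_id not present in the RLModule is now skipped silently instead of raising NotImplementedError.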
4 changes: 2 additions & 2 deletions rllib/core/learner/learner.py
@@ -1341,8 +1341,8 @@ def _update_from_batch_or_episodes(
         # a) Either also pass given batches through the learner connector (even if
         #    episodes is None). (preferred solution)
         # b) Get rid of the option to pass in a batch altogether.
-        if episodes is None:
-            batch = self._convert_batch_type(batch)
+        # if episodes is None:
+        #     batch = self._convert_batch_type(batch)

         # Check the MultiAgentBatch, whether our RLModule contains all ModuleIDs
         # found in this batch. If not, throw an error.
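In effect, a batch passed in directly (with episodes=None) is no longer converted via _convert_batch_type() at this point; per the TODO above, the preferred direction is to route such batches through the learner connector as well.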