[RLlib] Compile update logic on learner and use cudagraphs #35759

Merged
Changes from 10 commits
7 changes: 7 additions & 0 deletions rllib/BUILD
@@ -2014,6 +2014,13 @@ py_test(
srcs = ["core/learner/tests/test_learner.py"]
)

py_test(
name = "test_torch_learner_compile",
tags = ["team:rllib", "core", "ray_data"],
size = "medium",
srcs = ["core/learner/torch/tests/test_torch_learner_compile.py"]
)

py_test(
name ="tests/test_algorithm_save_load_checkpoint_learner",
tags = ["team:rllib", "core"],
14 changes: 7 additions & 7 deletions rllib/algorithms/algorithm_config.py
@@ -286,12 +286,12 @@ def __init__(self, algo_class=None):
self.torch_compile_learner_dynamo_backend = (
"aot_eager" if sys.platform == "darwin" else "inductor"
)
self.torch_compile_learner_dynamo_mode = "reduce-overhead"
self.torch_compile_learner_dynamo_mode = None
self.torch_compile_worker = False
self.torch_compile_worker_dynamo_backend = (
"aot_eager" if sys.platform == "darwin" else "inductor"
"aot_eager" if sys.platform == "darwin" else "cudagraphs"
)
self.torch_compile_worker_dynamo_mode = "reduce-overhead"
self.torch_compile_worker_dynamo_mode = None
Contributor Author:
Setting this to None means that for any chosen backend, we use its default mode.
cudagraphs does not have a "reduce-overhead" mode, so we need to choose None here.

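The default backend/mode selection above can be sketched as a small standalone helper (hypothetical; `pick_dynamo_defaults` is not part of RLlib, and the platform reasoning is an assumption based on the diff):

```python
import sys


def pick_dynamo_defaults(for_worker: bool, platform: str = sys.platform):
    """Mirror the defaults in the diff above (hypothetical helper).

    On macOS ("darwin") the config falls back to aot_eager, presumably
    because inductor/cudagraphs are not usable there. mode=None lets
    torch.compile pick the chosen backend's default mode; cudagraphs has
    no "reduce-overhead" mode, so None is required for workers.
    """
    if platform == "darwin":
        backend = "aot_eager"
    else:
        backend = "cudagraphs" if for_worker else "inductor"
    # Returns (torch_dynamo_backend, torch_dynamo_mode).
    return backend, None
```

A usage note: these two values would then be passed through to `torch.compile(..., backend=..., mode=...)` by the compile config.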

# `self.environment()`
self.env = None
@@ -3035,7 +3035,6 @@ def get_torch_compile_learner_config(self):
)

return TorchCompileConfig(
compile_forward_train=self.torch_compile_learner,
torch_dynamo_backend=self.torch_compile_learner_dynamo_backend,
torch_dynamo_mode=self.torch_compile_learner_dynamo_mode,
)
@@ -3048,8 +3047,6 @@ def get_torch_compile_worker_config(self):
)

return TorchCompileConfig(
compile_forward_exploration=self.torch_compile_worker,
compile_forward_inference=self.torch_compile_worker,
torch_dynamo_backend=self.torch_compile_worker_dynamo_backend,
torch_dynamo_mode=self.torch_compile_worker_dynamo_mode,
)
@@ -3275,7 +3272,10 @@ def get_learner_group_config(self, module_spec: ModuleSpec) -> LearnerGroupConfi
)

if self.framework_str == "torch":
config.framework(torch_compile_cfg=self.get_torch_compile_learner_config())
config.framework(
torch_compile=self.torch_compile_learner,
torch_compile_cfg=self.get_torch_compile_learner_config(),
)
elif self.framework_str == "tf2":
config.framework(eager_tracing=self.eager_tracing)

23 changes: 23 additions & 0 deletions rllib/core/learner/learner.py
@@ -92,13 +92,35 @@ class FrameworkHyperparameters:
This is useful for speeding up the training loop. However, it is not
compatible with all tf operations. For example, tf.print is not supported
in tf.function.
torch_compile: Whether to use torch.compile() within the context of a given
learner.
what_to_compile: What to compile when using torch.compile(). Can be one of
["complete_update", "forward_train"].
If "complete_update", the update step of the learner will be compiled. This
includes the forward pass of the RLModule, the loss computation, and the
optimizer step.
If "forward_train", only the forward methods (and therein the
forward_train method) of the RLModule will be compiled.
Either of the two may lead to different performance gains in different
settings. "complete_update" promises the highest performance gains, but
may not work in some settings. By compiling only forward_train, you may
already get some speedups and avoid issues that arise from compiling the
entire update.
Contributor Author:
In some cases, there are slight performance differences when compiling forward train vs the complete update.
Until we have explored this and know if we can eliminate one or the other possibility, we can use this switch to choose.

torch_compile_cfg: The TorchCompileConfig to use for compiling the
RLModule in Torch.
"""

eager_tracing: bool = False
torch_compile: bool = False
what_to_compile: str = "complete_update"
torch_compile_cfg: Optional["TorchCompileConfig"] = None

def validate(self):
Contributor:

You need to expose these parameters to the top-level algorithm config. Right now, what_to_compile does not surface in AlgorithmConfig.

if self.what_to_compile not in ["complete_update", "forward_train"]:
raise ValueError(
"what_to_compile must be one of ['complete_update', 'forward_train']."
)


@dataclass
class LearnerHyperparameters:
@@ -314,6 +336,7 @@ def __init__(
self._framework_hyperparameters = (
framework_hyperparameters or FrameworkHyperparameters()
)
self._framework_hyperparameters.validate()

# whether self.build has already been called
self._is_built = False
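The new hyperparameters and their validation can be sketched in isolation (a simplified, hypothetical mirror of the dataclass in the diff above, not RLlib's actual class):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class FrameworkHyperparametersSketch:
    """Simplified stand-in for RLlib's FrameworkHyperparameters (illustration only)."""

    eager_tracing: bool = False
    torch_compile: bool = False
    what_to_compile: str = "complete_update"
    torch_compile_cfg: Optional[object] = None

    def validate(self) -> None:
        # Same check as in the diff: only two compile scopes are allowed.
        if self.what_to_compile not in ["complete_update", "forward_train"]:
            raise ValueError(
                "what_to_compile must be one of "
                "['complete_update', 'forward_train']."
            )


# Compiling only the RLModule's forward methods:
hps = FrameworkHyperparametersSketch(
    torch_compile=True, what_to_compile="forward_train"
)
hps.validate()  # passes

# An unknown value fails fast, as it would at Learner construction time:
try:
    FrameworkHyperparametersSketch(what_to_compile="loss_only").validate()
except ValueError as e:
    print(e)
```

Because the Learner calls `validate()` in `__init__`, a misconfigured `what_to_compile` surfaces immediately rather than at update time.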
6 changes: 6 additions & 0 deletions rllib/core/learner/learner_group_config.py
@@ -56,6 +56,7 @@ def __init__(self, cls: Type[LearnerGroup] = None) -> None:

# `self.framework()`
self.eager_tracing = False
self.torch_compile = False
self.torch_compile_cfg = None

def validate(self) -> None:
@@ -85,6 +86,7 @@ def build(self) -> LearnerGroup:
framework_hps = FrameworkHyperparameters(
eager_tracing=self.eager_tracing,
torch_compile_cfg=self.torch_compile_cfg,
torch_compile=self.torch_compile,
)

learner_spec = LearnerSpec(
@@ -100,12 +102,16 @@
def framework(
self,
eager_tracing: Optional[bool] = NotProvided,
torch_compile: Optional[bool] = NotProvided,
torch_compile_cfg: Optional["TorchCompileConfig"] = NotProvided,
) -> "LearnerGroupConfig":

if eager_tracing is not NotProvided:
self.eager_tracing = eager_tracing

if torch_compile is not NotProvided:
self.torch_compile = torch_compile

if torch_compile_cfg is not NotProvided:
self.torch_compile_cfg = torch_compile_cfg

129 changes: 129 additions & 0 deletions rllib/core/learner/torch/tests/test_torch_learner_compile.py
Contributor:
This test will get skipped on CI right? Can we add that as a comment / TODO?

Contributor Author:
This should be clear from the @unittest.skipIf() above. I don't think we should add this as a comment because the comment can become out of date quickly.

Contributor:
fair

@@ -0,0 +1,129 @@
import itertools
import unittest

import gymnasium as gym

import ray
from ray.rllib.core.learner.learner import FrameworkHyperparameters
from ray.rllib.core.learner.learner import Learner
from ray.rllib.core.models.tests.test_base_models import _dynamo_is_available
from ray.rllib.core.rl_module.torch.torch_compile_config import TorchCompileConfig
from ray.rllib.core.testing.torch.bc_learner import BCTorchLearner
from ray.rllib.core.testing.utils import get_learner
from ray.rllib.core.testing.utils import get_module_spec
from ray.rllib.policy.sample_batch import MultiAgentBatch
from ray.rllib.utils.test_utils import get_cartpole_dataset_reader


def _get_learner(learning_rate: float = 1e-3) -> Learner:
env = gym.make("CartPole-v1")
# adding learning rate as a configurable parameter to avoid hardcoding it
# and information leakage across tests that rely on knowing the LR value
# that is used in the learner.
learner = get_learner("torch", env, learning_rate=learning_rate)
learner.build()

return learner
Contributor:
modify get_learner in rllib.core.testing.utils and use it here?



class TestLearner(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
ray.init()

@classmethod
def tearDownClass(cls) -> None:
ray.shutdown()

@unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
def test_torch_compile(self):
"""Test if torch.compile() can be applied and used on the learner.

Also tests if we can update with the compiled update method without errors.
"""

env = gym.make("CartPole-v1")
is_multi_agents = [False, True]
what_to_compiles = ["complete_update", "forward_train"]

for is_multi_agent, what_to_compile in itertools.product(
is_multi_agents, what_to_compiles
):
framework_hps = FrameworkHyperparameters(
torch_compile=True,
torch_compile_cfg=TorchCompileConfig(),
what_to_compile=what_to_compile,
)
spec = get_module_spec(
framework="torch", env=env, is_multi_agent=is_multi_agent
)
learner = BCTorchLearner(
Contributor:
why are you not using get_learner()?

module_spec=spec,
framework_hyperparameters=framework_hps,
)
learner.build()

reader = get_cartpole_dataset_reader(batch_size=512)

for iter_i in range(10):
batch = reader.next()
learner.update(batch.as_multi_agent())

spec = get_module_spec(framework="torch", env=env, is_multi_agent=False)
learner.add_module(module_id="another_module", module_spec=spec)

for iter_i in range(10):
batch = MultiAgentBatch(
{"another_module": reader.next(), "default_policy": reader.next()},
0,
)
Comment on lines +70 to +73
Contributor:
do you really want to call reader.next() twice per iteration? is this intentional? you can obtain the batch once and use it in two places.

learner.update(batch)

learner.remove_module(module_id="another_module")

@unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
def test_torch_compile_no_breaks(self):
"""Tests if torch.compile() does encounter too many breaks.

torch.compile() should ideally not encounter any breaks when compiling the
update method of the learner. This method tests if we encounter only a given
number of breaks.
"""

env = gym.make("CartPole-v1")
framework_hps = FrameworkHyperparameters(
torch_compile=False,
Contributor:

Shouldn't torch_compile be True here? Can you explain?

torch_compile_cfg=TorchCompileConfig(),
)

spec = get_module_spec(framework="torch", env=env)
learner = BCTorchLearner(
module_spec=spec,
framework_hyperparameters=framework_hps,
)
Contributor:
use get_learner

learner.build()

import torch._dynamo as dynamo

reader = get_cartpole_dataset_reader(batch_size=512)

batch = reader.next().as_multi_agent()
batch = learner._convert_batch_type(batch)

# This is a helper method of dynamo to analyze where breaks occur.
dynamo_explanation = dynamo.explain(learner._update, batch)
Contributor:

It seems like newer versions output a dataclass. You either want to use that and pin the torch version in your tests (and skip or error if the version is inconsistent):

https://sourcegraph.com/github.com/pytorch/pytorch@e9674d146ce424d3ea44f8b2ffd9e9f92dfa15f7/-/blob/torch/_dynamo/backends/debugging.py

If you use the tuple return version, please don't hard-code indices. Instead, use named tuple assignment:

gm, graphs, op_count, ops_per_graph, break_reasons = dynamo.explain(learner._update, batch)

Contributor Author:

Just saw that the tuples change between versions, too. 2.0.1 adds a tuple entry over 2.0.0, so every new torch version is different here at the moment. I'll leave a comment that explains this so that we can fix it once our CI offers some version >= 2.0.0.

print(dynamo_explanation[5])

# There should be only one break reason - `return_value` - since inputs and
# outputs are not checked
break_reasons_list = dynamo_explanation[4]

# TODO(Artur): Attempt bringing breaks down to 1. (This may not be possible)
Contributor:
Is len always gonna be three?

Contributor Author:
Currently it is.
We'll want to fit this test to a couple of our RLModules in the future so that we don't introduce breaks by accident, because these cause silent regressions. For each module, we'll then have to record the number of graph breaks we expect over time.
For now, the test only includes the BC learner and should be stable with these three graph breaks.

self.assertEqual(len(break_reasons_list), 3)


if __name__ == "__main__":
import pytest
import sys

sys.exit(pytest.main(["-v", __file__]))
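The version drift in `dynamo.explain()`'s return format discussed in the review thread could be absorbed with a small adapter; a sketch, where the `break_reasons` attribute name is an assumption based on the linked debugging.py, not a confirmed torch API:

```python
def extract_break_reasons(explanation):
    """Pull the break reasons out of a dynamo.explain() result.

    torch 2.0.x returns a plain tuple (break reasons at index 4 in 2.0.1),
    while newer versions return an ExplainOutput-style object; the
    attribute name `break_reasons` is an assumption here.
    """
    if hasattr(explanation, "break_reasons"):
        return explanation.break_reasons
    return explanation[4]
```

Using such an adapter in the test would avoid hard-coded tuple indices breaking on the next torch upgrade.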