ray-project · sven1977 · Aug 26, 2024 · Aug 22, 2024 · Aug 23, 2024 · Aug 24, 2024
@@ -312,6 +312,8 @@ def __init__(self, algo_class: Optional[type] = None):
             "aot_eager" if sys.platform == "darwin" else "onnxrt"
         )
         self.torch_compile_worker_dynamo_mode = None
+        # Default kwargs for `torch.nn.parallel.DistributedDataParallel`.
+        self.torch_ddp_kwargs = {}
 
         # `self.api_stack()`
         self.enable_rl_module_and_learner = False
@@ -1378,6 +1380,7 @@ def framework(
         torch_compile_worker: Optional[bool] = NotProvided,
         torch_compile_worker_dynamo_backend: Optional[str] = NotProvided,
         torch_compile_worker_dynamo_mode: Optional[str] = NotProvided,
+        torch_ddp_kwargs: Optional[Dict[str, Any]] = NotProvided,
     ) -> "AlgorithmConfig":
         """Sets the config's DL framework settings.
 
@@ -1417,6 +1420,12 @@ def framework(
                 the workers.
             torch_compile_worker_dynamo_mode: The torch dynamo mode to use on the
                 workers.
+            torch_ddp_kwargs: The kwargs to pass into
+                `torch.nn.parallel.DistributedDataParallel` when using
+                `num_learners > 1`. This is especially helpful for finding parameters
+                that are not used in the backward pass (for example, through
+                `find_unused_parameters=True`), which can hint at errors in custom
+                models where some parameters do not get touched although they should.
 
         Returns:
             This updated AlgorithmConfig object.
@@ -1458,6 +1467,8 @@ def framework(
             )
         if torch_compile_worker_dynamo_mode is not NotProvided:
             self.torch_compile_worker_dynamo_mode = torch_compile_worker_dynamo_mode
+        if torch_ddp_kwargs is not NotProvided:
+            self.torch_ddp_kwargs = torch_ddp_kwargs
 
         return self
 

@@ -251,7 +251,9 @@ def add_module(
                         "torch compile."
                     )
                 self._module.add_module(
-                    module_id, TorchDDPRLModule(module), override=True
+                    module_id,
+                    TorchDDPRLModule(module, **self.config.torch_ddp_kwargs),
+                    override=True,
                 )
 
         return marl_spec
@@ -406,7 +408,9 @@ def _make_modules_ddp_if_necessary(self) -> None:
         if self._distributed:
             # Single agent module: Convert to `TorchDDPRLModule`.
             if isinstance(self._module, TorchRLModule):
-                self._module = TorchDDPRLModule(self._module)
+                self._module = TorchDDPRLModule(
+                    self._module, **self.config.torch_ddp_kwargs
+                )
             # Multi agent module: Convert each submodule to `TorchDDPRLModule`.
             else:
                 assert isinstance(self._module, MultiRLModule)
@@ -415,7 +419,11 @@ def _make_modules_ddp_if_necessary(self) -> None:
                     if isinstance(sub_module, TorchRLModule):
                         # Wrap and override the module ID key in self._module.
                         self._module.add_module(
-                            key, TorchDDPRLModule(sub_module), override=True
+                            key,
+                            TorchDDPRLModule(
+                                sub_module, **self.config.torch_ddp_kwargs
+                            ),
+                            override=True,
                         )
 
     def _is_module_compatible_with_learner(self, module: RLModule) -> bool: