Fix DDP + SyncBN (#6838)
* Fix DDP + SyncBN

Ensure that the model is already on the correct GPU before applying the SyncBN conversion (see the sketch below)

* Fix order of SyncBN for ddp_spawn
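
For context, this is the ordering the commit enforces, sketched with plain PyTorch APIs (the model and device id below are illustrative placeholders; Lightning's configure_sync_batchnorm wraps the same convert_sync_batchnorm call):

import torch
from torch import nn

# Placeholder model containing BatchNorm layers.
model = nn.Sequential(nn.Linear(32, 32), nn.BatchNorm1d(32))
device = torch.device("cuda", 0)  # illustrative local rank

# 1) Move the model to its target GPU first ...
model = model.to(device)

# 2) ... then convert BatchNorm layers to SyncBatchNorm, so the conversion
#    happens on the device the process will actually train on.
model = nn.SyncBatchNorm.convert_sync_batchnorm(model)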
BloodAxe authored Apr 6, 2021
1 parent 6dc1078 commit eafec7d
Showing 2 changed files with 6 additions and 6 deletions.
6 changes: 3 additions & 3 deletions pytorch_lightning/plugins/training_type/ddp.py
@@ -241,12 +241,12 @@ def init_ddp_connection(self, global_rank: int, world_size: int) -> None:
             torch_distrib.init_process_group(self.torch_distributed_backend, rank=global_rank, world_size=world_size)

     def pre_dispatch(self):
-        if self.sync_batchnorm:
-            self.model = self.configure_sync_batchnorm(self.model)
-
         # move the model to the correct device
         self.model_to_device()

+        if self.sync_batchnorm:
+            self.model = self.configure_sync_batchnorm(self.model)
+
         self.configure_ddp()

         self.barrier()
6 changes: 3 additions & 3 deletions pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -141,12 +141,12 @@ def new_process(self, process_idx, trainer, mp_queue):
         self.dist.rank = self.global_rank
         self.dist.device = self.root_device

-        if self.sync_batchnorm:
-            self.model = self.configure_sync_batchnorm(self.model)
-
         # move the model to the correct device
         self.model_to_device()

+        if self.sync_batchnorm:
+            self.model = self.configure_sync_batchnorm(self.model)
+
         self.configure_ddp()

         self.barrier()
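
For reference, a minimal usage sketch that exercises the fixed code path with the ddp_spawn plugin (the toy module and random data are placeholders, assuming the Trainer API of this release):

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl

class LitModel(pl.LightningModule):
    # Toy module with a BatchNorm layer affected by the SyncBN conversion.
    def __init__(self):
        super().__init__()
        self.layer = nn.Sequential(nn.Linear(32, 32), nn.BatchNorm1d(32))

    def training_step(self, batch, batch_idx):
        x, y = batch
        return nn.functional.mse_loss(self.layer(x), y)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)

# With sync_batchnorm=True, the plugin now moves the model to its GPU
# before converting BatchNorm layers to SyncBatchNorm.
trainer = pl.Trainer(gpus=2, accelerator="ddp_spawn", sync_batchnorm=True, max_epochs=1)
data = DataLoader(TensorDataset(torch.randn(256, 32), torch.randn(256, 32)), batch_size=32)
trainer.fit(LitModel(), data)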
