[Distributed] Switch all_reduce to use the new functional collective op (#6887)

PyTorch has implemented a new set of functional collective ops and is planning to remove the old ops. Migrating all_reduce to use the new op.

See context in pytorch/pytorch#93173 (comment)
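For orientation, a minimal sketch of the call-shape change (not part of the commit): only the operator names and argument order are taken from the diff below; `inputs` and the "sum" reduce op are illustrative placeholders, and an initialized XLA/distributed runtime is assumed.

    import torch

    # Old traceable collective op: (tensor, reduce_op, tag, ranks, group_size).
    result = torch.ops.c10d_functional.all_reduce(inputs, "sum", "", [], 0)

    # New functional collective op: (tensor, reduce_op, group_name).
    result = torch.ops._c10d_functional.all_reduce(inputs, "sum", "")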
yifuwang authored Apr 10, 2024
1 parent 756b0ec commit a816c42
Showing 2 changed files with 7 additions and 11 deletions.
3 changes: 1 addition & 2 deletions torch_xla/core/xla_model.py
@@ -491,8 +491,7 @@ def all_reduce(reduce_type, inputs, scale=1.0, groups=None, pin_layout=True):
   if scale == 1.0 and groups == [] and pin_layout:
     # TODO(alanwaketan): Support groups.
     # Only c10d_functional version cc ops are traceable by Dynamo.
-    result = torch.ops.c10d_functional.all_reduce(inputs, reduce_type, "", [],
-                                                  0)
+    result = torch.ops._c10d_functional.all_reduce(inputs, reduce_type, "")
   else:
     result = torch_xla._XLAC._xla_all_reduce(reduce_type, inputs, scale,
                                              groups, pin_layout)
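A hedged usage sketch of the Python path above (the device setup and values are assumptions, not part of the commit): with scale=1.0, an empty groups list, and pin_layout=True, xm.all_reduce takes the Dynamo-traceable _c10d_functional branch; other argument combinations fall back to _xla_all_reduce.

    import torch
    import torch_xla.core.xla_model as xm

    t = torch.ones(4, device=xm.xla_device())
    # Hits the `if` branch above, i.e. torch.ops._c10d_functional.all_reduce.
    reduced = xm.all_reduce(xm.REDUCE_SUM, t, scale=1.0, groups=[], pin_layout=True)
    # Any other combination (e.g. scale != 1.0 or a non-empty groups list)
    # falls back to torch_xla._XLAC._xla_all_reduce.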
15 changes: 6 additions & 9 deletions torch_xla/csrc/cross_replica_reduces.cpp
@@ -112,23 +112,20 @@ std::shared_ptr<torch::lazy::Value> CreateToken(
 // order. RFC: https://github.com/pytorch/pytorch/issues/93173
 ////////////////////////////////////////////////////////////////////////////////////
 
-// tag is ignored as it's only used in PyTorch to provide backward compatibility
-// with the traditional process group API.
-at::Tensor all_reduce(const at::Tensor& self, c10::string_view reduceOp,
-                      c10::string_view /*tag*/, at::IntArrayRef /*ranks*/,
-                      int64_t /*group_size*/) {
+at::Tensor all_reduce(const at::Tensor& self, std::string reduceOp,
+                      std::string /*group_name*/) {
   TORCH_LAZY_FN_COUNTER_TIMED_TRACING("xla::");
   auto self_tensor = bridge::GetXlaTensor(self);
-  // TODO(alanwaketan): Use ranks and group_size to generate groups. Currently
-  // we just suse {} as a workaround. Scale is always 1.0 here, and we always
-  // pin layout.
+  // TODO(alanwaketan): Use group_name to generate groups. Currently we just
+  // use {} as a workaround. Scale is always 1.0 here, and we always pin
+  // layout.
   auto result = tensor_methods::all_reduce(self_tensor, GetReduceType(reduceOp),
                                            /*scale*/ 1.0,
                                            /*groups*/ {}, /*pin_layout*/ true);
   return bridge::AtenFromXlaTensor(result);
 }
 
-TORCH_LIBRARY_IMPL(c10d_functional, XLA, m) {
+TORCH_LIBRARY_IMPL(_c10d_functional, XLA, m) {
   m.impl("all_reduce", all_reduce);
 }
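A hedged sketch of what the registration above enables: with the _c10d_functional library given an XLA backend impl, calling the functional op directly on an XLA tensor should dispatch to the C++ all_reduce in this file. The tensor setup and the "sum"/empty group_name values are illustrative, mirroring the Python-side call in xla_model.py.

    import torch
    import torch_xla.core.xla_model as xm

    t = torch.ones(4, device=xm.xla_device())
    # Dispatches on the XLA key to the all_reduce registered via TORCH_LIBRARY_IMPL.
    out = torch.ops._c10d_functional.all_reduce(t, "sum", "")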
