Update dedup logic to use dedup_save_plans #7929

Open · wants to merge 5 commits into master
Changes from 3 commits
32 changes: 0 additions & 32 deletions torch_xla/experimental/distributed_checkpoint/_helpers.py
@@ -151,38 +151,6 @@ def flat_copy(path: OBJ_PATH, value: STATE_DICT_ITEM) -> None:
  return flattened, mappings


# TODO(jonbolin): Take a dependency on the upstream implementation when the APIs
# are stable.
# https://github.com/pytorch/pytorch/blob/d1cecd9c32ba700c27f2b0716bf2cbef41469495/torch/distributed/checkpoint/_dedup_tensors.py#L29
def dedup_tensors(all_plans: List[SavePlan]) -> List[SavePlan]:
  all_plans = list(all_plans)
  key_to_plan: Dict[MetadataIndex, List[int]] = {}
  for plan_idx, plan in enumerate(all_plans):
    for write_item in plan.items:
      key_to_plan.setdefault(write_item.index, []).append(plan_idx)

  replicated_items = {k: v for k, v in key_to_plan.items() if len(v) > 1}

  # Remove duplicates by always keeping the first entry.
  # Compute the per-rank remove set.
  plan_to_keys: Dict[int, List[MetadataIndex]] = {}
  for key, plans in replicated_items.items():
    for plan_idx in plans[1:]:
      plan_to_keys.setdefault(plan_idx, []).append(key)

  for plan_idx, keys in plan_to_keys.items():
    key_set = set(keys)
    # rewrite items and remove elements
    new_items = [
        write_item for write_item in all_plans[plan_idx].items
        if write_item.index not in key_set
    ]
    all_plans[plan_idx] = dataclasses.replace(
        all_plans[plan_idx], items=new_items)

  return all_plans


# TODO(jonbolin): Take a dependency on the upstream implementation when the APIs
# are stable
# https://github.com/pytorch/pytorch/blob/d1cecd9c32ba700c27f2b0716bf2cbef41469495/torch/distributed/_shard/_utils.py#L7
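A note on the behavioral difference: the deleted dedup_tensors always keeps the first plan that lists a duplicated write item, whereas the upstream dedup_save_plans adopted below (as I understand the current PyTorch implementation) assigns each duplicated item to whichever candidate plan has accumulated the least write volume, spreading storage work across ranks. Below is a minimal sketch of that balancing idea over a toy {plan_idx: {item_name: size_in_bytes}} dict; the structure and names are hypothetical simplifications, not the real SavePlan/WriteItem API.

from collections import defaultdict
from typing import Dict


def dedup_balance_by_size(
    plans: Dict[int, Dict[str, int]]) -> Dict[int, Dict[str, int]]:
  """Toy illustration: give each duplicated item to the plan that has
  accumulated the least data so far (roughly what dedup_save_plans does)."""
  # Record which plans contain each item.
  item_to_plans = defaultdict(list)
  for plan_idx, items in plans.items():
    for name in items:
      item_to_plans[name].append(plan_idx)

  plan_size = {idx: 0 for idx in plans}
  deduped = {idx: {} for idx in plans}
  for name, candidates in item_to_plans.items():
    # Pick the candidate plan with the smallest accumulated write size.
    winner = min(candidates, key=lambda idx: plan_size[idx])
    size = plans[winner][name]
    plan_size[winner] += size
    deduped[winner][name] = size
  return deduped


# Example: "weight" is replicated on both plans but is written only once.
plans = {0: {"weight": 100, "bias0": 10}, 1: {"weight": 100, "bias1": 5}}
print(dedup_balance_by_size(plans))
# {0: {'weight': 100, 'bias0': 10}, 1: {'bias1': 5}}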
7 changes: 4 additions & 3 deletions torch_xla/experimental/distributed_checkpoint/planners.py
@@ -36,9 +36,10 @@
from torch.utils._pytree import tree_map
from torch_xla.distributed.spmd import XLAShardedTensor, XLAShard
from torch_xla.experimental.distributed_checkpoint._helpers import (
-    FLATTEN_MAPPING, flatten_state_dict, dedup_tensors, _is_sharded_tensor,
-    set_element, narrow_tensor_by_index, _unwrap_xla_sharded_tensor, _CpuShards)
+    FLATTEN_MAPPING, flatten_state_dict, _is_sharded_tensor, set_element,
+    narrow_tensor_by_index, _unwrap_xla_sharded_tensor, _CpuShards)
from typing import Any, Dict, List, Tuple, Union
+from torch.distributed.checkpoint._dedup_save_plans import dedup_save_plans


class SPMDSavePlanner(SavePlanner):
@@ -107,7 +108,7 @@ def create_local_plan(self) -> SavePlan:

  def create_global_plan(
      self, all_plans: List[SavePlan]) -> Tuple[List[SavePlan], Metadata]:
    # Deduplicate write items across plans
-    all_plans = dedup_tensors(all_plans)
+    all_plans = dedup_save_plans(all_plans)

    global_plan, metadata = create_default_global_save_plan(
        all_plans, rewrite_index_hints=False)
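Callers are unaffected by the swap: the planner is still consumed through the standard torch.distributed.checkpoint entry points, and deduplication still happens once on the coordinator inside create_global_plan. A minimal usage sketch, assuming a recent PyTorch where dist_cp.save is available, an already-constructed SPMD-sharded model, and a hypothetical CHECKPOINT_DIR:

import torch.distributed.checkpoint as dist_cp
from torch_xla.experimental.distributed_checkpoint import SPMDSavePlanner

CHECKPOINT_DIR = "/tmp/spmd_ckpt"  # hypothetical path
state_dict = {"model": model.state_dict()}  # assumes `model` already exists

dist_cp.save(
    state_dict=state_dict,
    storage_writer=dist_cp.FileSystemWriter(CHECKPOINT_DIR),
    # create_global_plan on the coordinator now dedups via dedup_save_plans.
    planner=SPMDSavePlanner(),
)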