From 2ec9a6d5ce2a4927f0aecff650751ed3fb0e1920 Mon Sep 17 00:00:00 2001 From: lilujia Date: Thu, 24 Oct 2024 17:01:38 +0800 Subject: [PATCH] [XPU], support unified ckpt function --- paddlenlp/trainer/plugins/unified_checkpoint.py | 4 ++-- paddlenlp/trainer/trainer.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/paddlenlp/trainer/plugins/unified_checkpoint.py b/paddlenlp/trainer/plugins/unified_checkpoint.py index 4c5b54a20ddb..4bd1a1ffb08e 100644 --- a/paddlenlp/trainer/plugins/unified_checkpoint.py +++ b/paddlenlp/trainer/plugins/unified_checkpoint.py @@ -1307,7 +1307,7 @@ def check_unified_checkpoint(args, model, resume_from_checkpoint, safe_serializa else: local_resume = False local_resume = paddle.to_tensor([local_resume]) - dist.all_reduce(local_resume, op=dist.ReduceOp.PROD) + dist.all_reduce(local_resume, op=dist.ReduceOp.MIN) local_resume = local_resume.item() return local_resume @@ -1425,7 +1425,7 @@ def check_dynamic_load(args, weight_map, existed_files, is_master_weights=False, else: local_resume = False local_resume = paddle.to_tensor([local_resume]) - dist.all_reduce(local_resume, op=dist.ReduceOp.PROD) + dist.all_reduce(local_resume, op=dist.ReduceOp.MIN) return local_resume.item() # check whether the optimizer checkpoint files are complete. 
diff --git a/paddlenlp/trainer/trainer.py b/paddlenlp/trainer/trainer.py index c3e7d91f74ba..e61d0e73daeb 100644 --- a/paddlenlp/trainer/trainer.py +++ b/paddlenlp/trainer/trainer.py @@ -1793,6 +1793,12 @@ def _load_rng_state(self, checkpoint): for i in range(core.get_cuda_device_count()): core.default_cuda_generator(i).set_state(checkpoint_rng_state["cuda"][i]) + if core.is_compiled_with_xpu(): + if not len(checkpoint_rng_state["cuda"]) == core.get_xpu_device_count(): + raise ValueError("Length of xpu state list should be equal to the xpu device count") + for i in range(core.get_xpu_device_count()): + core.default_xpu_generator(i).set_state(checkpoint_rng_state["cuda"][i]) + if paddle.device.get_all_custom_device_type() is not None: custom_device_type = paddle.device.get_all_custom_device_type() for device in custom_device_type: