Skip to content

Commit

Permalink
[Pretrain] Fix eval during pretrain (#7827)
Browse files Browse the repository at this point in the history
* add unified checkpoint training args doc

* fix eval during pretrain

* fix
  • Loading branch information
DesmonDay authored Jan 11, 2024
1 parent f039d09 commit ee4b9dd
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions llm/run_pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import math
import os
import sys
Expand Down Expand Up @@ -261,12 +262,12 @@ def print_dataset(data, mode="train"):
def _collate_data(data, stack_fn=Stack()):
    """Collate a batch of pretraining samples into next-token-prediction pairs.

    Each sample dict carries a "text" entry holding a token-id sequence.
    The sequences are stacked into a 2-D (batch, seq_len) array, then
    shifted by one position: inputs are tokens[:, :-1] and labels are
    tokens[:, 1:].

    Args:
        data: iterable of sample dicts, each with a "text" token-id array.
        stack_fn: callable that stacks per-sample arrays into one batch
            array (project `Stack` collator by default).

    Returns:
        dict with keys "input_ids" and "labels", each of shape
        (batch, seq_len - 1).
    """
    tokens_ = stack_fn([x["text"] for x in data])

    # Deep-copy before slicing so that "labels" does not share underlying
    # storage with "input_ids" — NOTE(review): presumably an in-place
    # modification of one during evaluation would otherwise corrupt the
    # other (the commit this diff comes from says "fix eval during
    # pretrain"); confirm against the eval path.
    labels = copy.deepcopy(tokens_)[:, 1:]
    tokens = tokens_[:, :-1]

    return {
        "input_ids": tokens,
        "labels": labels,
    }

if need_data:
Expand Down

0 comments on commit ee4b9dd

Please sign in to comment.