Skip to content

Commit

Permalink
[Pretrain] Fix eval during pretrain (#7827)
Browse files Browse the repository at this point in the history
* add unified checkpoint training args doc

* fix eval during pretrain

* fix
  • Loading branch information
DesmonDay authored Jan 11, 2024
1 parent f039d09 commit ee4b9dd
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions llm/run_pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import math
import os
import sys
Expand Down Expand Up @@ -261,12 +262,12 @@ def print_dataset(data, mode="train"):
def _collate_data(data, stack_fn=Stack()):
    """Collate a batch of pretraining samples into next-token-prediction pairs.

    Each sample dict carries a "text" entry holding a token-id sequence.
    The sequences are stacked into a 2-D (batch, seq_len) array, then
    shifted by one position: inputs are tokens[:, :-1] and labels are
    tokens[:, 1:].

    Args:
        data: iterable of sample dicts, each with a "text" token-id array.
        stack_fn: callable that stacks per-sample arrays into one batch
            array (project `Stack` collator by default).

    Returns:
        dict with keys "input_ids" and "labels", each of shape
        (batch, seq_len - 1).
    """
    tokens_ = stack_fn([x["text"] for x in data])

    # Deep-copy before slicing so that "labels" does not share underlying
    # storage with "input_ids" — NOTE(review): presumably an in-place
    # modification of one during evaluation would otherwise corrupt the
    # other (the commit this diff comes from says "fix eval during
    # pretrain"); confirm against the eval path.
    labels = copy.deepcopy(tokens_)[:, 1:]
    tokens = tokens_[:, :-1]

    return {
        "input_ids": tokens,
        "labels": labels,
    }

if need_data:
Expand Down

0 comments on commit ee4b9dd

Please sign in to comment.