Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
  • Loading branch information
gongel committed Sep 10, 2022
1 parent f560a6b commit 28ea1e2
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions model_zoo/gpt/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,14 +442,13 @@ def _construct_sample(self, tokens):
labels = tokens[1:]
tokens = tokens[:-1]
seq_length = len(tokens)
# No padding, so attention_mask is None
attention_mask = None

# The pad and eos tokens do not contribute to the loss
loss_mask = np.ones(seq_length, dtype="float32")
loss_mask[np.where(np.array(tokens) == self.eos_id)] = 0.0
position_ids = np.arange(0, seq_length, dtype="int64")

attention_mask = loss_mask
labels = np.array(labels, dtype="int64")
return [tokens, loss_mask, attention_mask, position_ids, labels]

Expand Down

0 comments on commit 28ea1e2

Please sign in to comment.