Skip to content

Commit

Permalink
llama : fix typo in <|im_end|> token text (#6745)
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Apr 22, 2024
1 parent c0956b0 commit 8960fe8
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4340,7 +4340,7 @@ static void llm_load_vocab(
}
}

- // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
+ // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
//
// TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
// for now, we apply this workaround to find the EOT token based on its text
Expand All @@ -4351,7 +4351,7 @@ static void llm_load_vocab(
// need to fix convert script
//vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
(t.first == "<|eot_id|>" ||
- t.first == "<|im_emd|>" ||
+ t.first == "<|im_end|>" ||
t.first == "<end_of_turn>"
)
) {
Expand Down

0 comments on commit 8960fe8

Please sign in to comment.