Skip to content

Commit

Permalink
Adding huichen's test case
Browse files Browse the repository at this point in the history
  • Loading branch information
goerch committed Sep 16, 2023
1 parent e41209a commit afc0d0d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
1 change: 1 addition & 0 deletions tests/test-tokenizer-0-llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
{ " Hello" , { 1678, 15043, }, },
{ " Hello" , { 268, 15043, }, },
{ " Hello\n Hello" , { 268, 15043, 13, 1678, 15043, }, },
{ " (" , { 29871, 313, }, },
};

return _k_tests;
Expand Down
6 changes: 3 additions & 3 deletions tests/test-tokenizer-1-llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ int main(int argc, char **argv) {
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
std::string check = llama_detokenize_spm(ctx, tokens);
if (check != str) {
fprintf(stderr, "%s : error: token %d detokenizes to >%s<(%zu) but tokenization of this detokenizes to >%s<(%zu)\n",
fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
__func__, i, str.c_str(), str.length(), check.c_str(), check.length());
return 2;
}
Expand All @@ -99,7 +99,7 @@ int main(int argc, char **argv) {
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
std::string check = llama_detokenize_spm(ctx, tokens);
if (cp != 9601 && str != check) {
fprintf(stderr, "%s : error: codepoint %d detokenizes to >%s<(%zu) instead of >%s<(%zu)\n",
fprintf(stderr, "%s : error: codepoint %d detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
__func__, cp, check.c_str(), check.length(), str.c_str(), str.length());
return 3;
}
Expand All @@ -110,7 +110,7 @@ int main(int argc, char **argv) {
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
std::string check = llama_detokenize_spm(ctx, tokens);
if (str != check) {
fprintf(stderr, "%s : error: codepoint %d detokenizes to >%s<(%zu) instead of >%s<(%zu)\n",
fprintf(stderr, "%s : error: codepoint %d detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
__func__, cp, check.c_str(), check.length(), str.c_str(), str.length());
return 4;
}
Expand Down

0 comments on commit afc0d0d

Please sign in to comment.