Skip to content

Commit

Permalink
Remove the prompt padding in the batch API
Browse files Browse the repository at this point in the history
gemma.cpp now correctly handles prefilling prompts of varying lengths, so
the batch API no longer needs to left-pad shorter prompts with PAD_ID.
  • Loading branch information
ufownl committed Aug 19, 2024
1 parent 8c1396a commit fa5c08b
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 15 deletions.
14 changes: 0 additions & 14 deletions src/batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,20 +98,6 @@ std::vector<cgemma::session_context> parse_args(lua_State* L) {
if (sess_ctxs.back().prompt.empty()) {
luaL_error(L, "Too few arguments, %d expected", nargs + 1);
}
size_t max_prompt_size = 0;
for (const auto& ctx: sess_ctxs) {
max_prompt_size = std::max(max_prompt_size, ctx.prompt.size());
}
for (auto& ctx: sess_ctxs) {
auto padding_size = max_prompt_size - ctx.prompt.size();
if (padding_size > 0) {
std::vector<int> prompt;
prompt.reserve(max_prompt_size);
prompt.resize(padding_size, cgemma::PAD_ID);
prompt.insert(prompt.end(), ctx.prompt.begin(), ctx.prompt.end());
ctx.prompt = std::move(prompt);
}
}
return sess_ctxs;
}

Expand Down
1 change: 0 additions & 1 deletion src/instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

namespace cgemma {

constexpr const int PAD_ID = 0;
constexpr const int UNK_ID = 3;
constexpr const int EOT_ID = 107;

Expand Down

0 comments on commit fa5c08b

Please sign in to comment.