Skip to content

Commit

Permalink
Add heuristic algo for speculative
Browse files: browse the repository at this point in the history
  • Loading branch information
leng-yue committed Sep 4, 2023
1 parent 3519568 commit 98230ef
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion examples/speculative/speculative.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -84,7 +84,7 @@ int main(int argc, char ** argv) {
//GGML_ASSERT(n_vocab == llama_n_vocab(ctx_dft));

// how many tokens to draft each time
const int n_draft = params.n_draft;
int n_draft = params.n_draft;

int n_predict = 0;
int n_drafted = 0;
Expand Down Expand Up @@ -116,6 +116,8 @@ int main(int argc, char ** argv) {

// sample from the drafted tokens if any
int i_dft = 0;
bool all_accepted = false;

while (true) {
const llama_token id = llama_sample_token(ctx_tgt, NULL, NULL, params, last_tokens, candidates, i_dft);

Expand All @@ -141,6 +143,9 @@ int main(int argc, char ** argv) {
++n_past_dft;
++i_dft;

if (i_dft == (int) drafted.size()) {
all_accepted = true;
}
continue;
}

Expand All @@ -154,6 +159,14 @@ int main(int argc, char ** argv) {
break;
}

if (drafted.size() > 0 && all_accepted) {
n_draft += 2;
LOG("all drafted tokens accepted, n_draft = %d\n", n_draft);
} else {
n_draft -= 1;
LOG("drafted token rejected, n_draft = %d\n", n_draft);
}

if (n_predict > params.n_predict || has_eos) {
break;
}
Expand Down

0 comments on commit 98230ef

Please sign in to comment.