Skip to content

Commit

Permalink
[SpecDecoding] Update MLPSpeculator CI tests to use smaller model (vl…
Browse files Browse the repository at this point in the history
  • Loading branch information
njhill authored and kylesayrs committed Aug 17, 2024
1 parent 62d054b commit 1b239b3
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions tests/spec_decode/e2e/test_mlp_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@
from .conftest import run_greedy_equality_correctness_test

# main model
-MAIN_MODEL = "ibm-granite/granite-3b-code-instruct"
+MAIN_MODEL = "JackFram/llama-160m"

# speculative model
-SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator"
+SPEC_MODEL = "ibm-fms/llama-160m-accelerator"

# max. number of speculative tokens: this corresponds to
# n_predict in the config.json of the speculator model.
-MAX_SPEC_TOKENS = 5
+MAX_SPEC_TOKENS = 3

# precision
PRECISION = "float32"
Expand Down

0 comments on commit 1b239b3

Please sign in to comment.