From c882a7f5b3ce5c98efb52c911ea15ca565d10cd7 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Wed, 24 Jul 2024 00:34:22 -0700 Subject: [PATCH] [SpecDecoding] Update MLPSpeculator CI tests to use smaller model (#6714) --- tests/spec_decode/e2e/test_mlp_correctness.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/spec_decode/e2e/test_mlp_correctness.py b/tests/spec_decode/e2e/test_mlp_correctness.py index dd67a7735a647..e310941afacf3 100644 --- a/tests/spec_decode/e2e/test_mlp_correctness.py +++ b/tests/spec_decode/e2e/test_mlp_correctness.py @@ -24,14 +24,14 @@ from .conftest import run_greedy_equality_correctness_test # main model -MAIN_MODEL = "ibm-granite/granite-3b-code-instruct" +MAIN_MODEL = "JackFram/llama-160m" # speculative model -SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator" +SPEC_MODEL = "ibm-fms/llama-160m-accelerator" # max. number of speculative tokens: this corresponds to # n_predict in the config.json of the speculator model. -MAX_SPEC_TOKENS = 5 +MAX_SPEC_TOKENS = 3 # precision PRECISION = "float32"