From 79c204f3fc9dadbfc074caedf513d610c4dbfd05 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Tue, 23 Jul 2024 17:48:56 -0700
Subject: [PATCH 1/2] [SpecDecoding] Update MLPSpeculator CI tests to use smaller model

---
 tests/spec_decode/e2e/test_mlp_correctness.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/spec_decode/e2e/test_mlp_correctness.py b/tests/spec_decode/e2e/test_mlp_correctness.py
index dd67a7735a647..0cbf9ba31665b 100644
--- a/tests/spec_decode/e2e/test_mlp_correctness.py
+++ b/tests/spec_decode/e2e/test_mlp_correctness.py
@@ -24,10 +24,10 @@
 from .conftest import run_greedy_equality_correctness_test
 
 # main model
-MAIN_MODEL = "ibm-granite/granite-3b-code-instruct"
+MAIN_MODEL = "JackFram/llama-160m"
 
 # speculative model
-SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator"
+SPEC_MODEL = "ibm-fms/llama-160m-accelerator"
 
 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.

From 81dceba96c2af5791354bc79f21e6c8de6b294bb Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Tue, 23 Jul 2024 19:48:04 -0700
Subject: [PATCH 2/2] Fix lower max spec tokens for the new model

---
 tests/spec_decode/e2e/test_mlp_correctness.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/spec_decode/e2e/test_mlp_correctness.py b/tests/spec_decode/e2e/test_mlp_correctness.py
index 0cbf9ba31665b..e310941afacf3 100644
--- a/tests/spec_decode/e2e/test_mlp_correctness.py
+++ b/tests/spec_decode/e2e/test_mlp_correctness.py
@@ -31,7 +31,7 @@
 
 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.
-MAX_SPEC_TOKENS = 5
+MAX_SPEC_TOKENS = 3
 
 # precision
 PRECISION = "float32"