From 79c204f3fc9dadbfc074caedf513d610c4dbfd05 Mon Sep 17 00:00:00 2001
From: Nick Hill <nickhill@us.ibm.com>
Date: Tue, 23 Jul 2024 17:48:56 -0700
Subject: [PATCH 1/2] [SpecDecoding] Update MLPSpeculator CI tests to use
 smaller model

---
 tests/spec_decode/e2e/test_mlp_correctness.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/spec_decode/e2e/test_mlp_correctness.py b/tests/spec_decode/e2e/test_mlp_correctness.py
index dd67a7735a647..0cbf9ba31665b 100644
--- a/tests/spec_decode/e2e/test_mlp_correctness.py
+++ b/tests/spec_decode/e2e/test_mlp_correctness.py
@@ -24,10 +24,10 @@
 from .conftest import run_greedy_equality_correctness_test
 
 # main model
-MAIN_MODEL = "ibm-granite/granite-3b-code-instruct"
+MAIN_MODEL = "JackFram/llama-160m"
 
 # speculative model
-SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator"
+SPEC_MODEL = "ibm-fms/llama-160m-accelerator"
 
 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.

From 81dceba96c2af5791354bc79f21e6c8de6b294bb Mon Sep 17 00:00:00 2001
From: Nick Hill <nickhill@us.ibm.com>
Date: Tue, 23 Jul 2024 19:48:04 -0700
Subject: [PATCH 2/2] Fix: lower max spec tokens for the new model

---
 tests/spec_decode/e2e/test_mlp_correctness.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/spec_decode/e2e/test_mlp_correctness.py b/tests/spec_decode/e2e/test_mlp_correctness.py
index 0cbf9ba31665b..e310941afacf3 100644
--- a/tests/spec_decode/e2e/test_mlp_correctness.py
+++ b/tests/spec_decode/e2e/test_mlp_correctness.py
@@ -31,7 +31,7 @@
 
 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.
-MAX_SPEC_TOKENS = 5
+MAX_SPEC_TOKENS = 3
 
 # precision
 PRECISION = "float32"