fix: Hack for llama3 eos_token_id (#427)

predibase · Apr 19, 2024 · 3ffa99a · 3ffa99a
1 parent 2017d45
commit 3ffa99a
Showing 1 changed file with 5 additions and 0 deletions.
diff --git a/server/lorax_server/models/flash_llama.py b/server/lorax_server/models/flash_llama.py
@@ -61,6 +61,11 @@ def __init__(
             trust_remote_code=trust_remote_code,
         )
 
+        if tokenizer.eos_token_id == 128001:
+        # TODO(travis): hack to work around the llama-3 chat template generating the wrong eos_token
+            # https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/discussions/14
+            tokenizer.eos_token_id = 128009
+
         config = LlamaConfig.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code)
         config.quantize = quantize