From 3ffa99a70e54e5537cd89a747c6d5c6cd42f14c7 Mon Sep 17 00:00:00 2001 From: Travis Addair Date: Fri, 19 Apr 2024 11:04:10 -0700 Subject: [PATCH] fix: Hack for llama3 eos_token_id (#427) --- server/lorax_server/models/flash_llama.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/lorax_server/models/flash_llama.py b/server/lorax_server/models/flash_llama.py index e44beddb6..e99052b13 100644 --- a/server/lorax_server/models/flash_llama.py +++ b/server/lorax_server/models/flash_llama.py @@ -61,6 +61,11 @@ def __init__( trust_remote_code=trust_remote_code, ) + if tokenizer.eos_token_id == 128001: + # TODO(travis): hack to work around llama-3 chat template generating the wrong eos_token + # https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/discussions/14 + tokenizer.eos_token_id = 128009 + config = LlamaConfig.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code) config.quantize = quantize