From 3ffa99a70e54e5537cd89a747c6d5c6cd42f14c7 Mon Sep 17 00:00:00 2001 From: Travis Addair Date: Fri, 19 Apr 2024 11:04:10 -0700 Subject: [PATCH] fix: Hack for llama3 eos_token_id (#427) --- server/lorax_server/models/flash_llama.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/lorax_server/models/flash_llama.py b/server/lorax_server/models/flash_llama.py index e44beddb6..e99052b13 100644 --- a/server/lorax_server/models/flash_llama.py +++ b/server/lorax_server/models/flash_llama.py @@ -61,6 +61,11 @@ def __init__( trust_remote_code=trust_remote_code, ) + if tokenizer.eos_token_id == 128001: + # TODO(travis): hack to work around llama-3 chat template generating the wrong eos_token + # https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/discussions/14 + tokenizer.eos_token_id = 128009 + config = LlamaConfig.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code) config.quantize = quantize