diff --git a/README.md b/README.md
index 8652953..939904b 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,10 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_leaderboard)
 
 ## Recent updates
 
+Version 0.19.1
+
+- really Fix <|end|> token for Mini-InternVL-Chat-4B-V1-5, thanks again [@Ph0rk0z](https://github.com/Ph0rk0z)
+
 Version 0.19.0
 
 - new model support: tiiuae/falcon-11B-vlm
diff --git a/backend/internvl-chat-v1-5.py b/backend/internvl-chat-v1-5.py
index 4f3af32..8f6bf83 100644
--- a/backend/internvl-chat-v1-5.py
+++ b/backend/internvl-chat-v1-5.py
@@ -105,10 +105,10 @@ def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_p
 
         self.model.img_context_token_id = self.tokenizer.convert_tokens_to_ids('<IMG_CONTEXT>')
 
-        if self.format == 'phintern' and self.tokenizer.convert_tokens_to_ids('<|end|>') != 0:
-            self.eos_token_id = self.tokenizer.convert_tokens_to_ids('<|end|>')
-        elif self.tokenizer.convert_tokens_to_ids('<|im_end|>') != 0:
-            self.eos_token_id = self.tokenizer.convert_tokens_to_ids('<|im_end|>') # 92542, InternLM2
+        self.eos_token = '<|end|>' if self.format == 'phintern' else '<|im_end|>'
+
+        if self.tokenizer.convert_tokens_to_ids(self.eos_token) != 0:
+            self.eos_token_id = self.tokenizer.convert_tokens_to_ids(self.eos_token) # 92542, InternLM2
         else:
             self.eos_token_id = self.tokenizer.eos_token_id
 
@@ -151,4 +151,4 @@ async def chat_with_images(self, request: ImageChatRequest) -> str:
         )
         response = self.tokenizer.decode(output[0], skip_special_tokens=True)
 
-        return response.split('<|im_end|>')[0].strip()
+        return response.split(self.eos_token)[0].strip()
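
The backend change boils down to choosing the EOS token string once, keeping it on `self`, and reusing it in `chat_with_images()`, so the response is trimmed at the same token that stops generation (previously, phintern output was split on `<|im_end|>` even though that template ends turns with `<|end|>`). Below is a minimal sketch of the selection logic in isolation, not the backend's actual API: `select_eos` and `chat_format` are hypothetical names, and it assumes a HuggingFace tokenizer whose `convert_tokens_to_ids()` maps tokens missing from the vocabulary to the `<unk>` id 0, as InternLM2's does.

```python
def select_eos(tokenizer, chat_format: str) -> tuple[str, int]:
    """Pick the turn-ending token for the given chat template.

    Hypothetical helper for illustration; the real backend does this
    inline in __init__. Assumes convert_tokens_to_ids() returns the
    <unk> id, 0, for tokens not in the vocabulary.
    """
    # Phi-3-style ('phintern') templates close a turn with <|end|>;
    # ChatML/InternLM2-style templates close it with <|im_end|> (id 92542).
    eos_token = '<|end|>' if chat_format == 'phintern' else '<|im_end|>'
    eos_token_id = tokenizer.convert_tokens_to_ids(eos_token)
    if eos_token_id == 0:
        # Token not in the vocabulary: fall back to the tokenizer default.
        eos_token, eos_token_id = tokenizer.eos_token, tokenizer.eos_token_id
    return eos_token, eos_token_id

# Example usage (downloads the tokenizer; model id is illustrative):
#   from transformers import AutoTokenizer
#   tok = AutoTokenizer.from_pretrained('OpenGVLab/Mini-InternVL-Chat-4B-V1-5',
#                                       trust_remote_code=True)
#   eos_token, eos_token_id = select_eos(tok, 'phintern')
#   response = response.split(eos_token)[0].strip()
```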