[Bugfix] Allow prefill of assistant response when using `mistral_common` (vllm-project#9446)
KuntaiDu · Oct 17, 2024 · 4d647bd · 4d647bd
1 parent f3702b7
commit 4d647bd
Showing 1 changed file with 4 additions and 0 deletions.
diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py
@@ -166,6 +166,10 @@ def apply_chat_template(self,
                             tools: Optional[Dict[str, Any]] = None,
                             **kwargs) -> List[int]:
 
+        last_message = messages[-1]
+        if last_message["role"] == "assistant":
+            last_message["prefix"] = True
+
         request = ChatCompletionRequest(messages=messages,
                                         tools=tools)  # type: ignore[type-var]
         encoded = self.mistral.encode_chat_completion(request)