[Bugfix] Allow prefill of assistant response when using `mistral_common` (vllm-project#9446)
Signed-off-by: qishuai <[email protected]>
sasha0552 authored and FerdinandZhong committed Oct 29, 2024
1 parent d5e2294 commit 712d250
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions vllm/transformers_utils/tokenizers/mistral.py
@@ -166,6 +166,10 @@ def apply_chat_template(self,
                             tools: Optional[Dict[str, Any]] = None,
                             **kwargs) -> List[int]:
 
+        last_message = messages[-1]
+        if last_message["role"] == "assistant":
+            last_message["prefix"] = True
+
         request = ChatCompletionRequest(messages=messages,
                                         tools=tools)  # type: ignore[type-var]
         encoded = self.mistral.encode_chat_completion(request)
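The patch only sets a flag on the final message; `mistral_common` then treats that assistant message as a partial response to be continued rather than a completed turn. A minimal self-contained sketch of the same logic, using plain dicts instead of vLLM's tokenizer classes (the helper name `mark_assistant_prefill` is made up for illustration):

```python
from typing import Any, Dict, List


def mark_assistant_prefill(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Mirror the patch: if the conversation ends with an assistant
    message, flag it as a prefix so downstream encoding continues the
    partial response instead of closing the turn."""
    if messages and messages[-1]["role"] == "assistant":
        messages[-1]["prefix"] = True
    return messages


messages = [
    {"role": "user", "content": "Write a haiku about autumn."},
    # Partial assistant reply that the model should continue verbatim.
    {"role": "assistant", "content": "Crisp leaves drift down"},
]
mark_assistant_prefill(messages)
print(messages[-1]["prefix"])  # True
```

Messages ending with a user turn are left untouched, so normal chat requests are unaffected by the change.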
