From bde406525fd73ce484ebdced99358b6e52fa10f6 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Mon, 14 Oct 2024 20:30:19 -0700
Subject: [PATCH 01/43] [Bugfix]: Make chat content text allow type content

---
 vllm/entrypoints/chat_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 41354dc602c61..4ad32cbae11d5 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -400,11 +400,13 @@ def _parse_chat_message_content_parts(
         MODEL_KEEP_MULTI_MODAL_CONTENT

     has_image = False
+    has_text = False
     for part in parts:
         part_type = part["type"]
         if part_type == "text":
             text = _TextParser(part)["text"]
             texts.append(text)
+            has_text = True
         elif part_type == "image_url":
             image_url = _ImageParser(part)["image_url"]

@@ -426,8 +428,7 @@ def _parse_chat_message_content_parts(
             raise NotImplementedError(f"Unknown part type: {part_type}")

     text_prompt = "\n".join(texts)
-    if keep_multimodal_content:
-        text_prompt = "\n".join(texts)
+    if has_text or keep_multimodal_content:
         role_content = [{'type': 'text', 'text': text_prompt}]

         if has_image:

From 3bf919cb8f4cf093b1fb8ac75047b14da44c66f3 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Tue, 15 Oct 2024 13:33:25 -0700
Subject: [PATCH 02/43] Add test to verify content is parsed as expected

---
 tests/entrypoints/test_chat_utils.py | 37 ++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 6ded5102c9314..115a32438845b 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -302,6 +302,43 @@ def test_parse_chat_messages_multiple_images_across_messages(
     ]
     _assert_mm_data_is_image_input(mm_data, 2)

+def test_parse_chat_messages_context_text_format(
+    phi3v_model_config,
+    phi3v_tokenizer,
+):
+    conversation, mm_data = parse_chat_messages([{
+        "role":
+        "user",
+        "content": [{
+            "type": "text",
+            "text": "What's in this text?"
+        }]
+    }, {
+        "role": "assistant",
+        "content": "Some stuff."
+    }, {
+        "role":
+        "user",
+        "content": [{
+            "type": "text",
+            "text": "What about this one?"
+        }]
+    }], phi3v_model_config, phi3v_tokenizer)
+
+    assert conversation == [
+        {
+            "role": "user",
+            "content": "What's in this text?"
+        },
+        {
+            "role": "assistant",
+            "content": "Some stuff."
+        },
+        {
+            "role": "user",
+            "content": "What about this one?"
+        },
+    ]

 def test_parse_chat_messages_rejects_too_many_images_in_one_message(
     phi3v_model_config,
     phi3v_tokenizer,

From 66ab303e9374f67c1de4e18b2f47e260177731d8 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Tue, 15 Oct 2024 14:01:43 -0700
Subject: [PATCH 03/43] Fix formatting

---
 tests/entrypoints/test_chat_utils.py | 37 ++++++++++++++--------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 115a32438845b..3b54f43b37b0f 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -302,28 +302,28 @@ def test_parse_chat_messages_multiple_images_across_messages(
     ]
     _assert_mm_data_is_image_input(mm_data, 2)

+
 def test_parse_chat_messages_context_text_format(
     phi3v_model_config,
     phi3v_tokenizer,
 ):
-    conversation, mm_data = parse_chat_messages([{
-        "role":
-        "user",
-        "content": [{
-            "type": "text",
-            "text": "What's in this text?"
-        }]
-    }, {
-        "role": "assistant",
-        "content": "Some stuff."
-    }, {
-        "role":
-        "user",
-        "content": [{
-            "type": "text",
-            "text": "What about this one?"
-        }]
-    }], phi3v_model_config, phi3v_tokenizer)
+    conversation, mm_data = parse_chat_messages(
+        [{
+            "role": "user",
+            "content": [{
+                "type": "text",
+                "text": "What's in this text?"
+            }]
+        }, {
+            "role": "assistant",
+            "content": "Some stuff."
+        }, {
+            "role": "user",
+            "content": [{
+                "type": "text",
+                "text": "What about this one?"
+            }]
+        }], phi3v_model_config, phi3v_tokenizer)

     assert conversation == [
         {
             "role": "user",
@@ -340,6 +340,7 @@ def test_parse_chat_messages_context_text_format(
         },
     ]

+
 def test_parse_chat_messages_rejects_too_many_images_in_one_message(
     phi3v_model_config,
     phi3v_tokenizer,

From aed37f668eeac335e61380e1fdfdde25feae138d Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 03:32:24 -0400
Subject: [PATCH 04/43] Fix test to actually test the fix

---
 tests/entrypoints/test_chat_utils.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 3b54f43b37b0f..6eb97ca48168b 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -328,7 +328,10 @@ def test_parse_chat_messages_context_text_format(
     assert conversation == [
         {
             "role": "user",
-            "content": "What's in this text?"
+            "content": [{
+                "type": "text",
+                "text": "What's in this text?"
+            }]
         },
         {
             "role": "assistant",
@@ -336,7 +339,10 @@ def test_parse_chat_messages_context_text_format(
         },
         {
             "role": "user",
-            "content": "What about this one?"
+            "content": [{
+                "type": "text",
+                "text": "What about this one?"
+            }]
         },
     ]

From a194b32d68176aadf7c6ba0e384b9d7fd7bbf234 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 04:07:52 -0400
Subject: [PATCH 05/43] Rewrite logic to fix failing test

---
 vllm/entrypoints/chat_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 4ad32cbae11d5..e05b6c9712c4e 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -428,7 +428,7 @@ def _parse_chat_message_content_parts(
             raise NotImplementedError(f"Unknown part type: {part_type}")

     text_prompt = "\n".join(texts)
-    if has_text or keep_multimodal_content:
+    if keep_multimodal_content:
         role_content = [{'type': 'text', 'text': text_prompt}]

         if has_image:
@@ -440,6 +440,8 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
+    elif has_text:
+        text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]

From a112a1ac92136a80e559fd0b84ca6f611fe22aef Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 06:00:28 -0400
Subject: [PATCH 06/43] Another attempt at making this work

---
 vllm/entrypoints/chat_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index c3ac03ee6ed1f..899faa70021dd 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -392,6 +392,7 @@ def _parse_chat_message_content_parts(
     role: str,
     parts: Iterable[ChatCompletionContentPartParam],
     mm_tracker: BaseMultiModalItemTracker,
+    keep_content_structure: bool,
 ) -> List[ConversationMessage]:
     texts: List[str] = []

@@ -441,7 +442,7 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
-    elif has_text:
+    elif has_text and keep_content_structure:
         text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]
@@ -458,17 +459,20 @@ def _parse_chat_message_content(
     role = message["role"]
     content = message.get("content")

+    keep_content_structure = True
     if content is None:
         content = []
     elif isinstance(content, str):
         content = [
             ChatCompletionContentPartTextParam(type="text", text=content)
         ]
+        keep_content_structure = False

     result = _parse_chat_message_content_parts(
         role,
         content,  # type: ignore
         mm_tracker,
+        keep_content_structure,
     )

     for result_msg in result:

From 0fee8d782e279a849f21096883d6b30dbb2193b1 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 11:04:32 -0400
Subject: [PATCH 07/43] Remove the offending tests

---
 tests/entrypoints/openai/test_chat.py | 51 ---------------------------
 1 file changed, 51 deletions(-)

diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py
index 3af0032fd2fb0..34783c7cf93dd 100644
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -898,57 +898,6 @@ async def test_extra_fields(client: openai.AsyncOpenAI):
     assert "extra_forbidden" in exc_info.value.message

-@pytest.mark.asyncio
-async def test_complex_message_content(client: openai.AsyncOpenAI):
-    resp = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[{
-            "role":
-            "user",
-            "content": [{
-                "type":
-                "text",
-                "text":
-                "what is 1+1? please provide the result without any other text."
-            }]
-        }],
-        temperature=0,
-        seed=0)
-    content = resp.choices[0].message.content
-    assert content == "2"
-
-
-@pytest.mark.asyncio
-async def test_custom_role(client: openai.AsyncOpenAI):
-    # Not sure how the model handles custom roles so we just check that
-    # both string and complex message content are handled in the same way
-
-    resp1 = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[{
-            "role": "my-custom-role",
-            "content": "what is 1+1?",
-        }],  # type: ignore
-        temperature=0,
-        seed=0)
-
-    resp2 = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[{
-            "role": "my-custom-role",
-            "content": [{
-                "type": "text",
-                "text": "what is 1+1?"
- }] - }], # type: ignore - temperature=0, - seed=0) - - content1 = resp1.choices[0].message.content - content2 = resp2.choices[0].message.content - assert content1 == content2 - - @pytest.mark.asyncio async def test_long_seed(client: openai.AsyncOpenAI): for seed in [ From 4011ed1500a4123d67ecc02d2b1ac1271e3ca38c Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 12:36:33 -0400 Subject: [PATCH 08/43] Add cli args to switch between types --- tests/entrypoints/openai/test_chat.py | 65 ++++++++++++++++++++++----- vllm/config.py | 2 + vllm/engine/arg_utils.py | 9 ++++ vllm/entrypoints/chat_utils.py | 9 ++-- 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index 34783c7cf93dd..0fbc4cca83bd2 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -433,28 +433,18 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI, model=model_name, messages=messages, max_tokens=10, - extra_body=dict(min_tokens=10), temperature=0.0, stream=True, stream_options={ "include_usage": True, - "continuous_usage_stats": True, + "continuous_usage_stats": True }, ) - last_completion_tokens = 0 async for chunk in stream: assert chunk.usage.prompt_tokens >= 0 - assert last_completion_tokens == 0 or \ - chunk.usage.completion_tokens > last_completion_tokens or \ - ( - not chunk.choices and - chunk.usage.completion_tokens == last_completion_tokens - ) + assert chunk.usage.completion_tokens >= 0 assert chunk.usage.total_tokens == (chunk.usage.prompt_tokens + chunk.usage.completion_tokens) - last_completion_tokens = chunk.usage.completion_tokens - - assert last_completion_tokens == 10 # NOTE: Not sure why, but when I place this after `test_guided_regex_chat` @@ -898,6 +888,57 @@ async def test_extra_fields(client: openai.AsyncOpenAI): assert "extra_forbidden" in exc_info.value.message +@pytest.mark.asyncio +async def test_complex_message_content(client: openai.AsyncOpenAI): + resp = await client.chat.completions.create( + model=MODEL_NAME, + messages=[{ + "role": + "user", + "content": [{ + "type": + "text", + "text": + "what is 1+1? please provide the result without any other text." + }] + }], + temperature=0, + seed=0) + content = resp.choices[0].message.content + assert content == "2" + + +@pytest.mark.asyncio +async def test_custom_role(client: openai.AsyncOpenAI): + # Not sure how the model handles custom roles so we just check that + # both string and complex message content are handled in the same way + + resp1 = await client.chat.completions.create( + model=MODEL_NAME, + messages=[{ + "role": "my-custom-role", + "content": "what is 1+1?", + }], # type: ignore + temperature=0, + seed=0) + + resp2 = await client.chat.completions.create( + model=MODEL_NAME, + messages=[{ + "role": "my-custom-role", + "content": [{ + "type": "text", + "text": "what is 1+1?" 
+ }] + }], # type: ignore + temperature=0, + seed=0) + + content1 = resp1.choices[0].message.content + content2 = resp2.choices[0].message.content + assert content1 == content2 + + @pytest.mark.asyncio async def test_long_seed(client: openai.AsyncOpenAI): for seed in [ diff --git a/vllm/config.py b/vllm/config.py index 4533fb017188c..83586eae88539 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -110,6 +110,7 @@ def __init__(self, model: str, tokenizer: str, tokenizer_mode: str, + chat_template_content_type: str, trust_remote_code: bool, dtype: Union[str, torch.dtype], seed: int, @@ -137,6 +138,7 @@ def __init__(self, self.model = model self.tokenizer = tokenizer self.tokenizer_mode = tokenizer_mode + self.chat_template_content_type = chat_template_content_type self.trust_remote_code = trust_remote_code self.seed = seed self.revision = revision diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 41963dcb16922..c178d8667abce 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -86,6 +86,7 @@ class EngineArgs: tokenizer: Optional[str] = None skip_tokenizer_init: bool = False tokenizer_mode: str = 'auto' + chat_template_content_type: str = "string" trust_remote_code: bool = False download_dir: Optional[str] = None load_format: str = 'auto' @@ -238,6 +239,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: 'fast tokenizer if available.\n* "slow" will ' 'always use the slow tokenizer. \n* ' '"mistral" will always use the `mistral_common` tokenizer.') + parser.add_argument( + '--chat-template-text-content-format', + type=str, + default='string', + choices=['string', 'openai'], + help='The content to choose with chat template. "string" will keep the content field as ' + 'just a string whereas "openai" will parse the content in the current OpenAI format.') parser.add_argument('--trust-remote-code', action='store_true', help='Trust remote code from huggingface.') @@ -841,6 +849,7 @@ def create_model_config(self) -> ModelConfig: # We know this is not None because we set it in __post_init__ tokenizer=cast(str, self.tokenizer), tokenizer_mode=self.tokenizer_mode, + chat_template_content_type=self.chat_template_content_type, trust_remote_code=self.trust_remote_code, dtype=self.dtype, seed=self.seed, diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 899faa70021dd..78e4d7d8a27b7 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -392,7 +392,7 @@ def _parse_chat_message_content_parts( role: str, parts: Iterable[ChatCompletionContentPartParam], mm_tracker: BaseMultiModalItemTracker, - keep_content_structure: bool, + chat_template_content_type: str, ) -> List[ConversationMessage]: texts: List[str] = [] @@ -442,7 +442,7 @@ def _parse_chat_message_content_parts( if mm_placeholder_counts: text_prompt = _get_full_multimodal_text_prompt( mm_placeholder_counts, text_prompt) - elif has_text and keep_content_structure: + elif has_text and chat_template_content_type == "openai": text_prompt = [{'type': 'text', 'text': text_prompt}] return [ConversationMessage(role=role, content=text_prompt)] @@ -455,24 +455,23 @@ def _parse_chat_message_content_parts( def _parse_chat_message_content( message: ChatCompletionMessageParam, mm_tracker: BaseMultiModalItemTracker, + chat_template_content_type: str, ) -> List[ConversationMessage]: role = message["role"] content = message.get("content") - keep_content_structure = True if content is None: content = [] elif isinstance(content, str): content 
= [ ChatCompletionContentPartTextParam(type="text", text=content) ] - keep_content_structure = False result = _parse_chat_message_content_parts( role, content, # type: ignore mm_tracker, - keep_content_structure, + chat_template_content_type, ) for result_msg in result: From 80ae489b4dcd862e39a7e83f50c840ec530879bb Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 12:41:52 -0400 Subject: [PATCH 09/43] Minor fix --- vllm/entrypoints/chat_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 78e4d7d8a27b7..334ba77a139ed 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -515,7 +515,7 @@ def parse_chat_messages( mm_tracker = MultiModalItemTracker(model_config, tokenizer) for msg in messages: - sub_messages = _parse_chat_message_content(msg, mm_tracker) + sub_messages = _parse_chat_message_content(msg, mm_tracker, model_config.chat_template_content_type) conversation.extend(sub_messages) @@ -533,7 +533,7 @@ def parse_chat_messages_futures( mm_tracker = AsyncMultiModalItemTracker(model_config, tokenizer) for msg in messages: - sub_messages = _parse_chat_message_content(msg, mm_tracker) + sub_messages = _parse_chat_message_content(msg, mm_tracker, model_config.chat_template_content_type) conversation.extend(sub_messages) From a67e03f0e74a7f8b335af381272a60a9ba394c46 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 13:09:59 -0400 Subject: [PATCH 10/43] Fix tests --- tests/entrypoints/test_chat_utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 6eb97ca48168b..9fcd4466915cb 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -307,6 +307,7 @@ def test_parse_chat_messages_context_text_format( phi3v_model_config, phi3v_tokenizer, ): + phi3v_model_config.chat_template_content_type = "openai" conversation, mm_data = parse_chat_messages( [{ "role": "user", @@ -319,10 +320,7 @@ def test_parse_chat_messages_context_text_format( "content": "Some stuff." }, { "role": "user", - "content": [{ - "type": "text", - "text": "What about this one?" - }] + "content": "What about this one?" }], phi3v_model_config, phi3v_tokenizer) assert conversation == [ @@ -335,7 +333,10 @@ def test_parse_chat_messages_context_text_format( }, { "role": "assistant", - "content": "Some stuff." + "content": [{ + "type": "text", + "text": "Some stuff." + }] }, { "role": "user", From 0b95a0b79e1355f1054686cf7dbcb82cce334c49 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 13:24:44 -0400 Subject: [PATCH 11/43] Fix formatting --- vllm/config.py | 4 ++-- vllm/engine/arg_utils.py | 6 ++++-- vllm/entrypoints/chat_utils.py | 6 ++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 83586eae88539..61384455e9bbc 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1481,8 +1481,8 @@ def _verify_args(self) -> None: "typical_acceptance_sampler.") if (self.draft_token_acceptance_method != 'rejection_sampler' - and self.draft_token_acceptance_method != - 'typical_acceptance_sampler'): + and self.draft_token_acceptance_method + != 'typical_acceptance_sampler'): raise ValueError( "Expected draft_token_acceptance_method to be either " "rejection_sampler or typical_acceptance_sampler. 
Instead it " diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index c178d8667abce..31a7bad71c179 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -244,8 +244,10 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: type=str, default='string', choices=['string', 'openai'], - help='The content to choose with chat template. "string" will keep the content field as ' - 'just a string whereas "openai" will parse the content in the current OpenAI format.') + help='The content to choose with chat template. "string" will ' + 'keep the content field as just a string whereas "openai" ' + 'will parse the content in the current OpenAI format.' + ) parser.add_argument('--trust-remote-code', action='store_true', help='Trust remote code from huggingface.') diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 334ba77a139ed..0aeb18d88bb0c 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -515,7 +515,8 @@ def parse_chat_messages( mm_tracker = MultiModalItemTracker(model_config, tokenizer) for msg in messages: - sub_messages = _parse_chat_message_content(msg, mm_tracker, model_config.chat_template_content_type) + sub_messages = _parse_chat_message_content( + msg, mm_tracker, model_config.chat_template_content_type) conversation.extend(sub_messages) @@ -533,7 +534,8 @@ def parse_chat_messages_futures( mm_tracker = AsyncMultiModalItemTracker(model_config, tokenizer) for msg in messages: - sub_messages = _parse_chat_message_content(msg, mm_tracker, model_config.chat_template_content_type) + sub_messages = _parse_chat_message_content( + msg, mm_tracker, model_config.chat_template_content_type) conversation.extend(sub_messages) From d89e8c0081b9b2cb4ebedffa0c2552d46bac23df Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 13:25:44 -0400 Subject: [PATCH 12/43] Revert chat changes --- tests/entrypoints/openai/test_chat.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index 0fbc4cca83bd2..28cd30c2294dd 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -433,18 +433,28 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI, model=model_name, messages=messages, max_tokens=10, + extra_body=dict(min_tokens=10), temperature=0.0, stream=True, stream_options={ "include_usage": True, - "continuous_usage_stats": True + "continuous_usage_stats": True, }, ) + last_completion_tokens = 0 async for chunk in stream: assert chunk.usage.prompt_tokens >= 0 - assert chunk.usage.completion_tokens >= 0 + assert last_completion_tokens == 0 or \ + chunk.usage.completion_tokens > last_completion_tokens or \ + ( + not chunk.choices and + chunk.usage.completion_tokens == last_completion_tokens + ) assert chunk.usage.total_tokens == (chunk.usage.prompt_tokens + chunk.usage.completion_tokens) + last_completion_tokens = chunk.usage.completion_tokens + + assert last_completion_tokens == 10 # NOTE: Not sure why, but when I place this after `test_guided_regex_chat` @@ -956,4 +966,4 @@ async def test_long_seed(client: openai.AsyncOpenAI): seed=seed) assert ("greater_than_equal" in exc_info.value.message - or "less_than_equal" in exc_info.value.message) + or "less_than_equal" in exc_info.value.message) \ No newline at end of file From be94fc5d023996f297ae0e29efcde09c0b31fa24 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran 
Date: Fri, 18 Oct 2024 13:26:43 -0400
Subject: [PATCH 13/43] Add missing new line

---
 tests/entrypoints/openai/test_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py
index 28cd30c2294dd..3af0032fd2fb0 100644
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -966,4 +966,4 @@ async def test_long_seed(client: openai.AsyncOpenAI):
             seed=seed)

     assert ("greater_than_equal" in exc_info.value.message
-            or "less_than_equal" in exc_info.value.message)
\ No newline at end of file
+            or "less_than_equal" in exc_info.value.message)

From ff7965ab9d6cc99056bfbae8674a39238e0bf166 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:29:10 -0400
Subject: [PATCH 14/43] Minor nits

---
 vllm/config.py                 |  4 ++--
 vllm/entrypoints/chat_utils.py | 10 ++++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 61384455e9bbc..83586eae88539 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1481,8 +1481,8 @@ def _verify_args(self) -> None:
                 "typical_acceptance_sampler.")

         if (self.draft_token_acceptance_method != 'rejection_sampler'
-                and self.draft_token_acceptance_method
-                != 'typical_acceptance_sampler'):
+                and self.draft_token_acceptance_method !=
+                'typical_acceptance_sampler'):
             raise ValueError(
                 "Expected draft_token_acceptance_method to be either "
                 "rejection_sampler or typical_acceptance_sampler. Instead it "

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 0aeb18d88bb0c..bcfd236e64ab0 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -516,7 +516,10 @@ def parse_chat_messages(

     for msg in messages:
         sub_messages = _parse_chat_message_content(
-            msg, mm_tracker, model_config.chat_template_content_type)
+            msg,
+            mm_tracker,
+            model_config.chat_template_content_type,
+        )

         conversation.extend(sub_messages)

@@ -535,7 +538,10 @@ def parse_chat_messages_futures(

     for msg in messages:
         sub_messages = _parse_chat_message_content(
-            msg, mm_tracker, model_config.chat_template_content_type)
+            msg,
+            mm_tracker,
+            model_config.chat_template_content_type,
+        )

         conversation.extend(sub_messages)

From 22489cf2fd4863fc5101650e212e97e655ebef14 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:33:54 -0400
Subject: [PATCH 15/43] Minor nits again

---
 vllm/engine/arg_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 31a7bad71c179..e718da18efa34 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -246,8 +246,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=['string', 'openai'],
             help='The content to choose with chat template. "string" will '
             'keep the content field as just a string whereas "openai" '
-            'will parse the content in the current OpenAI format.'
-        )
+            'will parse the content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

From f8d2cbaebcca3a0e29a6f279af42590eca84e914 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:39:13 -0400
Subject: [PATCH 16/43] Standardize name

---
 tests/entrypoints/test_chat_utils.py |  2 +-
 vllm/config.py                       |  4 ++--
 vllm/engine/arg_utils.py             |  6 +++---
 vllm/entrypoints/chat_utils.py       | 12 ++++++------
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 9fcd4466915cb..92258c8a9116e 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -307,7 +307,7 @@ def test_parse_chat_messages_context_text_format(
     phi3v_model_config,
     phi3v_tokenizer,
 ):
-    phi3v_model_config.chat_template_content_type = "openai"
+    phi3v_model_config.chat_template_text_content_format = "openai"
     conversation, mm_data = parse_chat_messages(
         [{
             "role": "user",

diff --git a/vllm/config.py b/vllm/config.py
index 83586eae88539..7fdf38f2bf0cd 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -110,7 +110,7 @@ def __init__(self,
                  model: str,
                  tokenizer: str,
                  tokenizer_mode: str,
-                 chat_template_content_type: str,
+                 chat_template_text_content_format: str,
                  trust_remote_code: bool,
                  dtype: Union[str, torch.dtype],
                  seed: int,
@@ -137,7 +138,7 @@ def __init__(self,
         self.model = model
         self.tokenizer = tokenizer
         self.tokenizer_mode = tokenizer_mode
-        self.chat_template_content_type = chat_template_content_type
+        self.chat_template_text_content_format = chat_template_text_content_format
         self.trust_remote_code = trust_remote_code
         self.seed = seed
         self.revision = revision

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index e718da18efa34..6df697b2980e1 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -86,7 +86,7 @@ class EngineArgs:
     tokenizer: Optional[str] = None
     skip_tokenizer_init: bool = False
     tokenizer_mode: str = 'auto'
-    chat_template_content_type: str = "string"
+    chat_template_text_content_format: str ='string'
     trust_remote_code: bool = False
    download_dir: Optional[str] = None
     load_format: str = 'auto'
@@ -242,7 +242,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         parser.add_argument(
             '--chat-template-text-content-format',
             type=str,
-            default='string',
+            default=EngineArgs.chat_template_text_content_format,
             choices=['string', 'openai'],
             help='The content to choose with chat template. "string" will '
             'keep the content field as just a string whereas "openai" '
             'will parse the content in the current OpenAI format.')
@@ -850,7 +850,7 @@ def create_model_config(self) -> ModelConfig:
             # We know this is not None because we set it in __post_init__
             tokenizer=cast(str, self.tokenizer),
             tokenizer_mode=self.tokenizer_mode,
-            chat_template_content_type=self.chat_template_content_type,
+            chat_template_text_content_format=self.chat_template_text_content_format,
             trust_remote_code=self.trust_remote_code,
             dtype=self.dtype,
             seed=self.seed,

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index bcfd236e64ab0..7029aceb26607 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -392,7 +392,7 @@ def _parse_chat_message_content_parts(
     role: str,
     parts: Iterable[ChatCompletionContentPartParam],
     mm_tracker: BaseMultiModalItemTracker,
-    chat_template_content_type: str,
+    chat_template_text_content_format: str,
 ) -> List[ConversationMessage]:
     texts: List[str] = []

@@ -442,7 +442,7 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
-    elif has_text and chat_template_content_type == "openai":
+    elif has_text and chat_template_text_content_format == "openai":
         text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]
@@ -455,7 +455,7 @@ def _parse_chat_message_content_parts(
 def _parse_chat_message_content(
     message: ChatCompletionMessageParam,
     mm_tracker: BaseMultiModalItemTracker,
-    chat_template_content_type: str,
+    chat_template_text_content_format: str,
 ) -> List[ConversationMessage]:
     role = message["role"]
     content = message.get("content")
@@ -469,7 +469,7 @@ def _parse_chat_message_content(
         role,
         content,  # type: ignore
         mm_tracker,
-        chat_template_content_type,
+        chat_template_text_content_format,
     )

     for result_msg in result:
@@ -516,7 +516,7 @@ def parse_chat_messages(
         sub_messages = _parse_chat_message_content(
             msg,
             mm_tracker,
-            model_config.chat_template_content_type,
+            model_config.chat_template_text_content_format,
         )

         conversation.extend(sub_messages)
@@ -538,7 +538,7 @@ def parse_chat_messages_futures(
         sub_messages = _parse_chat_message_content(
             msg,
             mm_tracker,
-            model_config.chat_template_content_type,
+            model_config.chat_template_text_content_format,
         )

         conversation.extend(sub_messages)

From 54532f2ec4054a9d55515eb8a8101d049a9070ac Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:44:53 -0400
Subject: [PATCH 17/43] Remove unnecessary variable

---
 vllm/entrypoints/chat_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 7029aceb26607..bd31bbd871031 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -442,7 +442,7 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
-    elif has_text and chat_template_text_content_format == "openai":
+    if chat_template_text_content_format == "openai":
         text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]

From ea7274d2ff4d9392aa4ae0d7c2806ae063a0461a Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:45:19 -0400
Subject: [PATCH 18/43] Actually remove unnecessary variable

---
 vllm/entrypoints/chat_utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index bd31bbd871031..bdf7b6e2c4b11 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -402,13 +402,11 @@ def _parse_chat_message_content_parts(
         MODEL_KEEP_MULTI_MODAL_CONTENT

     has_image = False
-    has_text = False
     for part in parts:
         part_type = part["type"]
         if part_type == "text":
             text = _TextParser(part)["text"]
             texts.append(text)
-            has_text = True
         elif part_type == "image_url":
             image_url = _ImageParser(part)["image_url"]

From f3608bee2801a1cc1432cd6b3145844abdde18fe Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:51:23 -0400
Subject: [PATCH 19/43] Make variable name simpler

---
 tests/entrypoints/test_chat_utils.py |  2 +-
 vllm/config.py                       |  4 ++--
 vllm/engine/arg_utils.py             | 14 +++++++-------
 vllm/entrypoints/chat_utils.py       | 12 ++++++------
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 92258c8a9116e..f2585696cc8df 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -307,7 +307,7 @@ def test_parse_chat_messages_context_text_format(
     phi3v_model_config,
     phi3v_tokenizer,
 ):
-    phi3v_model_config.chat_template_text_content_format = "openai"
+    phi3v_model_config.chat_template_text_format = "openai"
     conversation, mm_data = parse_chat_messages(
         [{
             "role": "user",

diff --git a/vllm/config.py b/vllm/config.py
index 7fdf38f2bf0cd..9c4605723fb39 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -110,7 +110,7 @@ def __init__(self,
                  model: str,
                  tokenizer: str,
                  tokenizer_mode: str,
-                 chat_template_text_content_format: str,
+                 chat_template_text_format: str,
                  trust_remote_code: bool,
                  dtype: Union[str, torch.dtype],
                  seed: int,
@@ -138,7 +138,7 @@ def __init__(self,
         self.model = model
         self.tokenizer = tokenizer
         self.tokenizer_mode = tokenizer_mode
-        self.chat_template_text_content_format = chat_template_text_content_format
+        self.chat_template_text_format = chat_template_text_format
         self.trust_remote_code = trust_remote_code
         self.seed = seed
         self.revision = revision

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 6df697b2980e1..fd00ee2b00932 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -86,7 +86,7 @@ class EngineArgs:
     tokenizer: Optional[str] = None
     skip_tokenizer_init: bool = False
     tokenizer_mode: str = 'auto'
-    chat_template_text_content_format: str ='string'
+    chat_template_text_format: str ='string'
     trust_remote_code: bool = False
     download_dir: Optional[str] = None
     load_format: str = 'auto'
@@ -240,13 +240,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             'always use the slow tokenizer. \n* '
             '"mistral" will always use the `mistral_common` tokenizer.')
         parser.add_argument(
-            '--chat-template-text-content-format',
+            '--chat-template-text-format',
             type=str,
-            default=EngineArgs.chat_template_text_content_format,
+            default=EngineArgs.chat_template_text_format,
             choices=['string', 'openai'],
-            help='The content to choose with chat template. "string" will '
-            'keep the content field as just a string whereas "openai" '
-            'will parse the content in the current OpenAI format.')
+            help='The format to render text content within a chat template. '
+            '"string" will keep the content field as a string whereas '
+            '"openai" will parse the content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')
@@ -850,7 +850,7 @@ def create_model_config(self) -> ModelConfig:
             # We know this is not None because we set it in __post_init__
             tokenizer=cast(str, self.tokenizer),
             tokenizer_mode=self.tokenizer_mode,
-            chat_template_text_content_format=self.chat_template_text_content_format,
+            chat_template_text_format=self.chat_template_text_format,
             trust_remote_code=self.trust_remote_code,
             dtype=self.dtype,
             seed=self.seed,

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index bdf7b6e2c4b11..600d95a2e93b3 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -392,7 +392,7 @@ def _parse_chat_message_content_parts(
     role: str,
     parts: Iterable[ChatCompletionContentPartParam],
     mm_tracker: BaseMultiModalItemTracker,
-    chat_template_text_content_format: str,
+    chat_template_text_format: str,
 ) -> List[ConversationMessage]:
     texts: List[str] = []

@@ -440,7 +440,7 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
-    if chat_template_text_content_format == "openai":
+    if chat_template_text_format == "openai":
         text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]
@@ -453,7 +453,7 @@ def _parse_chat_message_content_parts(
 def _parse_chat_message_content(
     message: ChatCompletionMessageParam,
     mm_tracker: BaseMultiModalItemTracker,
-    chat_template_text_content_format: str,
+    chat_template_text_format: str,
 ) -> List[ConversationMessage]:
     role = message["role"]
     content = message.get("content")
@@ -469,7 +469,7 @@ def _parse_chat_message_content(
         role,
         content,  # type: ignore
         mm_tracker,
-        chat_template_text_content_format,
+        chat_template_text_format,
     )

     for result_msg in result:
@@ -516,7 +516,7 @@ def parse_chat_messages(
         sub_messages = _parse_chat_message_content(
             msg,
             mm_tracker,
-            model_config.chat_template_text_content_format,
+            model_config.chat_template_text_format,
         )

         conversation.extend(sub_messages)
@@ -538,7 +538,7 @@ def parse_chat_messages_futures(
         sub_messages = _parse_chat_message_content(
             msg,
             mm_tracker,
-            model_config.chat_template_text_content_format,
+            model_config.chat_template_text_format,
         )

         conversation.extend(sub_messages)

From 79a22bac50c572d0bedc06f772b8adaff8482a47 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:53:00 -0400
Subject: [PATCH 20/43] Fix help doc

---
 vllm/engine/arg_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index fd00ee2b00932..7e58008ae72fd 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -246,7 +246,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=['string', 'openai'],
             help='The format to render text content within a chat template. '
             '"string" will keep the content field as a string whereas '
-            '"openai" will parse the content in the current OpenAI format.')
+            '"openai" will parse content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

From c7e53716f0042cacd95e0c188f4bc742ffa14028 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:59:56 -0400
Subject: [PATCH 21/43] Fix formatting

---
 vllm/engine/arg_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 7e58008ae72fd..25ba20976dbe3 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -86,7 +86,7 @@ class EngineArgs:
     tokenizer: Optional[str] = None
     skip_tokenizer_init: bool = False
     tokenizer_mode: str = 'auto'
-    chat_template_text_format: str ='string'
+    chat_template_text_format: str = 'string'
     trust_remote_code: bool = False
     download_dir: Optional[str] = None
     load_format: str = 'auto'
@@ -245,8 +245,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             default=EngineArgs.chat_template_text_format,
             choices=['string', 'openai'],
             help='The format to render text content within a chat template. '
-            '"string" will keep the content field as a string whereas '
-            '"openai" will parse content in the current OpenAI format.')
+                 '"string" will keep the content field as a string whereas '
+                 '"openai" will parse content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

From 80d45d51121fc3cb27776c6bdccdb1c555fb4186 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 14:43:18 -0400
Subject: [PATCH 22/43] Fix default value in config

---
 vllm/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 9c4605723fb39..09c8b501b0f60 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -110,7 +110,6 @@ def __init__(self,
                  model: str,
                  tokenizer: str,
                  tokenizer_mode: str,
-                 chat_template_text_format: str,
                  trust_remote_code: bool,
                  dtype: Union[str, torch.dtype],
                  seed: int,
@@ -134,11 +133,11 @@ def __init__(self,
                  use_async_output_proc: bool = True,
                  override_neuron_config: Optional[Dict[str, Any]] = None,
                  config_format: ConfigFormat = ConfigFormat.AUTO,
+                 chat_template_text_format: str = "string",
                  mm_processor_kwargs: Optional[Dict[str, Any]] = None) -> None:
         self.model = model
         self.tokenizer = tokenizer
         self.tokenizer_mode = tokenizer_mode
-        self.chat_template_text_format = chat_template_text_format
         self.trust_remote_code = trust_remote_code
         self.seed = seed
         self.revision = revision
@@ -169,6 +168,7 @@ def __init__(self,
                                                     self.model, revision)
         self.dtype = _get_and_verify_dtype(self.hf_text_config, dtype)
         self.use_async_output_proc = use_async_output_proc
+        self.chat_template_text_format = chat_template_text_format
         self.mm_processor_kwargs = mm_processor_kwargs

         # Set enforce_eager to False if the value is unset.
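[Note: at this point in the series the new flag is wired end to end. A minimal sketch of the behavior it selects, based on the test updated in patches 10 and 19; `phi3v_model_config` and `phi3v_tokenizer` are the pytest fixtures from tests/entrypoints/test_chat_utils.py, and the default remains "string".]

```python
from vllm.entrypoints.chat_utils import parse_chat_messages

messages = [{
    "role": "user",
    "content": [{"type": "text", "text": "What's in this text?"}]
}]

# Default ("string"): OpenAI-style text parts are flattened to a plain string.
conversation, _ = parse_chat_messages(messages, phi3v_model_config,
                                      phi3v_tokenizer)
assert conversation == [{"role": "user", "content": "What's in this text?"}]

# "openai": the part structure is preserved for the chat template to render.
phi3v_model_config.chat_template_text_format = "openai"
conversation, _ = parse_chat_messages(messages, phi3v_model_config,
                                      phi3v_tokenizer)
assert conversation == [{
    "role": "user",
    "content": [{"type": "text", "text": "What's in this text?"}]
}]
```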
From 89dd84b4b98d4e1f25135a158e73b36d6caa97fc Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:03:53 -0400
Subject: [PATCH 23/43] Fix failing test

---
 tests/entrypoints/openai/test_serving_chat.py | 1 +
 vllm/engine/llm_engine.py                     | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index ec550fe82c70f..d91558776738d 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -25,6 +25,7 @@ class MockModelConfig:
     tokenizer = MODEL_NAME
     trust_remote_code = False
     tokenizer_mode = "auto"
+    chat_template_text_format = "string"
     max_model_len = 100
     tokenizer_revision = None
     embedding_mode = False

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 61c21887e6816..cd9387e961acb 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -251,7 +251,7 @@ def __init__(
             "num_scheduler_steps=%d, chunked_prefill_enabled=%s "
             "multi_step_stream_outputs=%s, enable_prefix_caching=%s, "
             "use_async_output_proc=%s, use_cached_outputs=%s, "
-            "mm_processor_kwargs=%s)",
+            "chat_template_text_format=%s, mm_processor_kwargs=%s)",
             VLLM_VERSION,
             model_config.model,
             speculative_config,
@@ -286,6 +286,7 @@ def __init__(
             cache_config.enable_prefix_caching,
             model_config.use_async_output_proc,
             use_cached_outputs,
+            model_config.chat_template_text_format,
             model_config.mm_processor_kwargs,
         )
         # TODO(woosuk): Print more configs in debug mode.

From faafc31660c9205b7c7c11f908f65296851c00e4 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:13:14 -0400
Subject: [PATCH 24/43] Fix failing test

---
 tests/entrypoints/test_chat_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -23,6 +23,7 @@ def phi3v_model_config():
                        trust_remote_code=True,
                        dtype="bfloat16",
                        seed=0,
+                       chat_template_text_format="string",
                        limit_mm_per_prompt={
                            "image": 2,
                        })

From ef74a9cfe1fecde7e14cb4841d2b9c3b3230d8d2 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:20:53 -0400
Subject: [PATCH 25/43] Fix failing test again

---
 vllm/entrypoints/chat_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 600d95a2e93b3..a2c5bab7ac8ee 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -441,7 +441,8 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        text_prompt = [{'type': 'text', 'text': text_prompt}]
+        role_content = [{'type': 'text', 'text': text_prompt}]
+        return [ConversationMessage(role=role, content=role_content)]

     return [ConversationMessage(role=role, content=text_prompt)]

From 1eca1f509af2dc5390568f63dcb162665e9c6e83 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:34:27 -0400
Subject: [PATCH 26/43] Fix mypy error

---
 vllm/entrypoints/chat_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index a2c5bab7ac8ee..99d7bed7957f0 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -96,7 +96,7 @@ class ConversationMessage(TypedDict, total=False):
     role: Required[str]
     """The role of the message's author."""

-    content: Optional[str]
+    content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]]
    """The contents of the message"""

     tool_call_id: Optional[str]
@@ -441,7 +441,7 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        role_content = [{'type': 'text', 'text': text_prompt}]
+        role_content = [ChatCompletionContentPartTextParam(type="text", text=content)]
         return [ConversationMessage(role=role, content=role_content)]

     return [ConversationMessage(role=role, content=text_prompt)]

From 2fe18bd55ca24f4e42d87c9696aeb45d91c94c35 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:43:59 -0400
Subject: [PATCH 27/43] Fix mypy error by ignoring

---
 vllm/entrypoints/chat_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 99d7bed7957f0..b24904df7a250 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -96,7 +96,7 @@ class ConversationMessage(TypedDict, total=False):
     role: Required[str]
     """The role of the message's author."""

-    content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]]
+    content: Optional[str]
     """The contents of the message"""

     tool_call_id: Optional[str]
@@ -441,7 +441,7 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        role_content = [ChatCompletionContentPartTextParam(type="text", text=content)]
+        role_content = [ChatCompletionContentPartTextParam(type="text", text=text_prompt)]  # type: ignore
         return [ConversationMessage(role=role, content=role_content)]

     return [ConversationMessage(role=role, content=text_prompt)]

From 6311517c79dbaae8491cb3d2b4bce8786ed28470 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:46:07 -0400
Subject: [PATCH 28/43] Put ignore in the right place

---
 vllm/entrypoints/chat_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index b24904df7a250..0994e8674f0d5 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -441,8 +441,8 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        role_content = [ChatCompletionContentPartTextParam(type="text", text=text_prompt)]  # type: ignore
-        return [ConversationMessage(role=role, content=role_content)]
+        role_content = [ChatCompletionContentPartTextParam(type="text", text=text_prompt)]
+        return [ConversationMessage(role=role, content=role_content)]  # type: ignore

     return [ConversationMessage(role=role, content=text_prompt)]

From f3f38871400f5eaa3faaefb6a1af4ec5e4adb9fe Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:48:59 -0400
Subject: [PATCH 29/43] Fix formatting

---
 vllm/engine/arg_utils.py       | 2 +-
 vllm/entrypoints/chat_utils.py | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 2ec460492e8c4..38962a55379a5 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -256,7 +256,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=['string', 'openai'],
             help='The format to render text content within a chat template. '
             '"string" will keep the content field as a string whereas '
-            '"openai" will parse content in the current OpenAI format.')
+            '"openai"yapf will parse content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 0994e8674f0d5..415b139d23a15 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -441,8 +441,12 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        role_content = [ChatCompletionContentPartTextParam(type="text", text=text_prompt)]
-        return [ConversationMessage(role=role, content=role_content)]  # type: ignore
+        role_content = [
+            ChatCompletionContentPartTextParam(type="text",
+                                               text=text_prompt)
+        ]
+        return [ConversationMessage(role=role,
+                                    content=role_content)]  # type: ignore

     return [ConversationMessage(role=role, content=text_prompt)]

From a08b342ddecc4b6f96a6e935a376c3abe8d512e0 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 17:50:19 -0400
Subject: [PATCH 30/43] Remove stupid typo

---
 vllm/engine/arg_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 38962a55379a5..2ec460492e8c4 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -256,7 +256,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=['string', 'openai'],
             help='The format to render text content within a chat template. '
             '"string" will keep the content field as a string whereas '
-            '"openai"yapf will parse content in the current OpenAI format.')
+            '"openai" will parse content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

From 75ed3e6e4982a86edfafd69896511274aaef1d8b Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Tue, 22 Oct 2024 12:32:11 -0400
Subject: [PATCH 31/43] Fix mypy and tests

---
 tests/entrypoints/test_chat_utils.py |  2 +-
 vllm/entrypoints/chat_utils.py       | 11 ++++-------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 76f2ffa6bdff9..47b410bdf9e20 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -15,7 +15,7 @@
 PHI3V_MODEL_ID = "microsoft/Phi-3.5-vision-instruct"

-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def phi3v_model_config():
     return ModelConfig(PHI3V_MODEL_ID,
                        task="generate",

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 20a4458c912d8..49cfe5b78c70c 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -121,7 +121,7 @@ class ConversationMessage(TypedDict, total=False):
     role: Required[str]
     """The role of the message's author."""

-    content: Optional[str]
+    content: Union[Optional[str], List[Dict[str, str]]]
     """The contents of the message"""

     tool_call_id: Optional[str]
@@ -523,19 +523,16 @@ def _parse_chat_message_content_parts(
         if has_image:
             role_content = [{'type': 'image'}] + role_content
         return [ConversationMessage(role=role,
-                                    content=role_content)]  # type: ignore
+                                    content=role_content)]
     else:
         mm_placeholder_counts = mm_parser.mm_placeholder_counts()
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
         if chat_template_text_format == "openai":
-            role_content = [
-                ChatCompletionContentPartTextParam(type="text",
-                                                   text=text_prompt)
-            ]
+            role_content = [{'type': 'text', 'text': text_prompt}]
             return [ConversationMessage(role=role,
-                                        content=role_content)]  # type: ignore
+                                        content=role_content)]

     return [ConversationMessage(role=role, content=text_prompt)]

From 3e55da82ecd1de81c2d9013837800ddd35d1f571 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Tue, 22 Oct 2024 13:42:40 -0400
Subject: [PATCH 32/43] Fix mypy

---
 vllm/entrypoints/openai/serving_chat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index c3fa0e44e5e8d..962f36ff294be 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -384,7 +384,7 @@ async def chat_completion_stream_generator(
                     # Send response to echo the input portion of the
                     # last message
                     if request.echo or request.continue_final_message:
-                        last_msg_content: str = ""
+                        last_msg_content: Union[str, List[Dict[str, str]]] = ""
                         if conversation and "content" in conversation[
                                 -1] and conversation[-1].get("role") == role:
                             last_msg_content = conversation[-1]["content"] or ""
@@ -724,7 +724,7 @@ async def chat_completion_full_generator(
             choices.append(choice_data)

         if request.echo or request.continue_final_message:
-            last_msg_content = ""
+            last_msg_content: Union[str, List[Dict[str, str]]] = ""
            if conversation and "content" in conversation[-1] and conversation[
                     -1].get("role") == role:
                 last_msg_content = conversation[-1]["content"] or ""
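[Note: the next two commits handle echoing when the final conversation message carries a list of text parts rather than a plain string. A minimal, self-contained sketch of the flattening they introduce, mirroring the serving_chat.py hunks below; the message values are hypothetical.]

```python
from typing import Dict, List, Union

# A final message whose content arrived in the OpenAI part format.
last_msg_content: Union[str, List[Dict[str, str]]] = [
    {"type": "text", "text": "what is 1+1?"},
    {"type": "text", "text": "please answer briefly."},
]

# Collapse OpenAI-style parts back into a single string before echoing,
# matching the join added in PATCH 33 and reformatted in PATCHES 34/36.
if isinstance(last_msg_content, list):
    last_msg_content = "\n".join(msg["text"] for msg in last_msg_content)

assert last_msg_content == "what is 1+1?\nplease answer briefly."
```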
From 7a01d53dcbe02d35bbc9cce41d14ecf0fa621b63 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Tue, 22 Oct 2024 14:04:14 -0400 Subject: [PATCH 33/43] Fix mypy again --- vllm/entrypoints/openai/serving_chat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 962f36ff294be..752fd1ee2b806 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -728,6 +728,8 @@ async def chat_completion_full_generator( if conversation and "content" in conversation[-1] and conversation[ -1].get("role") == role: last_msg_content = conversation[-1]["content"] or "" + if isinstance(last_msg_content, list): + last_msg_content = "\n".join([msg['text'] for msg in last_msg_content]) for choice in choices: full_message = last_msg_content + (choice.message.content From bef9a2dd4661b4d4996c72980b032f367cf77277 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Tue, 22 Oct 2024 14:06:33 -0400 Subject: [PATCH 34/43] Fix formatting --- vllm/entrypoints/openai/serving_chat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 752fd1ee2b806..e3a1786e68102 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -729,7 +729,8 @@ async def chat_completion_full_generator( -1].get("role") == role: last_msg_content = conversation[-1]["content"] or "" if isinstance(last_msg_content, list): - last_msg_content = "\n".join([msg['text'] for msg in last_msg_content]) + last_msg_content = "\n".join( + [msg['text'] for msg in last_msg_content]) for choice in choices: full_message = last_msg_content + (choice.message.content From c0fc5c9697c0f9a6ac1c640d08a23f7be7adb4ae Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Tue, 22 Oct 2024 14:09:39 -0400 Subject: [PATCH 35/43] Fix formatting again --- vllm/entrypoints/chat_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 49cfe5b78c70c..77e9ccd182002 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -522,8 +522,7 @@ def _parse_chat_message_content_parts( if has_image: role_content = [{'type': 'image'}] + role_content - return [ConversationMessage(role=role, - content=role_content)] + return [ConversationMessage(role=role, content=role_content)] else: mm_placeholder_counts = mm_parser.mm_placeholder_counts() if mm_placeholder_counts: @@ -531,8 +530,7 @@ def _parse_chat_message_content_parts( mm_placeholder_counts, text_prompt) if chat_template_text_format == "openai": role_content = [{'type': 'text', 'text': text_prompt}] - return [ConversationMessage(role=role, - content=role_content)] + return [ConversationMessage(role=role, content=role_content)] return [ConversationMessage(role=role, content=text_prompt)] From 50ba0ce69cce89755cc3e55d47a1fc4d24357cc0 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 09:40:14 -0700 Subject: [PATCH 36/43] Add docs and str generator --- docs/source/serving/openai_compatible_server.md | 16 ++++++++++++++++ vllm/entrypoints/openai/serving_chat.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md index cc8e539a8a6d3..c33c75a3730b2 100644 --- a/docs/source/serving/openai_compatible_server.md +++ 
b/docs/source/serving/openai_compatible_server.md
@@ -103,6 +103,22 @@ vllm serve --chat-template ./path-to-chat-template.jinja
 
 vLLM community provides a set of chat templates for popular models. You can find them in the examples
 directory [here](https://github.com/vllm-project/vllm/tree/main/examples/)
+With the inclusion of multi-modal chat APIs, the OpenAI spec now accepts chat messages in a new format that specifies
+both a `type` and a `text` field. An example is provided below:
+```python
+completion = client.chat.completions.create(
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    messages=[
+        {"role": "user", "content": [{"type": "text", "text": "Classify this sentiment: vLLM is wonderful!"}]}
+    ]
+)
+```
+Most chat templates for LLMs expect the `content` field to be a `string`, but some newer models such as
+`meta-llama/Llama-Guard-3-1B` expect the content to be formatted according to the new OpenAI spec. To choose which
+format vLLM should parse the content in, use the `--chat-template-text-format` argument to specify
+between `string` or `openai`.
+
+
 ## Command line arguments for the server
 
 ```{argparse}
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index e3a1786e68102..26b261780ca79 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -729,8 +729,8 @@ async def chat_completion_full_generator(
                 -1].get("role") == role:
             last_msg_content = conversation[-1]["content"] or ""
             if isinstance(last_msg_content, list):
-                last_msg_content = "\n".join(
-                    [msg['text'] for msg in last_msg_content])
+                last_msg_content = "\n".join(msg['text']
+                                             for msg in last_msg_content)
 
         for choice in choices:
             full_message = last_msg_content + (choice.message.content

From 62e35bc617dc69c101bbc784854d3049401346f4 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Wed, 23 Oct 2024 09:55:43 -0700
Subject: [PATCH 37/43] Add a bit more docs

Signed-off-by: Vinay Damodaran

---
 docs/source/serving/openai_compatible_server.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md
index c33c75a3730b2..413c87ab28755 100644
--- a/docs/source/serving/openai_compatible_server.md
+++ b/docs/source/serving/openai_compatible_server.md
@@ -116,7 +116,8 @@ completion = client.chat.completions.create(
 Most chat templates for LLMs expect the `content` field to be a `string`, but some newer models such as
 `meta-llama/Llama-Guard-3-1B` expect the content to be formatted according to the new OpenAI spec. To choose which
 format vLLM should parse the content in, use the `--chat-template-text-format` argument to specify
-between `string` or `openai`.
+between `string` or `openai`. The default is `string`, and vLLM internally converts both spec formats to match it
+unless `openai` is explicitly specified.
 
 
## Command line arguments for the server From 0130c74f2633b61ba983bc240e31bf47dd9eac7e Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 10:55:03 -0700 Subject: [PATCH 38/43] Fix formatting --- vllm/entrypoints/chat_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 6b92053e318b2..b16854b535571 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -553,7 +553,6 @@ def _parse_chat_message_content_part( raise NotImplementedError(f"Unknown part type: {part_type}") - # No need to validate using Pydantic again _AssistantParser = partial(cast, ChatCompletionAssistantMessageParam) _ToolParser = partial(cast, ChatCompletionToolMessageParam) From f40fbf9cadc898afd8811f4c1cdaa80347cb4fd3 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:45:03 -0700 Subject: [PATCH 39/43] Simplify check with content parser Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 0dd0bfd743ed5..eaed693397cf9 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -431,7 +431,7 @@ def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int], def _parse_chat_message_content_mm_part( part: ChatCompletionContentPartParam) -> Tuple[str, str]: """ - Parses a given multi modal content part based on its type. + Parses a given multi-modal content part based on its type. Args: part: A dict containing the content part, with a potential 'type' field. @@ -492,7 +492,7 @@ def _parse_chat_message_content_parts( mm_parser = mm_tracker.create_parser() keep_multimodal_content = \ mm_tracker._model_config.hf_config.model_type in \ - MODEL_KEEP_MULTI_MODAL_CONTENT + MODEL_KEEP_MULTI_MODAL_CONTENT or (chat_template_text_format == "openai") for part in parts: parse_res = _parse_chat_message_content_part( @@ -510,9 +510,6 @@ def _parse_chat_message_content_parts( if mm_placeholder_counts: text_prompt = _get_full_multimodal_text_prompt(mm_placeholder_counts, text_prompt) - if chat_template_text_format == "openai": - role_content = [{'type': 'text', 'text': text_prompt}] - return [ConversationMessage(role=role, content=role_content)] return [ConversationMessage(role=role, content=text_prompt)] From 1a772c773253fe0a09c333f19357a9da33767efc Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:46:45 -0700 Subject: [PATCH 40/43] Fix ruff Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index eaed693397cf9..54407063948dc 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -492,7 +492,8 @@ def _parse_chat_message_content_parts( mm_parser = mm_tracker.create_parser() keep_multimodal_content = \ mm_tracker._model_config.hf_config.model_type in \ - MODEL_KEEP_MULTI_MODAL_CONTENT or (chat_template_text_format == "openai") + MODEL_KEEP_MULTI_MODAL_CONTENT or \ + (chat_template_text_format == "openai") for part in parts: parse_res = _parse_chat_message_content_part( From fd61ada92c483d8648ee5336121cd5e3d8326fee Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:51:06 -0700 Subject: [PATCH 41/43] Rename variable to be more appropriate Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 54407063948dc..db8b80bb572a3 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -490,14 +490,14 @@ def _parse_chat_message_content_parts( content: List[Union[str, Dict[str, str]]] = [] mm_parser = mm_tracker.create_parser() - keep_multimodal_content = \ + wrap_dicts = \ mm_tracker._model_config.hf_config.model_type in \ MODEL_KEEP_MULTI_MODAL_CONTENT or \ (chat_template_text_format == "openai") for part in parts: parse_res = _parse_chat_message_content_part( - part, mm_parser, wrap_dicts=keep_multimodal_content) + part, mm_parser, wrap_dicts=wrap_dicts) if parse_res: content.append(parse_res) From a04d76d309309f746a31a66c4a6620124984114b Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:52:22 -0700 Subject: [PATCH 42/43] Fix missing part Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index db8b80bb572a3..6f3ee3bbc71a6 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -501,7 +501,7 @@ def _parse_chat_message_content_parts( if parse_res: content.append(parse_res) - if keep_multimodal_content: + if wrap_dicts: # Parsing wraps images and texts as interleaved dictionaries return [ConversationMessage(role=role, content=content)] # type: ignore From 1bb9faac4179e9106c4f6ad1347fafacca899523 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:57:19 -0700 Subject: [PATCH 43/43] Fix formatting Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 6f3ee3bbc71a6..fef6a91414db6 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -497,7 +497,10 @@ def _parse_chat_message_content_parts( for part in parts: parse_res = _parse_chat_message_content_part( - part, mm_parser, wrap_dicts=wrap_dicts) + part, + mm_parser, + wrap_dicts=wrap_dicts, + ) if parse_res: content.append(parse_res)
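
Taken together, the series makes the parsed shape of chat content configurable per deployment. A hedged end-to-end sketch of exercising the new flag (the server invocation, port, and model name are illustrative, not taken from this series):

```python
# Illustrative server launch using the flag this series documents:
#   vllm serve meta-llama/Llama-Guard-3-1B --chat-template-text-format openai
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# With --chat-template-text-format openai, the text parts below are kept as
# {"type": "text", "text": ...} dicts when rendered into the chat template;
# with the default "string", they are first joined into a single string.
completion = client.chat.completions.create(
    model="meta-llama/Llama-Guard-3-1B",
    messages=[{
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Classify this sentiment: vLLM is wonderful!"
        }],
    }],
)
print(completion.choices[0].message.content)
```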