From bde406525fd73ce484ebdced99358b6e52fa10f6 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Mon, 14 Oct 2024 20:30:19 -0700
Subject: [PATCH 01/43] [Bugfix]: Make chat content text allow type content

---
 vllm/entrypoints/chat_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 41354dc602c61..4ad32cbae11d5 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -400,11 +400,13 @@ def _parse_chat_message_content_parts(
         MODEL_KEEP_MULTI_MODAL_CONTENT

     has_image = False
+    has_text = False
     for part in parts:
         part_type = part["type"]
         if part_type == "text":
             text = _TextParser(part)["text"]
             texts.append(text)
+            has_text = True
         elif part_type == "image_url":
             image_url = _ImageParser(part)["image_url"]

@@ -426,8 +428,7 @@ def _parse_chat_message_content_parts(
             raise NotImplementedError(f"Unknown part type: {part_type}")

     text_prompt = "\n".join(texts)
-    if keep_multimodal_content:
-        text_prompt = "\n".join(texts)
+    if has_text or keep_multimodal_content:
         role_content = [{'type': 'text', 'text': text_prompt}]

         if has_image:

From 3bf919cb8f4cf093b1fb8ac75047b14da44c66f3 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Tue, 15 Oct 2024 13:33:25 -0700
Subject: [PATCH 02/43] Add test to verify content is parsed as expected

---
 tests/entrypoints/test_chat_utils.py | 37 ++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 6ded5102c9314..115a32438845b 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -302,6 +302,43 @@ def test_parse_chat_messages_multiple_images_across_messages(
     ]
     _assert_mm_data_is_image_input(mm_data, 2)

+def test_parse_chat_messages_context_text_format(
+    phi3v_model_config,
+    phi3v_tokenizer,
+):
+    conversation, mm_data = parse_chat_messages([{
+        "role":
+        "user",
+        "content": [{
+            "type": "text",
+            "text": "What's in this text?"
+        }]
+    }, {
+        "role": "assistant",
+        "content": "Some stuff."
+    }, {
+        "role":
+        "user",
+        "content": [{
+            "type": "text",
+            "text": "What about this one?"
+        }]
+    }], phi3v_model_config, phi3v_tokenizer)
+
+    assert conversation == [
+        {
+            "role": "user",
+            "content": "What's in this text?"
+        },
+        {
+            "role": "assistant",
+            "content": "Some stuff."
+        },
+        {
+            "role": "user",
+            "content": "What about this one?"
+        },
+    ]

 def test_parse_chat_messages_rejects_too_many_images_in_one_message(
     phi3v_model_config,
     phi3v_tokenizer,

From 66ab303e9374f67c1de4e18b2f47e260177731d8 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Tue, 15 Oct 2024 14:01:43 -0700
Subject: [PATCH 03/43] Fix formatting

---
 tests/entrypoints/test_chat_utils.py | 37 ++++++++++++++--------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 115a32438845b..3b54f43b37b0f 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -302,28 +302,28 @@ def test_parse_chat_messages_multiple_images_across_messages(
     ]
     _assert_mm_data_is_image_input(mm_data, 2)

+
 def test_parse_chat_messages_context_text_format(
     phi3v_model_config,
     phi3v_tokenizer,
 ):
-    conversation, mm_data = parse_chat_messages([{
-        "role":
-        "user",
-        "content": [{
-            "type": "text",
-            "text": "What's in this text?"
-        }]
-    }, {
-        "role": "assistant",
-        "content": "Some stuff."
-    }, {
-        "role":
-        "user",
-        "content": [{
-            "type": "text",
-            "text": "What about this one?"
-        }]
-    }], phi3v_model_config, phi3v_tokenizer)
+    conversation, mm_data = parse_chat_messages(
+        [{
+            "role": "user",
+            "content": [{
+                "type": "text",
+                "text": "What's in this text?"
+            }]
+        }, {
+            "role": "assistant",
+            "content": "Some stuff."
+        }, {
+            "role": "user",
+            "content": [{
+                "type": "text",
+                "text": "What about this one?"
+            }]
+        }], phi3v_model_config, phi3v_tokenizer)

     assert conversation == [
         {
             "role": "user",
@@ -340,6 +340,7 @@ def test_parse_chat_messages_context_text_format(
         },
     ]

+
 def test_parse_chat_messages_rejects_too_many_images_in_one_message(
     phi3v_model_config,
     phi3v_tokenizer,

From aed37f668eeac335e61380e1fdfdde25feae138d Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 03:32:24 -0400
Subject: [PATCH 04/43] Fix test to actually test the fix

---
 tests/entrypoints/test_chat_utils.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 3b54f43b37b0f..6eb97ca48168b 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -328,7 +328,10 @@ def test_parse_chat_messages_context_text_format(
     assert conversation == [
         {
             "role": "user",
-            "content": "What's in this text?"
+            "content": [{
+                "type": "text",
+                "text": "What's in this text?"
+            }]
         },
         {
             "role": "assistant",
@@ -336,7 +339,10 @@ def test_parse_chat_messages_context_text_format(
         },
         {
             "role": "user",
-            "content": "What about this one?"
+            "content": [{
+                "type": "text",
+                "text": "What about this one?"
+            }]
         },
     ]

From a194b32d68176aadf7c6ba0e384b9d7fd7bbf234 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 04:07:52 -0400
Subject: [PATCH 05/43] Rewrite logic to fix failing test

---
 vllm/entrypoints/chat_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 4ad32cbae11d5..e05b6c9712c4e 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -428,7 +428,7 @@ def _parse_chat_message_content_parts(
             raise NotImplementedError(f"Unknown part type: {part_type}")

     text_prompt = "\n".join(texts)
-    if has_text or keep_multimodal_content:
+    if keep_multimodal_content:
         role_content = [{'type': 'text', 'text': text_prompt}]

         if has_image:
@@ -440,6 +440,8 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
+    elif has_text:
+        text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]

From a112a1ac92136a80e559fd0b84ca6f611fe22aef Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 06:00:28 -0400
Subject: [PATCH 06/43] Another attempt at making this work

---
 vllm/entrypoints/chat_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index c3ac03ee6ed1f..899faa70021dd 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -392,6 +392,7 @@ def _parse_chat_message_content_parts(
     role: str,
     parts: Iterable[ChatCompletionContentPartParam],
     mm_tracker: BaseMultiModalItemTracker,
+    keep_content_structure: bool,
 ) -> List[ConversationMessage]:
     texts: List[str] = []

@@ -441,7 +442,7 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
-    elif has_text:
+    elif has_text and keep_content_structure:
         text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]
@@ -458,17 +459,20 @@ def _parse_chat_message_content(
     role = message["role"]
     content = message.get("content")

+    keep_content_structure = True
     if content is None:
         content = []
     elif isinstance(content, str):
         content = [
             ChatCompletionContentPartTextParam(type="text", text=content)
         ]
+        keep_content_structure = False

     result = _parse_chat_message_content_parts(
         role,
         content,  # type: ignore
         mm_tracker,
+        keep_content_structure,
     )

     for result_msg in result:

From 0fee8d782e279a849f21096883d6b30dbb2193b1 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 11:04:32 -0400
Subject: [PATCH 07/43] Remove the offending tests

---
 tests/entrypoints/openai/test_chat.py | 51 ---------------------------
 1 file changed, 51 deletions(-)

diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py
index 3af0032fd2fb0..34783c7cf93dd 100644
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -898,57 +898,6 @@ async def test_extra_fields(client: openai.AsyncOpenAI):
     assert "extra_forbidden" in exc_info.value.message

-@pytest.mark.asyncio
-async def test_complex_message_content(client: openai.AsyncOpenAI):
-    resp = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[{
-            "role":
-            "user",
-            "content": [{
-                "type":
-                "text",
-                "text":
-                "what is 1+1? please provide the result without any other text."
-            }]
-        }],
-        temperature=0,
-        seed=0)
-    content = resp.choices[0].message.content
-    assert content == "2"
-
-
-@pytest.mark.asyncio
-async def test_custom_role(client: openai.AsyncOpenAI):
-    # Not sure how the model handles custom roles so we just check that
-    # both string and complex message content are handled in the same way
-
-    resp1 = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[{
-            "role": "my-custom-role",
-            "content": "what is 1+1?",
-        }],  # type: ignore
-        temperature=0,
-        seed=0)
-
-    resp2 = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[{
-            "role": "my-custom-role",
-            "content": [{
-                "type": "text",
-                "text": "what is 1+1?"
- }] - }], # type: ignore - temperature=0, - seed=0) - - content1 = resp1.choices[0].message.content - content2 = resp2.choices[0].message.content - assert content1 == content2 - - @pytest.mark.asyncio async def test_long_seed(client: openai.AsyncOpenAI): for seed in [ From 4011ed1500a4123d67ecc02d2b1ac1271e3ca38c Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 12:36:33 -0400 Subject: [PATCH 08/43] Add cli args to switch between types --- tests/entrypoints/openai/test_chat.py | 65 ++++++++++++++++++++++----- vllm/config.py | 2 + vllm/engine/arg_utils.py | 9 ++++ vllm/entrypoints/chat_utils.py | 9 ++-- 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index 34783c7cf93dd..0fbc4cca83bd2 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -433,28 +433,18 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI, model=model_name, messages=messages, max_tokens=10, - extra_body=dict(min_tokens=10), temperature=0.0, stream=True, stream_options={ "include_usage": True, - "continuous_usage_stats": True, + "continuous_usage_stats": True }, ) - last_completion_tokens = 0 async for chunk in stream: assert chunk.usage.prompt_tokens >= 0 - assert last_completion_tokens == 0 or \ - chunk.usage.completion_tokens > last_completion_tokens or \ - ( - not chunk.choices and - chunk.usage.completion_tokens == last_completion_tokens - ) + assert chunk.usage.completion_tokens >= 0 assert chunk.usage.total_tokens == (chunk.usage.prompt_tokens + chunk.usage.completion_tokens) - last_completion_tokens = chunk.usage.completion_tokens - - assert last_completion_tokens == 10 # NOTE: Not sure why, but when I place this after `test_guided_regex_chat` @@ -898,6 +888,57 @@ async def test_extra_fields(client: openai.AsyncOpenAI): assert "extra_forbidden" in exc_info.value.message +@pytest.mark.asyncio +async def test_complex_message_content(client: openai.AsyncOpenAI): + resp = await client.chat.completions.create( + model=MODEL_NAME, + messages=[{ + "role": + "user", + "content": [{ + "type": + "text", + "text": + "what is 1+1? please provide the result without any other text." + }] + }], + temperature=0, + seed=0) + content = resp.choices[0].message.content + assert content == "2" + + +@pytest.mark.asyncio +async def test_custom_role(client: openai.AsyncOpenAI): + # Not sure how the model handles custom roles so we just check that + # both string and complex message content are handled in the same way + + resp1 = await client.chat.completions.create( + model=MODEL_NAME, + messages=[{ + "role": "my-custom-role", + "content": "what is 1+1?", + }], # type: ignore + temperature=0, + seed=0) + + resp2 = await client.chat.completions.create( + model=MODEL_NAME, + messages=[{ + "role": "my-custom-role", + "content": [{ + "type": "text", + "text": "what is 1+1?" 
+ }] + }], # type: ignore + temperature=0, + seed=0) + + content1 = resp1.choices[0].message.content + content2 = resp2.choices[0].message.content + assert content1 == content2 + + @pytest.mark.asyncio async def test_long_seed(client: openai.AsyncOpenAI): for seed in [ diff --git a/vllm/config.py b/vllm/config.py index 4533fb017188c..83586eae88539 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -110,6 +110,7 @@ def __init__(self, model: str, tokenizer: str, tokenizer_mode: str, + chat_template_content_type: str, trust_remote_code: bool, dtype: Union[str, torch.dtype], seed: int, @@ -137,6 +138,7 @@ def __init__(self, self.model = model self.tokenizer = tokenizer self.tokenizer_mode = tokenizer_mode + self.chat_template_content_type = chat_template_content_type self.trust_remote_code = trust_remote_code self.seed = seed self.revision = revision diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 41963dcb16922..c178d8667abce 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -86,6 +86,7 @@ class EngineArgs: tokenizer: Optional[str] = None skip_tokenizer_init: bool = False tokenizer_mode: str = 'auto' + chat_template_content_type: str = "string" trust_remote_code: bool = False download_dir: Optional[str] = None load_format: str = 'auto' @@ -238,6 +239,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: 'fast tokenizer if available.\n* "slow" will ' 'always use the slow tokenizer. \n* ' '"mistral" will always use the `mistral_common` tokenizer.') + parser.add_argument( + '--chat-template-text-content-format', + type=str, + default='string', + choices=['string', 'openai'], + help='The content to choose with chat template. "string" will keep the content field as ' + 'just a string whereas "openai" will parse the content in the current OpenAI format.') parser.add_argument('--trust-remote-code', action='store_true', help='Trust remote code from huggingface.') @@ -841,6 +849,7 @@ def create_model_config(self) -> ModelConfig: # We know this is not None because we set it in __post_init__ tokenizer=cast(str, self.tokenizer), tokenizer_mode=self.tokenizer_mode, + chat_template_content_type=self.chat_template_content_type, trust_remote_code=self.trust_remote_code, dtype=self.dtype, seed=self.seed, diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 899faa70021dd..78e4d7d8a27b7 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -392,7 +392,7 @@ def _parse_chat_message_content_parts( role: str, parts: Iterable[ChatCompletionContentPartParam], mm_tracker: BaseMultiModalItemTracker, - keep_content_structure: bool, + chat_template_content_type: str, ) -> List[ConversationMessage]: texts: List[str] = [] @@ -442,7 +442,7 @@ def _parse_chat_message_content_parts( if mm_placeholder_counts: text_prompt = _get_full_multimodal_text_prompt( mm_placeholder_counts, text_prompt) - elif has_text and keep_content_structure: + elif has_text and chat_template_content_type == "openai": text_prompt = [{'type': 'text', 'text': text_prompt}] return [ConversationMessage(role=role, content=text_prompt)] @@ -455,24 +455,23 @@ def _parse_chat_message_content_parts( def _parse_chat_message_content( message: ChatCompletionMessageParam, mm_tracker: BaseMultiModalItemTracker, + chat_template_content_type: str, ) -> List[ConversationMessage]: role = message["role"] content = message.get("content") - keep_content_structure = True if content is None: content = [] elif isinstance(content, str): content 
= [ ChatCompletionContentPartTextParam(type="text", text=content) ] - keep_content_structure = False result = _parse_chat_message_content_parts( role, content, # type: ignore mm_tracker, - keep_content_structure, + chat_template_content_type, ) for result_msg in result: From 80ae489b4dcd862e39a7e83f50c840ec530879bb Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 12:41:52 -0400 Subject: [PATCH 09/43] Minor fix --- vllm/entrypoints/chat_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 78e4d7d8a27b7..334ba77a139ed 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -515,7 +515,7 @@ def parse_chat_messages( mm_tracker = MultiModalItemTracker(model_config, tokenizer) for msg in messages: - sub_messages = _parse_chat_message_content(msg, mm_tracker) + sub_messages = _parse_chat_message_content(msg, mm_tracker, model_config.chat_template_content_type) conversation.extend(sub_messages) @@ -533,7 +533,7 @@ def parse_chat_messages_futures( mm_tracker = AsyncMultiModalItemTracker(model_config, tokenizer) for msg in messages: - sub_messages = _parse_chat_message_content(msg, mm_tracker) + sub_messages = _parse_chat_message_content(msg, mm_tracker, model_config.chat_template_content_type) conversation.extend(sub_messages) From a67e03f0e74a7f8b335af381272a60a9ba394c46 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 13:09:59 -0400 Subject: [PATCH 10/43] Fix tests --- tests/entrypoints/test_chat_utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 6eb97ca48168b..9fcd4466915cb 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -307,6 +307,7 @@ def test_parse_chat_messages_context_text_format( phi3v_model_config, phi3v_tokenizer, ): + phi3v_model_config.chat_template_content_type = "openai" conversation, mm_data = parse_chat_messages( [{ "role": "user", @@ -319,10 +320,7 @@ def test_parse_chat_messages_context_text_format( "content": "Some stuff." }, { "role": "user", - "content": [{ - "type": "text", - "text": "What about this one?" - }] + "content": "What about this one?" }], phi3v_model_config, phi3v_tokenizer) assert conversation == [ @@ -335,7 +333,10 @@ def test_parse_chat_messages_context_text_format( }, { "role": "assistant", - "content": "Some stuff." + "content": [{ + "type": "text", + "text": "Some stuff." + }] }, { "role": "user", From 0b95a0b79e1355f1054686cf7dbcb82cce334c49 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 13:24:44 -0400 Subject: [PATCH 11/43] Fix formatting --- vllm/config.py | 4 ++-- vllm/engine/arg_utils.py | 6 ++++-- vllm/entrypoints/chat_utils.py | 6 ++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 83586eae88539..61384455e9bbc 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1481,8 +1481,8 @@ def _verify_args(self) -> None: "typical_acceptance_sampler.") if (self.draft_token_acceptance_method != 'rejection_sampler' - and self.draft_token_acceptance_method != - 'typical_acceptance_sampler'): + and self.draft_token_acceptance_method + != 'typical_acceptance_sampler'): raise ValueError( "Expected draft_token_acceptance_method to be either " "rejection_sampler or typical_acceptance_sampler. 
Instead it " diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index c178d8667abce..31a7bad71c179 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -244,8 +244,10 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: type=str, default='string', choices=['string', 'openai'], - help='The content to choose with chat template. "string" will keep the content field as ' - 'just a string whereas "openai" will parse the content in the current OpenAI format.') + help='The content to choose with chat template. "string" will ' + 'keep the content field as just a string whereas "openai" ' + 'will parse the content in the current OpenAI format.' + ) parser.add_argument('--trust-remote-code', action='store_true', help='Trust remote code from huggingface.') diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 334ba77a139ed..0aeb18d88bb0c 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -515,7 +515,8 @@ def parse_chat_messages( mm_tracker = MultiModalItemTracker(model_config, tokenizer) for msg in messages: - sub_messages = _parse_chat_message_content(msg, mm_tracker, model_config.chat_template_content_type) + sub_messages = _parse_chat_message_content( + msg, mm_tracker, model_config.chat_template_content_type) conversation.extend(sub_messages) @@ -533,7 +534,8 @@ def parse_chat_messages_futures( mm_tracker = AsyncMultiModalItemTracker(model_config, tokenizer) for msg in messages: - sub_messages = _parse_chat_message_content(msg, mm_tracker, model_config.chat_template_content_type) + sub_messages = _parse_chat_message_content( + msg, mm_tracker, model_config.chat_template_content_type) conversation.extend(sub_messages) From d89e8c0081b9b2cb4ebedffa0c2552d46bac23df Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Fri, 18 Oct 2024 13:25:44 -0400 Subject: [PATCH 12/43] Revert chat changes --- tests/entrypoints/openai/test_chat.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index 0fbc4cca83bd2..28cd30c2294dd 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -433,18 +433,28 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI, model=model_name, messages=messages, max_tokens=10, + extra_body=dict(min_tokens=10), temperature=0.0, stream=True, stream_options={ "include_usage": True, - "continuous_usage_stats": True + "continuous_usage_stats": True, }, ) + last_completion_tokens = 0 async for chunk in stream: assert chunk.usage.prompt_tokens >= 0 - assert chunk.usage.completion_tokens >= 0 + assert last_completion_tokens == 0 or \ + chunk.usage.completion_tokens > last_completion_tokens or \ + ( + not chunk.choices and + chunk.usage.completion_tokens == last_completion_tokens + ) assert chunk.usage.total_tokens == (chunk.usage.prompt_tokens + chunk.usage.completion_tokens) + last_completion_tokens = chunk.usage.completion_tokens + + assert last_completion_tokens == 10 # NOTE: Not sure why, but when I place this after `test_guided_regex_chat` @@ -956,4 +966,4 @@ async def test_long_seed(client: openai.AsyncOpenAI): seed=seed) assert ("greater_than_equal" in exc_info.value.message - or "less_than_equal" in exc_info.value.message) + or "less_than_equal" in exc_info.value.message) \ No newline at end of file From be94fc5d023996f297ae0e29efcde09c0b31fa24 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran 
Date: Fri, 18 Oct 2024 13:26:43 -0400
Subject: [PATCH 13/43] Add missing new line

---
 tests/entrypoints/openai/test_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py
index 28cd30c2294dd..3af0032fd2fb0 100644
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -966,4 +966,4 @@ async def test_long_seed(client: openai.AsyncOpenAI):
             seed=seed)

     assert ("greater_than_equal" in exc_info.value.message
-            or "less_than_equal" in exc_info.value.message)
\ No newline at end of file
+            or "less_than_equal" in exc_info.value.message)

From ff7965ab9d6cc99056bfbae8674a39238e0bf166 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:29:10 -0400
Subject: [PATCH 14/43] Minor nits

---
 vllm/config.py                 |  4 ++--
 vllm/entrypoints/chat_utils.py | 10 ++++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 61384455e9bbc..83586eae88539 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1481,8 +1481,8 @@ def _verify_args(self) -> None:
                 "typical_acceptance_sampler.")

         if (self.draft_token_acceptance_method != 'rejection_sampler'
-                and self.draft_token_acceptance_method
-                != 'typical_acceptance_sampler'):
+                and self.draft_token_acceptance_method !=
+                'typical_acceptance_sampler'):
             raise ValueError(
                 "Expected draft_token_acceptance_method to be either "
                 "rejection_sampler or typical_acceptance_sampler. Instead it "

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 0aeb18d88bb0c..bcfd236e64ab0 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -516,7 +516,10 @@ def parse_chat_messages(

     for msg in messages:
         sub_messages = _parse_chat_message_content(
-            msg, mm_tracker, model_config.chat_template_content_type)
+            msg,
+            mm_tracker,
+            model_config.chat_template_content_type,
+        )

         conversation.extend(sub_messages)

@@ -535,7 +538,10 @@ def parse_chat_messages_futures(

     for msg in messages:
         sub_messages = _parse_chat_message_content(
-            msg, mm_tracker, model_config.chat_template_content_type)
+            msg,
+            mm_tracker,
+            model_config.chat_template_content_type,
+        )

         conversation.extend(sub_messages)

From 22489cf2fd4863fc5101650e212e97e655ebef14 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:33:54 -0400
Subject: [PATCH 15/43] Minor nits again

---
 vllm/engine/arg_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 31a7bad71c179..e718da18efa34 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -246,8 +246,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=['string', 'openai'],
             help='The content to choose with chat template. "string" will '
             'keep the content field as just a string whereas "openai" '
-            'will parse the content in the current OpenAI format.'
-        )
+            'will parse the content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

From f8d2cbaebcca3a0e29a6f279af42590eca84e914 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:39:13 -0400
Subject: [PATCH 16/43] Standardize name

---
 tests/entrypoints/test_chat_utils.py |  2 +-
 vllm/config.py                       |  4 ++--
 vllm/engine/arg_utils.py             |  6 +++---
 vllm/entrypoints/chat_utils.py       | 12 ++++++------
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 9fcd4466915cb..92258c8a9116e 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -307,7 +307,7 @@ def test_parse_chat_messages_context_text_format(
     phi3v_model_config,
     phi3v_tokenizer,
 ):
-    phi3v_model_config.chat_template_content_type = "openai"
+    phi3v_model_config.chat_template_text_content_format = "openai"
     conversation, mm_data = parse_chat_messages(
         [{
             "role": "user",

diff --git a/vllm/config.py b/vllm/config.py
index 83586eae88539..7fdf38f2bf0cd 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -110,7 +110,7 @@ def __init__(self,
                  model: str,
                  tokenizer: str,
                  tokenizer_mode: str,
-                 chat_template_content_type: str,
+                 chat_template_text_content_format: str,
                  trust_remote_code: bool,
                  dtype: Union[str, torch.dtype],
                  seed: int,
@@ -137,7 +138,7 @@ def __init__(self,
         self.model = model
         self.tokenizer = tokenizer
         self.tokenizer_mode = tokenizer_mode
-        self.chat_template_content_type = chat_template_content_type
+        self.chat_template_text_content_format = chat_template_text_content_format
         self.trust_remote_code = trust_remote_code
         self.seed = seed
         self.revision = revision

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index e718da18efa34..6df697b2980e1 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -86,7 +86,7 @@ class EngineArgs:
     tokenizer: Optional[str] = None
     skip_tokenizer_init: bool = False
     tokenizer_mode: str = 'auto'
-    chat_template_content_type: str = "string"
+    chat_template_text_content_format: str ='string'
     trust_remote_code: bool = False
    download_dir: Optional[str] = None
     load_format: str = 'auto'
@@ -242,7 +242,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         parser.add_argument(
             '--chat-template-text-content-format',
             type=str,
-            default='string',
+            default=EngineArgs.chat_template_text_content_format,
             choices=['string', 'openai'],
             help='The content to choose with chat template. "string" will '
             'keep the content field as just a string whereas "openai" '
             'will parse the content in the current OpenAI format.')
@@ -850,7 +850,7 @@ def create_model_config(self) -> ModelConfig:
             # We know this is not None because we set it in __post_init__
             tokenizer=cast(str, self.tokenizer),
             tokenizer_mode=self.tokenizer_mode,
-            chat_template_content_type=self.chat_template_content_type,
+            chat_template_text_content_format=self.chat_template_text_content_format,
             trust_remote_code=self.trust_remote_code,
             dtype=self.dtype,
             seed=self.seed,

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index bcfd236e64ab0..7029aceb26607 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -392,7 +392,7 @@ def _parse_chat_message_content_parts(
     role: str,
     parts: Iterable[ChatCompletionContentPartParam],
     mm_tracker: BaseMultiModalItemTracker,
-    chat_template_content_type: str,
+    chat_template_text_content_format: str,
 ) -> List[ConversationMessage]:
     texts: List[str] = []

@@ -442,7 +442,7 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
-    elif has_text and chat_template_content_type == "openai":
+    elif has_text and chat_template_text_content_format == "openai":
         text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]
@@ -455,7 +455,7 @@ def _parse_chat_message_content_parts(
 def _parse_chat_message_content(
     message: ChatCompletionMessageParam,
     mm_tracker: BaseMultiModalItemTracker,
-    chat_template_content_type: str,
+    chat_template_text_content_format: str,
 ) -> List[ConversationMessage]:
     role = message["role"]
     content = message.get("content")
@@ -469,7 +469,7 @@ def _parse_chat_message_content(
         role,
         content,  # type: ignore
         mm_tracker,
-        chat_template_content_type,
+        chat_template_text_content_format,
     )

     for result_msg in result:
@@ -516,7 +516,7 @@ def parse_chat_messages(
         sub_messages = _parse_chat_message_content(
             msg,
             mm_tracker,
-            model_config.chat_template_content_type,
+            model_config.chat_template_text_content_format,
         )

         conversation.extend(sub_messages)
@@ -538,7 +538,7 @@ def parse_chat_messages_futures(
         sub_messages = _parse_chat_message_content(
             msg,
             mm_tracker,
-            model_config.chat_template_content_type,
+            model_config.chat_template_text_content_format,
         )

         conversation.extend(sub_messages)

From 54532f2ec4054a9d55515eb8a8101d049a9070ac Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:44:53 -0400
Subject: [PATCH 17/43] Remove unnecessary variable

---
 vllm/entrypoints/chat_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 7029aceb26607..bd31bbd871031 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -442,7 +442,7 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
-    elif has_text and chat_template_text_content_format == "openai":
+    if chat_template_text_content_format == "openai":
         text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]

From ea7274d2ff4d9392aa4ae0d7c2806ae063a0461a Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:45:19 -0400
Subject: [PATCH 18/43] Actually remove unnecessary variable

---
 vllm/entrypoints/chat_utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index bd31bbd871031..bdf7b6e2c4b11 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -402,13 +402,11 @@ def _parse_chat_message_content_parts(
         MODEL_KEEP_MULTI_MODAL_CONTENT

     has_image = False
-    has_text = False
     for part in parts:
         part_type = part["type"]
         if part_type == "text":
             text = _TextParser(part)["text"]
             texts.append(text)
-            has_text = True
         elif part_type == "image_url":
             image_url = _ImageParser(part)["image_url"]

From f3608bee2801a1cc1432cd6b3145844abdde18fe Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:51:23 -0400
Subject: [PATCH 19/43] Make variable name simpler

---
 tests/entrypoints/test_chat_utils.py |  2 +-
 vllm/config.py                       |  4 ++--
 vllm/engine/arg_utils.py             | 14 +++++++-------
 vllm/entrypoints/chat_utils.py       | 12 ++++++------
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 92258c8a9116e..f2585696cc8df 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -307,7 +307,7 @@ def test_parse_chat_messages_context_text_format(
     phi3v_model_config,
     phi3v_tokenizer,
 ):
-    phi3v_model_config.chat_template_text_content_format = "openai"
+    phi3v_model_config.chat_template_text_format = "openai"
     conversation, mm_data = parse_chat_messages(
         [{
             "role": "user",

diff --git a/vllm/config.py b/vllm/config.py
index 7fdf38f2bf0cd..9c4605723fb39 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -110,7 +110,7 @@ def __init__(self,
                  model: str,
                  tokenizer: str,
                  tokenizer_mode: str,
-                 chat_template_text_content_format: str,
+                 chat_template_text_format: str,
                  trust_remote_code: bool,
                  dtype: Union[str, torch.dtype],
                  seed: int,
@@ -138,7 +138,7 @@ def __init__(self,
         self.model = model
         self.tokenizer = tokenizer
         self.tokenizer_mode = tokenizer_mode
-        self.chat_template_text_content_format = chat_template_text_content_format
+        self.chat_template_text_format = chat_template_text_format
         self.trust_remote_code = trust_remote_code
         self.seed = seed
         self.revision = revision

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 6df697b2980e1..fd00ee2b00932 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -86,7 +86,7 @@ class EngineArgs:
     tokenizer: Optional[str] = None
     skip_tokenizer_init: bool = False
     tokenizer_mode: str = 'auto'
-    chat_template_text_content_format: str ='string'
+    chat_template_text_format: str ='string'
     trust_remote_code: bool = False
     download_dir: Optional[str] = None
     load_format: str = 'auto'
@@ -240,13 +240,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             'always use the slow tokenizer. \n* '
             '"mistral" will always use the `mistral_common` tokenizer.')
         parser.add_argument(
-            '--chat-template-text-content-format',
+            '--chat-template-text-format',
             type=str,
-            default=EngineArgs.chat_template_text_content_format,
+            default=EngineArgs.chat_template_text_format,
             choices=['string', 'openai'],
-            help='The content to choose with chat template. "string" will '
-            'keep the content field as just a string whereas "openai" '
-            'will parse the content in the current OpenAI format.')
+            help='The format to render text content within a chat template. '
+            '"string" will keep the content field as a string whereas '
+            '"openai" will parse the content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')
@@ -850,7 +850,7 @@ def create_model_config(self) -> ModelConfig:
             # We know this is not None because we set it in __post_init__
             tokenizer=cast(str, self.tokenizer),
             tokenizer_mode=self.tokenizer_mode,
-            chat_template_text_content_format=self.chat_template_text_content_format,
+            chat_template_text_format=self.chat_template_text_format,
             trust_remote_code=self.trust_remote_code,
             dtype=self.dtype,
             seed=self.seed,

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index bdf7b6e2c4b11..600d95a2e93b3 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -392,7 +392,7 @@ def _parse_chat_message_content_parts(
     role: str,
     parts: Iterable[ChatCompletionContentPartParam],
     mm_tracker: BaseMultiModalItemTracker,
-    chat_template_text_content_format: str,
+    chat_template_text_format: str,
 ) -> List[ConversationMessage]:
     texts: List[str] = []

@@ -440,7 +440,7 @@ def _parse_chat_message_content_parts(
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
-    if chat_template_text_content_format == "openai":
+    if chat_template_text_format == "openai":
         text_prompt = [{'type': 'text', 'text': text_prompt}]

     return [ConversationMessage(role=role, content=text_prompt)]
@@ -453,7 +453,7 @@ def _parse_chat_message_content_parts(
 def _parse_chat_message_content(
     message: ChatCompletionMessageParam,
     mm_tracker: BaseMultiModalItemTracker,
-    chat_template_text_content_format: str,
+    chat_template_text_format: str,
 ) -> List[ConversationMessage]:
     role = message["role"]
     content = message.get("content")
@@ -469,7 +469,7 @@ def _parse_chat_message_content(
         role,
         content,  # type: ignore
         mm_tracker,
-        chat_template_text_content_format,
+        chat_template_text_format,
     )

     for result_msg in result:
@@ -516,7 +516,7 @@ def parse_chat_messages(
         sub_messages = _parse_chat_message_content(
             msg,
             mm_tracker,
-            model_config.chat_template_text_content_format,
+            model_config.chat_template_text_format,
         )

         conversation.extend(sub_messages)
@@ -538,7 +538,7 @@ def parse_chat_messages_futures(
         sub_messages = _parse_chat_message_content(
             msg,
             mm_tracker,
-            model_config.chat_template_text_content_format,
+            model_config.chat_template_text_format,
         )

         conversation.extend(sub_messages)

From 79a22bac50c572d0bedc06f772b8adaff8482a47 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:53:00 -0400
Subject: [PATCH 20/43] Fix help doc

---
 vllm/engine/arg_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index fd00ee2b00932..7e58008ae72fd 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -246,7 +246,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=['string', 'openai'],
             help='The format to render text content within a chat template. '
             '"string" will keep the content field as a string whereas '
-            '"openai" will parse the content in the current OpenAI format.')
+            '"openai" will parse content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

From c7e53716f0042cacd95e0c188f4bc742ffa14028 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 13:59:56 -0400
Subject: [PATCH 21/43] Fix formatting

---
 vllm/engine/arg_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 7e58008ae72fd..25ba20976dbe3 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -86,7 +86,7 @@ class EngineArgs:
     tokenizer: Optional[str] = None
     skip_tokenizer_init: bool = False
     tokenizer_mode: str = 'auto'
-    chat_template_text_format: str ='string'
+    chat_template_text_format: str = 'string'
     trust_remote_code: bool = False
     download_dir: Optional[str] = None
     load_format: str = 'auto'
@@ -245,8 +245,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             default=EngineArgs.chat_template_text_format,
             choices=['string', 'openai'],
             help='The format to render text content within a chat template. '
-            '"string" will keep the content field as a string whereas '
-            '"openai" will parse content in the current OpenAI format.')
+                 '"string" will keep the content field as a string whereas '
+                 '"openai" will parse content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

From 80d45d51121fc3cb27776c6bdccdb1c555fb4186 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 14:43:18 -0400
Subject: [PATCH 22/43] Fix default value in config

---
 vllm/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 9c4605723fb39..09c8b501b0f60 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -110,7 +110,6 @@ def __init__(self,
                  model: str,
                  tokenizer: str,
                  tokenizer_mode: str,
-                 chat_template_text_format: str,
                  trust_remote_code: bool,
                  dtype: Union[str, torch.dtype],
                  seed: int,
@@ -134,11 +133,11 @@ def __init__(self,
                  use_async_output_proc: bool = True,
                  override_neuron_config: Optional[Dict[str, Any]] = None,
                  config_format: ConfigFormat = ConfigFormat.AUTO,
+                 chat_template_text_format: str = "string",
                  mm_processor_kwargs: Optional[Dict[str, Any]] = None) -> None:
         self.model = model
         self.tokenizer = tokenizer
         self.tokenizer_mode = tokenizer_mode
-        self.chat_template_text_format = chat_template_text_format
         self.trust_remote_code = trust_remote_code
         self.seed = seed
         self.revision = revision
@@ -169,6 +168,7 @@ def __init__(self,
                                                     self.model, revision)
         self.dtype = _get_and_verify_dtype(self.hf_text_config, dtype)
         self.use_async_output_proc = use_async_output_proc
+        self.chat_template_text_format = chat_template_text_format
         self.mm_processor_kwargs = mm_processor_kwargs

         # Set enforce_eager to False if the value is unset.
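[Note: at this point in the series the new flag is wired end to end. A minimal sketch of the behavior it selects, based on the test updated in patches 10 and 19; `phi3v_model_config` and `phi3v_tokenizer` are the pytest fixtures from tests/entrypoints/test_chat_utils.py, and the default remains "string".]

```python
from vllm.entrypoints.chat_utils import parse_chat_messages

messages = [{
    "role": "user",
    "content": [{"type": "text", "text": "What's in this text?"}]
}]

# Default ("string"): OpenAI-style text parts are flattened to a plain string.
conversation, _ = parse_chat_messages(messages, phi3v_model_config,
                                      phi3v_tokenizer)
assert conversation == [{"role": "user", "content": "What's in this text?"}]

# "openai": the part structure is preserved for the chat template to render.
phi3v_model_config.chat_template_text_format = "openai"
conversation, _ = parse_chat_messages(messages, phi3v_model_config,
                                      phi3v_tokenizer)
assert conversation == [{
    "role": "user",
    "content": [{"type": "text", "text": "What's in this text?"}]
}]
```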
From 89dd84b4b98d4e1f25135a158e73b36d6caa97fc Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:03:53 -0400
Subject: [PATCH 23/43] Fix failing test

---
 tests/entrypoints/openai/test_serving_chat.py | 1 +
 vllm/engine/llm_engine.py                     | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index ec550fe82c70f..d91558776738d 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -25,6 +25,7 @@ class MockModelConfig:
     tokenizer = MODEL_NAME
     trust_remote_code = False
     tokenizer_mode = "auto"
+    chat_template_text_format = "string"
     max_model_len = 100
     tokenizer_revision = None
     embedding_mode = False

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 61c21887e6816..cd9387e961acb 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -251,7 +251,7 @@ def __init__(
             "num_scheduler_steps=%d, chunked_prefill_enabled=%s "
             "multi_step_stream_outputs=%s, enable_prefix_caching=%s, "
             "use_async_output_proc=%s, use_cached_outputs=%s, "
-            "mm_processor_kwargs=%s)",
+            "chat_template_text_format=%s, mm_processor_kwargs=%s)",
             VLLM_VERSION,
             model_config.model,
             speculative_config,
@@ -286,6 +286,7 @@ def __init__(
             cache_config.enable_prefix_caching,
             model_config.use_async_output_proc,
             use_cached_outputs,
+            model_config.chat_template_text_format,
             model_config.mm_processor_kwargs,
         )
         # TODO(woosuk): Print more configs in debug mode.

From faafc31660c9205b7c7c11f908f65296851c00e4 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:13:14 -0400
Subject: [PATCH 24/43] Fix failing test

---
 tests/entrypoints/test_chat_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -23,6 +23,7 @@ def phi3v_model_config():
                        trust_remote_code=True,
                        dtype="bfloat16",
                        seed=0,
+                       chat_template_text_format="string",
                        limit_mm_per_prompt={
                            "image": 2,
                        })

From ef74a9cfe1fecde7e14cb4841d2b9c3b3230d8d2 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:20:53 -0400
Subject: [PATCH 25/43] Fix failing test again

---
 vllm/entrypoints/chat_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 600d95a2e93b3..a2c5bab7ac8ee 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -441,7 +441,8 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        text_prompt = [{'type': 'text', 'text': text_prompt}]
+        role_content = [{'type': 'text', 'text': text_prompt}]
+        return [ConversationMessage(role=role, content=role_content)]

     return [ConversationMessage(role=role, content=text_prompt)]

From 1eca1f509af2dc5390568f63dcb162665e9c6e83 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:34:27 -0400
Subject: [PATCH 26/43] Fix mypy error

---
 vllm/entrypoints/chat_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index a2c5bab7ac8ee..99d7bed7957f0 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -96,7 +96,7 @@ class ConversationMessage(TypedDict, total=False):
     role: Required[str]
     """The role of the message's author."""

-    content: Optional[str]
+    content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]]
    """The contents of the message"""

     tool_call_id: Optional[str]
@@ -441,7 +441,7 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        role_content = [{'type': 'text', 'text': text_prompt}]
+        role_content = [ChatCompletionContentPartTextParam(type="text", text=content)]
         return [ConversationMessage(role=role, content=role_content)]

     return [ConversationMessage(role=role, content=text_prompt)]

From 2fe18bd55ca24f4e42d87c9696aeb45d91c94c35 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:43:59 -0400
Subject: [PATCH 27/43] Fix mypy error by ignoring

---
 vllm/entrypoints/chat_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 99d7bed7957f0..b24904df7a250 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -96,7 +96,7 @@ class ConversationMessage(TypedDict, total=False):
     role: Required[str]
     """The role of the message's author."""

-    content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]]
+    content: Optional[str]
     """The contents of the message"""

     tool_call_id: Optional[str]
@@ -441,7 +441,7 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        role_content = [ChatCompletionContentPartTextParam(type="text", text=content)]
+        role_content = [ChatCompletionContentPartTextParam(type="text", text=text_prompt)]  # type: ignore
         return [ConversationMessage(role=role, content=role_content)]

     return [ConversationMessage(role=role, content=text_prompt)]

From 6311517c79dbaae8491cb3d2b4bce8786ed28470 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:46:07 -0400
Subject: [PATCH 28/43] Put ignore in the right place

---
 vllm/entrypoints/chat_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index b24904df7a250..0994e8674f0d5 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -441,8 +441,8 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        role_content = [ChatCompletionContentPartTextParam(type="text", text=text_prompt)]  # type: ignore
-        return [ConversationMessage(role=role, content=role_content)]
+        role_content = [ChatCompletionContentPartTextParam(type="text", text=text_prompt)]
+        return [ConversationMessage(role=role, content=role_content)]  # type: ignore

     return [ConversationMessage(role=role, content=text_prompt)]

From f3f38871400f5eaa3faaefb6a1af4ec5e4adb9fe Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 16:48:59 -0400
Subject: [PATCH 29/43] Fix formatting

---
 vllm/engine/arg_utils.py       | 2 +-
 vllm/entrypoints/chat_utils.py | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 2ec460492e8c4..38962a55379a5 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -256,7 +256,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=['string', 'openai'],
             help='The format to render text content within a chat template. '
             '"string" will keep the content field as a string whereas '
-            '"openai" will parse content in the current OpenAI format.')
+            '"openai"yapf will parse content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 0994e8674f0d5..415b139d23a15 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -441,8 +441,12 @@ def _parse_chat_message_content_parts(
         text_prompt = _get_full_multimodal_text_prompt(
             mm_placeholder_counts, text_prompt)
     if chat_template_text_format == "openai":
-        role_content = [ChatCompletionContentPartTextParam(type="text", text=text_prompt)]
-        return [ConversationMessage(role=role, content=role_content)]  # type: ignore
+        role_content = [
+            ChatCompletionContentPartTextParam(type="text",
+                                               text=text_prompt)
+        ]
+        return [ConversationMessage(role=role,
+                                    content=role_content)]  # type: ignore

     return [ConversationMessage(role=role, content=text_prompt)]

From a08b342ddecc4b6f96a6e935a376c3abe8d512e0 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Fri, 18 Oct 2024 17:50:19 -0400
Subject: [PATCH 30/43] Remove stupid typo

---
 vllm/engine/arg_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 38962a55379a5..2ec460492e8c4 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -256,7 +256,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=['string', 'openai'],
             help='The format to render text content within a chat template. '
             '"string" will keep the content field as a string whereas '
-            '"openai"yapf will parse content in the current OpenAI format.')
+            '"openai" will parse content in the current OpenAI format.')
         parser.add_argument('--trust-remote-code',
                             action='store_true',
                             help='Trust remote code from huggingface.')

From 75ed3e6e4982a86edfafd69896511274aaef1d8b Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Tue, 22 Oct 2024 12:32:11 -0400
Subject: [PATCH 31/43] Fix mypy and tests

---
 tests/entrypoints/test_chat_utils.py |  2 +-
 vllm/entrypoints/chat_utils.py       | 11 ++++-------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 76f2ffa6bdff9..47b410bdf9e20 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -15,7 +15,7 @@
 PHI3V_MODEL_ID = "microsoft/Phi-3.5-vision-instruct"

-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def phi3v_model_config():
     return ModelConfig(PHI3V_MODEL_ID,
                        task="generate",

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 20a4458c912d8..49cfe5b78c70c 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -121,7 +121,7 @@ class ConversationMessage(TypedDict, total=False):
     role: Required[str]
     """The role of the message's author."""

-    content: Optional[str]
+    content: Union[Optional[str], List[Dict[str, str]]]
     """The contents of the message"""

     tool_call_id: Optional[str]
@@ -523,19 +523,16 @@ def _parse_chat_message_content_parts(
         if has_image:
             role_content = [{'type': 'image'}] + role_content
         return [ConversationMessage(role=role,
-                                    content=role_content)]  # type: ignore
+                                    content=role_content)]
     else:
         mm_placeholder_counts = mm_parser.mm_placeholder_counts()
         if mm_placeholder_counts:
             text_prompt = _get_full_multimodal_text_prompt(
                 mm_placeholder_counts, text_prompt)
         if chat_template_text_format == "openai":
-            role_content = [
-                ChatCompletionContentPartTextParam(type="text",
-                                                   text=text_prompt)
-            ]
+            role_content = [{'type': 'text', 'text': text_prompt}]
             return [ConversationMessage(role=role,
-                                        content=role_content)]  # type: ignore
+                                        content=role_content)]

     return [ConversationMessage(role=role, content=text_prompt)]

From 3e55da82ecd1de81c2d9013837800ddd35d1f571 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Tue, 22 Oct 2024 13:42:40 -0400
Subject: [PATCH 32/43] Fix mypy

---
 vllm/entrypoints/openai/serving_chat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index c3fa0e44e5e8d..962f36ff294be 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -384,7 +384,7 @@ async def chat_completion_stream_generator(
                     # Send response to echo the input portion of the
                     # last message
                     if request.echo or request.continue_final_message:
-                        last_msg_content: str = ""
+                        last_msg_content: Union[str, List[Dict[str, str]]] = ""
                         if conversation and "content" in conversation[
                                 -1] and conversation[-1].get("role") == role:
                             last_msg_content = conversation[-1]["content"] or ""
@@ -724,7 +724,7 @@ async def chat_completion_full_generator(
             choices.append(choice_data)

         if request.echo or request.continue_final_message:
-            last_msg_content = ""
+            last_msg_content: Union[str, List[Dict[str, str]]] = ""
            if conversation and "content" in conversation[-1] and conversation[
                     -1].get("role") == role:
                 last_msg_content = conversation[-1]["content"] or ""
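[Note: the next two commits handle echoing when the final conversation message carries a list of text parts rather than a plain string. A minimal, self-contained sketch of the flattening they introduce, mirroring the serving_chat.py hunks below; the message values are hypothetical.]

```python
from typing import Dict, List, Union

# A final message whose content arrived in the OpenAI part format.
last_msg_content: Union[str, List[Dict[str, str]]] = [
    {"type": "text", "text": "what is 1+1?"},
    {"type": "text", "text": "please answer briefly."},
]

# Collapse OpenAI-style parts back into a single string before echoing,
# matching the join added in PATCH 33 and reformatted in PATCHES 34/36.
if isinstance(last_msg_content, list):
    last_msg_content = "\n".join(msg["text"] for msg in last_msg_content)

assert last_msg_content == "what is 1+1?\nplease answer briefly."
```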
From 7a01d53dcbe02d35bbc9cce41d14ecf0fa621b63 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Tue, 22 Oct 2024 14:04:14 -0400 Subject: [PATCH 33/43] Fix mypy again --- vllm/entrypoints/openai/serving_chat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 962f36ff294be..752fd1ee2b806 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -728,6 +728,8 @@ async def chat_completion_full_generator( if conversation and "content" in conversation[-1] and conversation[ -1].get("role") == role: last_msg_content = conversation[-1]["content"] or "" + if isinstance(last_msg_content, list): + last_msg_content = "\n".join([msg['text'] for msg in last_msg_content]) for choice in choices: full_message = last_msg_content + (choice.message.content From bef9a2dd4661b4d4996c72980b032f367cf77277 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Tue, 22 Oct 2024 14:06:33 -0400 Subject: [PATCH 34/43] Fix formatting --- vllm/entrypoints/openai/serving_chat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 752fd1ee2b806..e3a1786e68102 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -729,7 +729,8 @@ async def chat_completion_full_generator( -1].get("role") == role: last_msg_content = conversation[-1]["content"] or "" if isinstance(last_msg_content, list): - last_msg_content = "\n".join([msg['text'] for msg in last_msg_content]) + last_msg_content = "\n".join( + [msg['text'] for msg in last_msg_content]) for choice in choices: full_message = last_msg_content + (choice.message.content From c0fc5c9697c0f9a6ac1c640d08a23f7be7adb4ae Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Tue, 22 Oct 2024 14:09:39 -0400 Subject: [PATCH 35/43] Fix formatting again --- vllm/entrypoints/chat_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 49cfe5b78c70c..77e9ccd182002 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -522,8 +522,7 @@ def _parse_chat_message_content_parts( if has_image: role_content = [{'type': 'image'}] + role_content - return [ConversationMessage(role=role, - content=role_content)] + return [ConversationMessage(role=role, content=role_content)] else: mm_placeholder_counts = mm_parser.mm_placeholder_counts() if mm_placeholder_counts: @@ -531,8 +530,7 @@ def _parse_chat_message_content_parts( mm_placeholder_counts, text_prompt) if chat_template_text_format == "openai": role_content = [{'type': 'text', 'text': text_prompt}] - return [ConversationMessage(role=role, - content=role_content)] + return [ConversationMessage(role=role, content=role_content)] return [ConversationMessage(role=role, content=text_prompt)] From 50ba0ce69cce89755cc3e55d47a1fc4d24357cc0 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 09:40:14 -0700 Subject: [PATCH 36/43] Add docs and str generator --- docs/source/serving/openai_compatible_server.md | 16 ++++++++++++++++ vllm/entrypoints/openai/serving_chat.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md index cc8e539a8a6d3..c33c75a3730b2 100644 --- a/docs/source/serving/openai_compatible_server.md +++ 
b/docs/source/serving/openai_compatible_server.md
@@ -103,6 +103,22 @@ vllm serve --chat-template ./path-to-chat-template.jinja
 
 vLLM community provides a set of chat templates for popular models. You can find them in the examples
 directory [here](https://github.com/vllm-project/vllm/tree/main/examples/)
+With the inclusion of multi-modal chat APIs, the OpenAI spec now accepts chat messages in a new format that specifies
+both a `type` and a `text` field. An example is provided below:
+```python
+completion = client.chat.completions.create(
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    messages=[
+        {"role": "user", "content": [{"type": "text", "text": "Classify this sentiment: vLLM is wonderful!"}]}
+    ]
+)
+```
+Most chat templates for LLMs expect the `content` field to be a `string`, but some newer models such as
+`meta-llama/Llama-Guard-3-1B` expect the content to be formatted according to the new OpenAI spec. To choose which
+format vLLM should parse the content in, use the `--chat-template-text-format` argument to specify
+between `string` or `openai`.
+
+
 ## Command line arguments for the server
 
 ```{argparse}
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index e3a1786e68102..26b261780ca79 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -729,8 +729,8 @@ async def chat_completion_full_generator(
                 -1].get("role") == role:
             last_msg_content = conversation[-1]["content"] or ""
             if isinstance(last_msg_content, list):
-                last_msg_content = "\n".join(
-                    [msg['text'] for msg in last_msg_content])
+                last_msg_content = "\n".join(msg['text']
+                                             for msg in last_msg_content)
 
         for choice in choices:
             full_message = last_msg_content + (choice.message.content

From 62e35bc617dc69c101bbc784854d3049401346f4 Mon Sep 17 00:00:00 2001
From: Vinay Damodaran
Date: Wed, 23 Oct 2024 09:55:43 -0700
Subject: [PATCH 37/43] Add a bit more docs

Signed-off-by: Vinay Damodaran

---
 docs/source/serving/openai_compatible_server.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md
index c33c75a3730b2..413c87ab28755 100644
--- a/docs/source/serving/openai_compatible_server.md
+++ b/docs/source/serving/openai_compatible_server.md
@@ -116,7 +116,8 @@ completion = client.chat.completions.create(
 Most chat templates for LLMs expect the `content` field to be a `string`, but some newer models such as
 `meta-llama/Llama-Guard-3-1B` expect the content to be formatted according to the new OpenAI spec. To choose which
 format vLLM should parse the content in, use the `--chat-template-text-format` argument to specify
-between `string` or `openai`.
+between `string` or `openai`. The default is `string`, and vLLM internally converts both spec formats to match it
+unless `openai` is explicitly specified.
 
 
## Command line arguments for the server From 0130c74f2633b61ba983bc240e31bf47dd9eac7e Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 10:55:03 -0700 Subject: [PATCH 38/43] Fix formatting --- vllm/entrypoints/chat_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 6b92053e318b2..b16854b535571 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -553,7 +553,6 @@ def _parse_chat_message_content_part( raise NotImplementedError(f"Unknown part type: {part_type}") - # No need to validate using Pydantic again _AssistantParser = partial(cast, ChatCompletionAssistantMessageParam) _ToolParser = partial(cast, ChatCompletionToolMessageParam) From f40fbf9cadc898afd8811f4c1cdaa80347cb4fd3 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:45:03 -0700 Subject: [PATCH 39/43] Simplify check with content parser Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 0dd0bfd743ed5..eaed693397cf9 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -431,7 +431,7 @@ def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int], def _parse_chat_message_content_mm_part( part: ChatCompletionContentPartParam) -> Tuple[str, str]: """ - Parses a given multi modal content part based on its type. + Parses a given multi-modal content part based on its type. Args: part: A dict containing the content part, with a potential 'type' field. @@ -492,7 +492,7 @@ def _parse_chat_message_content_parts( mm_parser = mm_tracker.create_parser() keep_multimodal_content = \ mm_tracker._model_config.hf_config.model_type in \ - MODEL_KEEP_MULTI_MODAL_CONTENT + MODEL_KEEP_MULTI_MODAL_CONTENT or (chat_template_text_format == "openai") for part in parts: parse_res = _parse_chat_message_content_part( @@ -510,9 +510,6 @@ def _parse_chat_message_content_parts( if mm_placeholder_counts: text_prompt = _get_full_multimodal_text_prompt(mm_placeholder_counts, text_prompt) - if chat_template_text_format == "openai": - role_content = [{'type': 'text', 'text': text_prompt}] - return [ConversationMessage(role=role, content=role_content)] return [ConversationMessage(role=role, content=text_prompt)] From 1a772c773253fe0a09c333f19357a9da33767efc Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:46:45 -0700 Subject: [PATCH 40/43] Fix ruff Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index eaed693397cf9..54407063948dc 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -492,7 +492,8 @@ def _parse_chat_message_content_parts( mm_parser = mm_tracker.create_parser() keep_multimodal_content = \ mm_tracker._model_config.hf_config.model_type in \ - MODEL_KEEP_MULTI_MODAL_CONTENT or (chat_template_text_format == "openai") + MODEL_KEEP_MULTI_MODAL_CONTENT or \ + (chat_template_text_format == "openai") for part in parts: parse_res = _parse_chat_message_content_part( From fd61ada92c483d8648ee5336121cd5e3d8326fee Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:51:06 -0700 Subject: [PATCH 41/43] Rename variable to be more appropriate Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 54407063948dc..db8b80bb572a3 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -490,14 +490,14 @@ def _parse_chat_message_content_parts( content: List[Union[str, Dict[str, str]]] = [] mm_parser = mm_tracker.create_parser() - keep_multimodal_content = \ + wrap_dicts = \ mm_tracker._model_config.hf_config.model_type in \ MODEL_KEEP_MULTI_MODAL_CONTENT or \ (chat_template_text_format == "openai") for part in parts: parse_res = _parse_chat_message_content_part( - part, mm_parser, wrap_dicts=keep_multimodal_content) + part, mm_parser, wrap_dicts=wrap_dicts) if parse_res: content.append(parse_res) From a04d76d309309f746a31a66c4a6620124984114b Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:52:22 -0700 Subject: [PATCH 42/43] Fix missing part Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index db8b80bb572a3..6f3ee3bbc71a6 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -501,7 +501,7 @@ def _parse_chat_message_content_parts( if parse_res: content.append(parse_res) - if keep_multimodal_content: + if wrap_dicts: # Parsing wraps images and texts as interleaved dictionaries return [ConversationMessage(role=role, content=content)] # type: ignore From 1bb9faac4179e9106c4f6ad1347fafacca899523 Mon Sep 17 00:00:00 2001 From: Vinay Damodaran Date: Wed, 23 Oct 2024 19:57:19 -0700 Subject: [PATCH 43/43] Fix formatting Signed-off-by: Vinay Damodaran --- vllm/entrypoints/chat_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 6f3ee3bbc71a6..fef6a91414db6 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -497,7 +497,10 @@ def _parse_chat_message_content_parts( for part in parts: parse_res = _parse_chat_message_content_part( - part, mm_parser, wrap_dicts=wrap_dicts) + part, + mm_parser, + wrap_dicts=wrap_dicts, + ) if parse_res: content.append(parse_res)
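
Taken together, the series makes the parsed shape of chat content configurable per deployment. A hedged end-to-end sketch of exercising the new flag (the server invocation, port, and model name are illustrative, not taken from this series):

```python
# Illustrative server launch using the flag this series documents:
#   vllm serve meta-llama/Llama-Guard-3-1B --chat-template-text-format openai
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# With --chat-template-text-format openai, the text parts below are kept as
# {"type": "text", "text": ...} dicts when rendered into the chat template;
# with the default "string", they are first joined into a single string.
completion = client.chat.completions.create(
    model="meta-llama/Llama-Guard-3-1B",
    messages=[{
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Classify this sentiment: vLLM is wonderful!"
        }],
    }],
)
print(completion.choices[0].message.content)
```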