From 2ea9d856027b9f187bc13965ecdb4dc9eefce968 Mon Sep 17 00:00:00 2001
From: "will.yang"
Date: Tue, 22 Apr 2025 10:48:28 +0800
Subject: [PATCH 1/4] add reasoning content to ChatCompletion

---
 src/agents/models/chatcmpl_stream_handler.py | 75 +++++++++++++++++---
 1 file changed, 64 insertions(+), 11 deletions(-)

diff --git a/src/agents/models/chatcmpl_stream_handler.py b/src/agents/models/chatcmpl_stream_handler.py
index 32f04acb..f099d86a 100644
--- a/src/agents/models/chatcmpl_stream_handler.py
+++ b/src/agents/models/chatcmpl_stream_handler.py
@@ -48,6 +48,9 @@ async def handle_stream(
         usage: CompletionUsage | None = None
         state = StreamingState()
+        is_reasoning_model = False
+        emit_reasoning_content = False
+        emit_content = False
 
         async for chunk in stream:
             if not state.started:
                 state.started = True
@@ -62,9 +65,16 @@
                 continue
 
             delta = chunk.choices[0].delta
+            reasoning_content = None
+            content = None
+            if hasattr(delta, "reasoning_content"):
+                reasoning_content = delta.reasoning_content
+                is_reasoning_model = True
+            if hasattr(delta, "content"):
+                content = delta.content
 
             # Handle text
-            if delta.content:
+            if reasoning_content or content:
                 if not state.text_content_index_and_output:
                     # Initialize a content tracker for streaming text
                     state.text_content_index_and_output = (
@@ -100,16 +110,59 @@
                         ),
                         type="response.content_part.added",
                     )
-                # Emit the delta for this segment of content
-                yield ResponseTextDeltaEvent(
-                    content_index=state.text_content_index_and_output[0],
-                    delta=delta.content,
-                    item_id=FAKE_RESPONSES_ID,
-                    output_index=0,
-                    type="response.output_text.delta",
-                )
-                # Accumulate the text into the response part
-                state.text_content_index_and_output[1].text += delta.content
+
+                if reasoning_content is not None:
+                    if not emit_reasoning_content:
+                        emit_reasoning_content = True
+
+                        reasoning_content_title = "# reasoning content\n\n"
+                        # Emit the reasoning content title
+                        yield ResponseTextDeltaEvent(
+                            content_index=state.text_content_index_and_output[0],
+                            delta=reasoning_content_title,
+                            item_id=FAKE_RESPONSES_ID,
+                            output_index=0,
+                            type="response.output_text.delta",
+                        )
+                        # Accumulate the text into the response part
+                        state.text_content_index_and_output[1].text += reasoning_content_title
+
+                    # Emit the delta for this segment of content
+                    yield ResponseTextDeltaEvent(
+                        content_index=state.text_content_index_and_output[0],
+                        delta=reasoning_content,
+                        item_id=FAKE_RESPONSES_ID,
+                        output_index=0,
+                        type="response.output_text.delta",
+                    )
+                    # Accumulate the text into the response part
+                    state.text_content_index_and_output[1].text += reasoning_content
+
+                if content is not None:
+                    if not emit_content and is_reasoning_model:
+                        emit_content = True
+                        content_title = "\n\n# content\n\n"
+                        # Emit the content title
+                        yield ResponseTextDeltaEvent(
+                            content_index=state.text_content_index_and_output[0],
+                            delta=content_title,
+                            item_id=FAKE_RESPONSES_ID,
+                            output_index=0,
+                            type="response.output_text.delta",
+                        )
+                        # Accumulate the text into the response part
+                        state.text_content_index_and_output[1].text += content_title
+
+                    # Emit the delta for this segment of content
+                    yield ResponseTextDeltaEvent(
+                        content_index=state.text_content_index_and_output[0],
+                        delta=content,
+                        item_id=FAKE_RESPONSES_ID,
+                        output_index=0,
+                        type="response.output_text.delta",
+                    )
+                    # Accumulate the text into the response part
+                    state.text_content_index_and_output[1].text += content
 
             # Handle refusals (model declines to answer)
             if delta.refusal:
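Note: `reasoning_content` is not a declared field of `ChoiceDelta`; it is the extra per-delta field that DeepSeek-style Chat Completions endpoints return, which is why the handler probes for it with `hasattr` rather than reading it directly. A minimal client-side sketch of the stream shape this patch targets (the base URL, API key, and model name below are placeholders, not taken from the patch):

    import asyncio

    from openai import AsyncOpenAI


    async def main() -> None:
        # Placeholder endpoint and model; substitute a reasoning-capable API.
        client = AsyncOpenAI(base_url="https://api.example.com/v1", api_key="sk-placeholder")
        stream = await client.chat.completions.create(
            model="some-reasoning-model",
            messages=[{"role": "user", "content": "Hello"}],
            stream=True,
        )
        async for chunk in stream:
            delta = chunk.choices[0].delta
            # Reasoning endpoints emit `reasoning_content` deltas first, then
            # the usual `content` deltas: the two fields handle_stream() reads.
            if getattr(delta, "reasoning_content", None):
                print("reasoning:", delta.reasoning_content)
            elif delta.content:
                print("content:", delta.content)


    asyncio.run(main())

With the patch applied, both fields are folded into a single streamed text part, separated by the "# reasoning content" and "# content" markdown titles, so existing consumers of the text delta events keep working unchanged.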
From a07ccda5db3f518bda2c89f6e58493d79d2304e0 Mon Sep 17 00:00:00 2001
From: "will.yang"
Date: Wed, 23 Apr 2025 22:51:41 +0800
Subject: [PATCH 2/4] add test for reasoning content

---
 tests/test_openai_chatcompletions_stream.py | 97 +++++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/tests/test_openai_chatcompletions_stream.py b/tests/test_openai_chatcompletions_stream.py
index b82f2430..2549cd49 100644
--- a/tests/test_openai_chatcompletions_stream.py
+++ b/tests/test_openai_chatcompletions_stream.py
@@ -114,6 +114,103 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert completed_resp.usage.total_tokens == 12
 
 
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_stream_response_yields_events_for_reasoning_content(monkeypatch) -> None:
+    """
+    Validate that `stream_response` emits the correct sequence of events when
+    using a reasoning model.
+    """
+    delta1 = ChoiceDelta(content=None)
+    delta1.reasoning_content = "Okay"
+    chunk1 = ChatCompletionChunk(
+        id="chunk-id",
+        created=1,
+        model="fake",
+        object="chat.completion.chunk",
+        choices=[Choice(index=0, delta=delta1)],
+    )
+
+    chunk2 = ChatCompletionChunk(
+        id="chunk-id",
+        created=1,
+        model="fake",
+        object="chat.completion.chunk",
+        choices=[Choice(index=0, delta=ChoiceDelta(content="He"))],
+    )
+    chunk3 = ChatCompletionChunk(
+        id="chunk-id",
+        created=1,
+        model="fake",
+        object="chat.completion.chunk",
+        choices=[Choice(index=0, delta=ChoiceDelta(content="llo"))],
+    )
+    async def fake_stream() -> AsyncIterator[ChatCompletionChunk]:
+        for c in (chunk1, chunk2, chunk3):
+            yield c
+
+    # Patch _fetch_response to inject our fake stream
+    async def patched_fetch_response(self, *args, **kwargs):
+        # `_fetch_response` is expected to return a Response skeleton and the async stream
+        resp = Response(
+            id="resp-id",
+            created_at=0,
+            model="fake-model",
+            object="response",
+            output=[],
+            tool_choice="none",
+            tools=[],
+            parallel_tool_calls=False,
+        )
+        return resp, fake_stream()
+
+    monkeypatch.setattr(OpenAIChatCompletionsModel, "_fetch_response", patched_fetch_response)
+    model = OpenAIProvider(use_responses=False).get_model("gpt-4")
+    output_events = []
+    async for event in model.stream_response(
+        system_instructions=None,
+        input="",
+        model_settings=ModelSettings(),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
+    ):
+        output_events.append(event)
+    # We expect a response.created, then a response.output_item.added, a content part
+    # added, five text delta events (the reasoning title, "Okay", the content title,
+    # "He", and "llo"), a content part done, the assistant message output_item.done,
+    # and finally response.completed. There should be 11 events in total.
+    assert len(output_events) == 11
+    # First event indicates creation.
+    assert output_events[0].type == "response.created"
+    # The output item added and content part added events should mark the assistant message.
+    assert output_events[1].type == "response.output_item.added"
+    assert output_events[2].type == "response.content_part.added"
+    # Five text delta events.
+    assert output_events[3].type == "response.output_text.delta"
+    assert output_events[3].delta == "# reasoning content\n\n"
+    assert output_events[4].type == "response.output_text.delta"
+    assert output_events[4].delta == "Okay"
+    assert output_events[5].type == "response.output_text.delta"
+    assert output_events[5].delta == "\n\n# content\n\n"
+    assert output_events[6].type == "response.output_text.delta"
+    assert output_events[6].delta == "He"
+    assert output_events[7].type == "response.output_text.delta"
+    assert output_events[7].delta == "llo"
+    # After streaming, the content part and item should be marked done.
+    assert output_events[8].type == "response.content_part.done"
+    assert output_events[9].type == "response.output_item.done"
+    # Last event indicates completion of the stream.
+    assert output_events[10].type == "response.completed"
+    # The completed response should have one output message with full text.
+    completed_resp = output_events[10].response
+    assert isinstance(completed_resp.output[0], ResponseOutputMessage)
+    assert isinstance(completed_resp.output[0].content[0], ResponseOutputText)
+    assert completed_resp.output[0].content[0].text == "# reasoning content\n\nOkay\n\n# content\n\nHello"
+
+
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
 async def test_stream_response_yields_events_for_refusal_content(monkeypatch) -> None:
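Note: the two follow-up commits below exist because `ChoiceDelta` does not declare `reasoning_content`, so each way of attaching it in the test trips a different static checker. A small sketch of the trade-off (the checker behavior is summarized here, not stated in the patches themselves):

    from openai.types.chat.chat_completion_chunk import ChoiceDelta

    delta1 = ChoiceDelta(content=None)

    # (a) Plain assignment works at runtime, since the SDK models accept extra
    #     fields, but mypy rejects the undeclared attribute (attr-defined).
    # delta1.reasoning_content = "Okay"

    # (b) setattr() satisfies mypy, but ruff's flake8-bugbear rule B010 flags
    #     setattr() calls that use a constant attribute name.
    # setattr(delta1, "reasoning_content", "Okay")

    # (c) The series settles on suppressing B010 for this one line.
    setattr(delta1, "reasoning_content", "Okay")  # noqa: B010

    assert getattr(delta1, "reasoning_content") == "Okay"  # noqa: B009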
From e3c908a49c7b07a4476c64a83dc9ab75f8674ff7 Mon Sep 17 00:00:00 2001
From: "will.yang"
Date: Wed, 23 Apr 2025 23:05:48 +0800
Subject: [PATCH 3/4] fix mypy

---
 tests/test_openai_chatcompletions_stream.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_openai_chatcompletions_stream.py b/tests/test_openai_chatcompletions_stream.py
index 2549cd49..5e865274 100644
--- a/tests/test_openai_chatcompletions_stream.py
+++ b/tests/test_openai_chatcompletions_stream.py
@@ -122,7 +122,7 @@ async def test_stream_response_yields_events_for_reasoning_content(monkeypatch)
     using a reasoning model.
     """
     delta1 = ChoiceDelta(content=None)
-    delta1.reasoning_content = "Okay"
+    setattr(delta1, "reasoning_content", "Okay")
     chunk1 = ChatCompletionChunk(
         id="chunk-id",
         created=1,
@@ -208,7 +208,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     completed_resp = output_events[10].response
     assert isinstance(completed_resp.output[0], ResponseOutputMessage)
     assert isinstance(completed_resp.output[0].content[0], ResponseOutputText)
-    assert completed_resp.output[0].content[0].text == "# reasoning content\n\nOkay\n\n# content\n\nHello"
+    assert (completed_resp.output[0].content[0].text ==
+            "# reasoning content\n\nOkay\n\n# content\n\nHello")
 
 
 @pytest.mark.allow_call_model_methods

From 94aa8a2d28c7eb68694e195e8355ddcd8f7d38f2 Mon Sep 17 00:00:00 2001
From: "will.yang"
Date: Wed, 23 Apr 2025 23:21:07 +0800
Subject: [PATCH 4/4] ignore B010

---
 tests/test_openai_chatcompletions_stream.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_openai_chatcompletions_stream.py b/tests/test_openai_chatcompletions_stream.py
index 5e865274..a321d98f 100644
--- a/tests/test_openai_chatcompletions_stream.py
+++ b/tests/test_openai_chatcompletions_stream.py
@@ -122,7 +122,7 @@ async def test_stream_response_yields_events_for_reasoning_content(monkeypatch)
     using a reasoning model.
     """
     delta1 = ChoiceDelta(content=None)
-    setattr(delta1, "reasoning_content", "Okay")
+    setattr(delta1, "reasoning_content", "Okay")  # noqa: B010
     chunk1 = ChatCompletionChunk(
         id="chunk-id",
         created=1,
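Note: taken together, the series surfaces reasoning deltas to any consumer of the raw event stream. A rough end-to-end sketch using the SDK's documented streaming entry points (the base URL, API key, and model name are placeholders; the printed text will include the "# reasoning content" and "# content" titles added by PATCH 1/4):

    import asyncio

    from openai import AsyncOpenAI
    from openai.types.responses import ResponseTextDeltaEvent

    from agents import Agent, Runner
    from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel


    async def main() -> None:
        # Placeholder endpoint and model; substitute a reasoning-capable API.
        client = AsyncOpenAI(base_url="https://api.example.com/v1", api_key="sk-placeholder")
        agent = Agent(
            name="assistant",
            model=OpenAIChatCompletionsModel(model="some-reasoning-model", openai_client=client),
        )
        result = Runner.run_streamed(agent, input="Hello")
        async for event in result.stream_events():
            # Raw response events carry the ResponseTextDeltaEvents emitted by
            # handle_stream(), including the title deltas.
            if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
                print(event.data.delta, end="", flush=True)


    asyncio.run(main())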