Skip to content

Commit 5001a58

Browse files
committed
update
1 parent 6824250 commit 5001a58

File tree

1 file changed

+23
-12
lines changed

1 file changed

+23
-12
lines changed

gpt_server/serving/openai_api_server.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -502,12 +502,18 @@ async def create_chat_completion(request: CustomChatCompletionRequest):
502502
task_usage = UsageInfo.parse_obj(content["usage"])
503503
for usage_key, usage_value in task_usage.dict().items():
504504
setattr(usage, usage_key, getattr(usage, usage_key) + usage_value)
505-
print(666, choices[0].message.tool_calls, type(choices[0].message.tool_calls))
506505
return CustomChatCompletionResponse(
507506
model=request.model, choices=choices, usage=usage
508507
)
509508

510509

510+
from gpt_server.openai_api_protocol.custom_api_protocol import (
511+
CustomChatCompletionStreamResponse,
512+
CustomChatCompletionResponseStreamChoice,
513+
CustomDeltaMessage,
514+
)
515+
516+
511517
async def chat_completion_stream_generator(
512518
model_name: str, gen_params: Dict[str, Any], n: int, worker_addr: str
513519
) -> Generator[str, Any, None]:
@@ -542,21 +548,28 @@ async def chat_completion_stream_generator(
542548
if len(decoded_unicode) > len(previous_text)
543549
else previous_text
544550
)
545-
546551
if len(delta_text) == 0:
547552
delta_text = None
548-
choice_data = ChatCompletionResponseStreamChoice(
553+
choice_data = CustomChatCompletionResponseStreamChoice(
549554
index=i,
550-
delta=DeltaMessage(content=delta_text),
555+
delta=CustomDeltaMessage(
556+
role="assistant",
557+
content=delta_text,
558+
tool_calls=content.get("tool_calls", None),
559+
),
551560
finish_reason=content.get("finish_reason", None),
552561
)
553-
chunk = ChatCompletionStreamResponse(
554-
id=id, choices=[choice_data], model=model_name
562+
563+
chunk = CustomChatCompletionStreamResponse(
564+
id=id,
565+
choices=[choice_data],
566+
model=model_name,
567+
usage=content.get("usage", None),
555568
)
556-
if delta_text is None:
557-
if content.get("finish_reason", None) is not None:
558-
finish_stream_events.append(chunk)
559-
continue
569+
# if delta_text is None:
570+
# if content.get("finish_reason", None) is not None:
571+
# finish_stream_events.append(chunk)
572+
# continue
560573
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
561574
# There is no "content" field in the last delta message, so use exclude_none to omit the "content" field.
562575
for finish_chunk in finish_stream_events:
@@ -585,7 +598,6 @@ async def create_completion(request: CompletionRequest):
585598

586599
if isinstance(max_tokens, int) and max_tokens < request.max_tokens:
587600
request.max_tokens = max_tokens
588-
589601
if request.stream:
590602
generator = generate_completion_stream_generator(
591603
request, request.n, worker_addr
@@ -703,7 +715,6 @@ async def generate_completion_stream_generator(
703715

704716

705717
async def generate_completion_stream(payload: Dict[str, Any], worker_addr: str):
706-
controller_address = app_settings.controller_address
707718
async with httpx.AsyncClient() as client:
708719
delimiter = b"\0"
709720
async with client.stream(

0 commit comments

Comments
 (0)