@@ -502,12 +502,18 @@ async def create_chat_completion(request: CustomChatCompletionRequest):
502502 task_usage = UsageInfo .parse_obj (content ["usage" ])
503503 for usage_key , usage_value in task_usage .dict ().items ():
504504 setattr (usage , usage_key , getattr (usage , usage_key ) + usage_value )
505- print (666 , choices [0 ].message .tool_calls , type (choices [0 ].message .tool_calls ))
506505 return CustomChatCompletionResponse (
507506 model = request .model , choices = choices , usage = usage
508507 )
509508
510509
510+ from gpt_server .openai_api_protocol .custom_api_protocol import (
511+ CustomChatCompletionStreamResponse ,
512+ CustomChatCompletionResponseStreamChoice ,
513+ CustomDeltaMessage ,
514+ )
515+
516+
511517async def chat_completion_stream_generator (
512518 model_name : str , gen_params : Dict [str , Any ], n : int , worker_addr : str
513519) -> Generator [str , Any , None ]:
@@ -542,21 +548,28 @@ async def chat_completion_stream_generator(
542548 if len (decoded_unicode ) > len (previous_text )
543549 else previous_text
544550 )
545-
546551 if len (delta_text ) == 0 :
547552 delta_text = None
548- choice_data = ChatCompletionResponseStreamChoice (
553+ choice_data = CustomChatCompletionResponseStreamChoice (
549554 index = i ,
550- delta = DeltaMessage (content = delta_text ),
555+ delta = CustomDeltaMessage (
556+ role = "assistant" ,
557+ content = delta_text ,
558+ tool_calls = content .get ("tool_calls" , None ),
559+ ),
551560 finish_reason = content .get ("finish_reason" , None ),
552561 )
553- chunk = ChatCompletionStreamResponse (
554- id = id , choices = [choice_data ], model = model_name
562+
563+ chunk = CustomChatCompletionStreamResponse (
564+ id = id ,
565+ choices = [choice_data ],
566+ model = model_name ,
567+ usage = content .get ("usage" , None ),
555568 )
556- if delta_text is None :
557- if content .get ("finish_reason" , None ) is not None :
558- finish_stream_events .append (chunk )
559- continue
569+ # if delta_text is None:
570+ # if content.get("finish_reason", None) is not None:
571+ # finish_stream_events.append(chunk)
572+ # continue
560573 yield f"data: { chunk .model_dump_json (exclude_unset = True )} \n \n "
561574 # There is not "content" field in the last delta message, so exclude_none to exclude field "content".
562575 for finish_chunk in finish_stream_events :
@@ -585,7 +598,6 @@ async def create_completion(request: CompletionRequest):
585598
586599 if isinstance (max_tokens , int ) and max_tokens < request .max_tokens :
587600 request .max_tokens = max_tokens
588-
589601 if request .stream :
590602 generator = generate_completion_stream_generator (
591603 request , request .n , worker_addr
@@ -703,7 +715,6 @@ async def generate_completion_stream_generator(
703715
704716
705717async def generate_completion_stream (payload : Dict [str , Any ], worker_addr : str ):
706- controller_address = app_settings .controller_address
707718 async with httpx .AsyncClient () as client :
708719 delimiter = b"\0 "
709720 async with client .stream (