From 37d5b7e39948c2cf5d74d7eff3c071328d3d11d5 Mon Sep 17 00:00:00 2001 From: aisensiy Date: Thu, 12 Oct 2023 17:32:37 +0800 Subject: [PATCH 01/11] Use session id from gradio state --- lmdeploy/serve/gradio/app.py | 42 ++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py index 71db7a2749..0023a78a29 100644 --- a/lmdeploy/serve/gradio/app.py +++ b/lmdeploy/serve/gradio/app.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import os +import random import threading import time from functools import partial @@ -336,7 +337,7 @@ async def chat_stream_local( state_chatbot: Sequence, cancel_btn: gr.Button, reset_btn: gr.Button, - request: gr.Request, + session_id: int, ): """Chat with AI assistant. @@ -345,14 +346,10 @@ async def chat_stream_local( state_chatbot (Sequence): the chatting history request (gr.Request): the request from a user """ - session_id = threading.current_thread().ident - if request is not None: - session_id = ip2id(request.kwargs['client']['host']) - bot_summarized_response = '' state_chatbot = state_chatbot + [(instruction, None)] yield (state_chatbot, state_chatbot, disable_btn, enable_btn, - f'{bot_summarized_response}'.strip()) + session_id) async for outputs in InterFace.async_engine.generate( instruction, @@ -373,14 +370,15 @@ async def chat_stream_local( state_chatbot[-1][1] + response ) # piece by piece yield (state_chatbot, state_chatbot, enable_btn, disable_btn, - f'{bot_summarized_response}'.strip()) + session_id) yield (state_chatbot, state_chatbot, disable_btn, enable_btn, - f'{bot_summarized_response}'.strip()) + session_id) async def reset_local_func(instruction_txtbox: gr.Textbox, - state_chatbot: gr.State, request: gr.Request): + state_chatbot: gr.State, + session_id: int): """reset the session. Args: @@ -389,10 +387,6 @@ async def reset_local_func(instruction_txtbox: gr.Textbox, request (gr.Request): the request from a user """ state_chatbot = [] - - session_id = threading.current_thread().ident - if request is not None: - session_id = ip2id(request.kwargs['client']['host']) # end the session async for out in InterFace.async_engine.generate('', session_id, @@ -406,11 +400,12 @@ async def reset_local_func(instruction_txtbox: gr.Textbox, state_chatbot, state_chatbot, gr.Textbox.update(value=''), + session_id ) async def cancel_local_func(state_chatbot: gr.State, cancel_btn: gr.Button, - reset_btn: gr.Button, request: gr.Request): + reset_btn: gr.Button, session_id: int): """stop the session. Args: @@ -418,9 +413,6 @@ async def cancel_local_func(state_chatbot: gr.State, cancel_btn: gr.Button, state_chatbot (Sequence): the chatting history request (gr.Request): the request from a user """ - session_id = threading.current_thread().ident - if request is not None: - session_id = ip2id(request.kwargs['client']['host']) # end the session async for out in InterFace.async_engine.generate('', session_id, @@ -442,7 +434,7 @@ async def cancel_local_func(state_chatbot: gr.State, cancel_btn: gr.Button, sequence_start=True, sequence_end=False): pass - return (state_chatbot, disable_btn, enable_btn) + return (state_chatbot, disable_btn, enable_btn, session_id) def run_local(model_path: str, @@ -465,6 +457,7 @@ def run_local(model_path: str, with gr.Blocks(css=CSS, theme=THEME) as demo: state_chatbot = gr.State([]) + session_id = gr.State(random.randint(0,100000)) with gr.Column(elem_id='container'): gr.Markdown('## LMDeploy Playground') @@ -481,20 +474,21 @@ def run_local(model_path: str, send_event = instruction_txtbox.submit( chat_stream_local, - [instruction_txtbox, state_chatbot, cancel_btn, reset_btn], - [state_chatbot, chatbot, cancel_btn, reset_btn]) + [instruction_txtbox, state_chatbot, cancel_btn, reset_btn, session_id], + [state_chatbot, chatbot, cancel_btn, reset_btn, session_id]) instruction_txtbox.submit( lambda: gr.Textbox.update(value=''), [], [instruction_txtbox], ) cancel_btn.click(cancel_local_func, - [state_chatbot, cancel_btn, reset_btn], - [state_chatbot, cancel_btn, reset_btn], + [state_chatbot, cancel_btn, reset_btn, session_id], + [state_chatbot, cancel_btn, reset_btn, session_id], cancels=[send_event]) - reset_btn.click(reset_local_func, [instruction_txtbox, state_chatbot], - [state_chatbot, chatbot, instruction_txtbox], + reset_btn.click(reset_local_func, + [instruction_txtbox, state_chatbot, session_id], + [state_chatbot, chatbot, instruction_txtbox, session_id], cancels=[send_event]) print(f'server is gonna mount on: http://{server_name}:{server_port}') From 4931e68f661223079899016911533f952766e420 Mon Sep 17 00:00:00 2001 From: aisensiy Date: Thu, 12 Oct 2023 22:53:35 +0800 Subject: [PATCH 02/11] use a new session id after reset --- lmdeploy/serve/gradio/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py index 0023a78a29..0c61254e33 100644 --- a/lmdeploy/serve/gradio/app.py +++ b/lmdeploy/serve/gradio/app.py @@ -400,7 +400,7 @@ async def reset_local_func(instruction_txtbox: gr.Textbox, state_chatbot, state_chatbot, gr.Textbox.update(value=''), - session_id + random.randint(0, 100000), ) From d0df11af580f50fb7f2f96073f3acd3dd6fb20ae Mon Sep 17 00:00:00 2001 From: aisensiy Date: Thu, 12 Oct 2023 22:54:01 +0800 Subject: [PATCH 03/11] rename session id like a state --- lmdeploy/serve/gradio/app.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py index 0c61254e33..c1569ca00a 100644 --- a/lmdeploy/serve/gradio/app.py +++ b/lmdeploy/serve/gradio/app.py @@ -457,7 +457,7 @@ def run_local(model_path: str, with gr.Blocks(css=CSS, theme=THEME) as demo: state_chatbot = gr.State([]) - session_id = gr.State(random.randint(0,100000)) + state_session_id = gr.State(random.randint(0,100000)) with gr.Column(elem_id='container'): gr.Markdown('## LMDeploy Playground') @@ -474,21 +474,21 @@ def run_local(model_path: str, send_event = instruction_txtbox.submit( chat_stream_local, - [instruction_txtbox, state_chatbot, cancel_btn, reset_btn, session_id], - [state_chatbot, chatbot, cancel_btn, reset_btn, session_id]) + [instruction_txtbox, state_chatbot, cancel_btn, reset_btn, state_session_id], + [state_chatbot, chatbot, cancel_btn, reset_btn, state_session_id]) instruction_txtbox.submit( lambda: gr.Textbox.update(value=''), [], [instruction_txtbox], ) cancel_btn.click(cancel_local_func, - [state_chatbot, cancel_btn, reset_btn, session_id], - [state_chatbot, cancel_btn, reset_btn, session_id], + [state_chatbot, cancel_btn, reset_btn, state_session_id], + [state_chatbot, cancel_btn, reset_btn, state_session_id], cancels=[send_event]) reset_btn.click(reset_local_func, - [instruction_txtbox, state_chatbot, session_id], - [state_chatbot, chatbot, instruction_txtbox, session_id], + [instruction_txtbox, state_chatbot, state_session_id], + [state_chatbot, chatbot, instruction_txtbox, state_session_id], cancels=[send_event]) print(f'server is gonna mount on: http://{server_name}:{server_port}') From e084725abb834d22d90ba4d1f32c99b8ca0b3da1 Mon Sep 17 00:00:00 2001 From: aisensiy Date: Thu, 12 Oct 2023 23:02:24 +0800 Subject: [PATCH 04/11] update comments --- lmdeploy/serve/gradio/app.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py index c1569ca00a..4e46fd4816 100644 --- a/lmdeploy/serve/gradio/app.py +++ b/lmdeploy/serve/gradio/app.py @@ -344,7 +344,9 @@ async def chat_stream_local( Args: instruction (str): user's prompt state_chatbot (Sequence): the chatting history - request (gr.Request): the request from a user + cancel_btn (gr.Button): the cancel button + reset_btn (gr.Button): the reset button + session_id (int): the session id """ state_chatbot = state_chatbot + [(instruction, None)] @@ -384,7 +386,7 @@ async def reset_local_func(instruction_txtbox: gr.Textbox, Args: instruction_txtbox (str): user's prompt state_chatbot (Sequence): the chatting history - request (gr.Request): the request from a user + session_id (int): the session id """ state_chatbot = [] # end the session @@ -404,14 +406,16 @@ async def reset_local_func(instruction_txtbox: gr.Textbox, ) -async def cancel_local_func(state_chatbot: gr.State, cancel_btn: gr.Button, +async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button, reset_btn: gr.Button, session_id: int): """stop the session. Args: instruction_txtbox (str): user's prompt state_chatbot (Sequence): the chatting history - request (gr.Request): the request from a user + cancel_btn (gr.Button): the cancel button + reset_btn (gr.Button): the reset button + session_id (int): the session id """ # end the session async for out in InterFace.async_engine.generate('', From 67ec14511d063f905ec61ad63709fbf885739bf6 Mon Sep 17 00:00:00 2001 From: aisensiy Date: Fri, 13 Oct 2023 10:45:59 +0800 Subject: [PATCH 05/11] reformat files --- lmdeploy/serve/gradio/app.py | 37 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py index 4e46fd4816..67855b34d1 100644 --- a/lmdeploy/serve/gradio/app.py +++ b/lmdeploy/serve/gradio/app.py @@ -350,8 +350,7 @@ async def chat_stream_local( """ state_chatbot = state_chatbot + [(instruction, None)] - yield (state_chatbot, state_chatbot, disable_btn, enable_btn, - session_id) + yield (state_chatbot, state_chatbot, disable_btn, enable_btn, session_id) async for outputs in InterFace.async_engine.generate( instruction, @@ -374,13 +373,11 @@ async def chat_stream_local( yield (state_chatbot, state_chatbot, enable_btn, disable_btn, session_id) - yield (state_chatbot, state_chatbot, disable_btn, enable_btn, - session_id) + yield (state_chatbot, state_chatbot, disable_btn, enable_btn, session_id) async def reset_local_func(instruction_txtbox: gr.Textbox, - state_chatbot: gr.State, - session_id: int): + state_chatbot: gr.State, session_id: int): """reset the session. Args: @@ -461,7 +458,7 @@ def run_local(model_path: str, with gr.Blocks(css=CSS, theme=THEME) as demo: state_chatbot = gr.State([]) - state_session_id = gr.State(random.randint(0,100000)) + state_session_id = gr.State(random.randint(0, 100000)) with gr.Column(elem_id='container'): gr.Markdown('## LMDeploy Playground') @@ -476,24 +473,26 @@ def run_local(model_path: str, cancel_btn = gr.Button(value='Cancel', interactive=False) reset_btn = gr.Button(value='Reset') - send_event = instruction_txtbox.submit( - chat_stream_local, - [instruction_txtbox, state_chatbot, cancel_btn, reset_btn, state_session_id], - [state_chatbot, chatbot, cancel_btn, reset_btn, state_session_id]) + send_event = instruction_txtbox.submit(chat_stream_local, [ + instruction_txtbox, state_chatbot, cancel_btn, reset_btn, + state_session_id + ], [state_chatbot, chatbot, cancel_btn, reset_btn, state_session_id]) instruction_txtbox.submit( lambda: gr.Textbox.update(value=''), [], [instruction_txtbox], ) - cancel_btn.click(cancel_local_func, - [state_chatbot, cancel_btn, reset_btn, state_session_id], - [state_chatbot, cancel_btn, reset_btn, state_session_id], - cancels=[send_event]) + cancel_btn.click( + cancel_local_func, + [state_chatbot, cancel_btn, reset_btn, state_session_id], + [state_chatbot, cancel_btn, reset_btn, state_session_id], + cancels=[send_event]) - reset_btn.click(reset_local_func, - [instruction_txtbox, state_chatbot, state_session_id], - [state_chatbot, chatbot, instruction_txtbox, state_session_id], - cancels=[send_event]) + reset_btn.click( + reset_local_func, + [instruction_txtbox, state_chatbot, state_session_id], + [state_chatbot, chatbot, instruction_txtbox, state_session_id], + cancels=[send_event]) print(f'server is gonna mount on: http://{server_name}:{server_port}') demo.queue(concurrency_count=batch_size, max_size=100, From c2d22efc1c9872daf18eee41429399e811329f79 Mon Sep 17 00:00:00 2001 From: aisensiy Date: Mon, 16 Oct 2023 14:28:19 +0800 Subject: [PATCH 06/11] init session id on block loaded --- lmdeploy/serve/gradio/app.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py index 67855b34d1..33e18bb541 100644 --- a/lmdeploy/serve/gradio/app.py +++ b/lmdeploy/serve/gradio/app.py @@ -395,11 +395,13 @@ async def reset_local_func(instruction_txtbox: gr.Textbox, sequence_end=True): pass + new_session_id = random.randint(0, 100000) return ( state_chatbot, state_chatbot, gr.Textbox.update(value=''), - random.randint(0, 100000), + new_session_id, + new_session_id, ) @@ -458,11 +460,13 @@ def run_local(model_path: str, with gr.Blocks(css=CSS, theme=THEME) as demo: state_chatbot = gr.State([]) - state_session_id = gr.State(random.randint(0, 100000)) + state_session_id = gr.State(-1) with gr.Column(elem_id='container'): gr.Markdown('## LMDeploy Playground') + session_id_box = gr.Number(label='Session ID') + chatbot = gr.Chatbot( elem_id='chatbot', label=InterFace.async_engine.tm_model.model_name) @@ -488,11 +492,21 @@ def run_local(model_path: str, [state_chatbot, cancel_btn, reset_btn, state_session_id], cancels=[send_event]) - reset_btn.click( - reset_local_func, - [instruction_txtbox, state_chatbot, state_session_id], - [state_chatbot, chatbot, instruction_txtbox, state_session_id], - cancels=[send_event]) + reset_btn.click(reset_local_func, + [instruction_txtbox, state_chatbot, state_session_id], + [ + state_chatbot, chatbot, instruction_txtbox, + session_id_box, state_session_id + ], + cancels=[send_event]) + + def init(): + new_session_id = random.randint(0, 100000) + return [new_session_id, new_session_id] + + demo.load(init, + inputs=None, + outputs=[state_session_id, session_id_box]) print(f'server is gonna mount on: http://{server_name}:{server_port}') demo.queue(concurrency_count=batch_size, max_size=100, From af8c449ed5e5e5c763c8b4ff23f0cb7cb69ff43c Mon Sep 17 00:00:00 2001 From: aisensiy Date: Fri, 20 Oct 2023 14:20:36 +0800 Subject: [PATCH 07/11] use auto increased session id --- lmdeploy/serve/gradio/app.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py index 33e18bb541..bc58cea5b9 100644 --- a/lmdeploy/serve/gradio/app.py +++ b/lmdeploy/serve/gradio/app.py @@ -1,6 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. import os -import random import threading import time from functools import partial @@ -25,6 +24,13 @@ disable_btn = gr.Button.update(interactive=False) +# a IO interface mananing variables +class InterFace: + async_engine: AsyncEngine = None # for run_local + restful_api_url: str = None # for run_restful + global_session_id: int = 0 + + def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot, request: gr.Request): """Chat with AI assistant. @@ -146,12 +152,6 @@ def run_server(triton_server_addr: str, ) -# a IO interface mananing variables -class InterFace: - async_engine: AsyncEngine = None # for run_local - restful_api_url: str = None # for run_restful - - def chat_stream_restful( instruction: str, state_chatbot: Sequence, @@ -377,7 +377,7 @@ async def chat_stream_local( async def reset_local_func(instruction_txtbox: gr.Textbox, - state_chatbot: gr.State, session_id: int): + state_chatbot: Sequence, session_id: int): """reset the session. Args: @@ -394,8 +394,8 @@ async def reset_local_func(instruction_txtbox: gr.Textbox, sequence_start=False, sequence_end=True): pass - - new_session_id = random.randint(0, 100000) + InterFace.global_session_id += 1 + new_session_id = InterFace.global_session_id return ( state_chatbot, state_chatbot, @@ -501,7 +501,8 @@ def run_local(model_path: str, cancels=[send_event]) def init(): - new_session_id = random.randint(0, 100000) + InterFace.global_session_id += 1 + new_session_id = InterFace.global_session_id return [new_session_id, new_session_id] demo.load(init, From f62b0c5867aad9ac25d83350d8488558fd50317b Mon Sep 17 00:00:00 2001 From: aisensiy Date: Sat, 4 Nov 2023 00:25:14 +0800 Subject: [PATCH 08/11] remove session id textbox --- lmdeploy/serve/gradio/turbomind_coupled.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/lmdeploy/serve/gradio/turbomind_coupled.py b/lmdeploy/serve/gradio/turbomind_coupled.py index 8aed8eb737..7bff73eb6a 100644 --- a/lmdeploy/serve/gradio/turbomind_coupled.py +++ b/lmdeploy/serve/gradio/turbomind_coupled.py @@ -146,8 +146,6 @@ def run_local(model_path: str, with gr.Column(elem_id='container'): gr.Markdown('## LMDeploy Playground') - session_id_box = gr.Number(label='Session ID') - chatbot = gr.Chatbot( elem_id='chatbot', label=InterFace.async_engine.tm_model.model_name) @@ -173,22 +171,18 @@ def run_local(model_path: str, [state_chatbot, cancel_btn, reset_btn, state_session_id], cancels=[send_event]) - reset_btn.click(reset_local_func, - [instruction_txtbox, state_chatbot, state_session_id], - [ - state_chatbot, chatbot, instruction_txtbox, - session_id_box, state_session_id - ], - cancels=[send_event]) + reset_btn.click( + reset_local_func, + [instruction_txtbox, state_chatbot, state_session_id], + [state_chatbot, chatbot, instruction_txtbox, state_session_id], + cancels=[send_event]) def init(): InterFace.global_session_id += 1 new_session_id = InterFace.global_session_id return [new_session_id, new_session_id] - demo.load(init, - inputs=None, - outputs=[state_session_id, session_id_box]) + demo.load(init, inputs=None, outputs=[state_session_id]) print(f'server is gonna mount on: http://{server_name}:{server_port}') demo.queue(concurrency_count=batch_size, max_size=100, From b4529b05a82bab532792ce975f42277eae0276ff Mon Sep 17 00:00:00 2001 From: AllentDan Date: Mon, 6 Nov 2023 10:30:39 +0800 Subject: [PATCH 09/11] apply to api_server and tritonserver --- lmdeploy/serve/gradio/api_server_backend.py | 64 ++++++++----------- .../serve/gradio/triton_server_backend.py | 28 ++++---- lmdeploy/serve/gradio/turbomind_coupled.py | 38 ++++------- 3 files changed, 58 insertions(+), 72 deletions(-) diff --git a/lmdeploy/serve/gradio/api_server_backend.py b/lmdeploy/serve/gradio/api_server_backend.py index ce64508795..1c397799e1 100644 --- a/lmdeploy/serve/gradio/api_server_backend.py +++ b/lmdeploy/serve/gradio/api_server_backend.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -import threading import time from typing import Sequence @@ -8,20 +7,16 @@ from lmdeploy.serve.gradio.constants import CSS, THEME, disable_btn, enable_btn from lmdeploy.serve.openai.api_client import (get_model_list, get_streaming_response) -from lmdeploy.serve.openai.api_server import ip2id class InterFace: api_server_url: str = None + global_session_id: int = 0 -def chat_stream_restful( - instruction: str, - state_chatbot: Sequence, - cancel_btn: gr.Button, - reset_btn: gr.Button, - request: gr.Request, -): +def chat_stream_restful(instruction: str, state_chatbot: Sequence, + cancel_btn: gr.Button, reset_btn: gr.Button, + session_id: int): """Chat with AI assistant. Args: @@ -29,14 +24,9 @@ def chat_stream_restful( state_chatbot (Sequence): the chatting history request (gr.Request): the request from a user """ - session_id = threading.current_thread().ident - if request is not None: - session_id = ip2id(request.kwargs['client']['host']) - bot_summarized_response = '' state_chatbot = state_chatbot + [(instruction, None)] - yield (state_chatbot, state_chatbot, disable_btn, enable_btn, - f'{bot_summarized_response}'.strip()) + yield (state_chatbot, state_chatbot, disable_btn, enable_btn) for response, tokens, finish_reason in get_streaming_response( instruction, @@ -56,15 +46,13 @@ def chat_stream_restful( state_chatbot[-1] = (state_chatbot[-1][0], state_chatbot[-1][1] + response ) # piece by piece - yield (state_chatbot, state_chatbot, enable_btn, disable_btn, - f'{bot_summarized_response}'.strip()) + yield (state_chatbot, state_chatbot, enable_btn, disable_btn) - yield (state_chatbot, state_chatbot, disable_btn, enable_btn, - f'{bot_summarized_response}'.strip()) + yield (state_chatbot, state_chatbot, disable_btn, enable_btn) def reset_restful_func(instruction_txtbox: gr.Textbox, state_chatbot: gr.State, - request: gr.Request): + session_id: int): """reset the session. Args: @@ -73,10 +61,6 @@ def reset_restful_func(instruction_txtbox: gr.Textbox, state_chatbot: gr.State, request (gr.Request): the request from a user """ state_chatbot = [] - - session_id = threading.current_thread().ident - if request is not None: - session_id = ip2id(request.kwargs['client']['host']) # end the session for response, tokens, finish_reason in get_streaming_response( '', @@ -94,7 +78,7 @@ def reset_restful_func(instruction_txtbox: gr.Textbox, state_chatbot: gr.State, def cancel_restful_func(state_chatbot: gr.State, cancel_btn: gr.Button, - reset_btn: gr.Button, request: gr.Request): + reset_btn: gr.Button, session_id: int): """stop the session. Args: @@ -103,9 +87,6 @@ def cancel_restful_func(state_chatbot: gr.State, cancel_btn: gr.Button, request (gr.Request): the request from a user """ yield (state_chatbot, disable_btn, disable_btn) - session_id = threading.current_thread().ident - if request is not None: - session_id = ip2id(request.kwargs['client']['host']) # end the session for out in get_streaming_response( '', @@ -152,6 +133,7 @@ def run_api_server(api_server_url: str, with gr.Blocks(css=CSS, theme=THEME) as demo: state_chatbot = gr.State([]) + state_session_id = gr.State(0) with gr.Column(elem_id='container'): gr.Markdown('## LMDeploy Playground') @@ -164,25 +146,33 @@ def run_api_server(api_server_url: str, cancel_btn = gr.Button(value='Cancel', interactive=False) reset_btn = gr.Button(value='Reset') - send_event = instruction_txtbox.submit( - chat_stream_restful, - [instruction_txtbox, state_chatbot, cancel_btn, reset_btn], - [state_chatbot, chatbot, cancel_btn, reset_btn]) + send_event = instruction_txtbox.submit(chat_stream_restful, [ + instruction_txtbox, state_chatbot, cancel_btn, reset_btn, + state_session_id + ], [state_chatbot, chatbot, cancel_btn, reset_btn]) instruction_txtbox.submit( lambda: gr.Textbox.update(value=''), [], [instruction_txtbox], ) - cancel_btn.click(cancel_restful_func, - [state_chatbot, cancel_btn, reset_btn], - [state_chatbot, cancel_btn, reset_btn], - cancels=[send_event]) + cancel_btn.click( + cancel_restful_func, + [state_chatbot, cancel_btn, reset_btn, state_session_id], + [state_chatbot, cancel_btn, reset_btn], + cancels=[send_event]) reset_btn.click(reset_restful_func, - [instruction_txtbox, state_chatbot], + [instruction_txtbox, state_chatbot, state_session_id], [state_chatbot, chatbot, instruction_txtbox], cancels=[send_event]) + def init(): + InterFace.global_session_id += 1 + new_session_id = InterFace.global_session_id + return new_session_id + + demo.load(init, inputs=None, outputs=[state_session_id]) + print(f'server is gonna mount on: http://{server_name}:{server_port}') demo.queue(concurrency_count=batch_size, max_size=100, api_open=True).launch( diff --git a/lmdeploy/serve/gradio/triton_server_backend.py b/lmdeploy/serve/gradio/triton_server_backend.py index 5936f4ba5f..d8371ee4aa 100644 --- a/lmdeploy/serve/gradio/triton_server_backend.py +++ b/lmdeploy/serve/gradio/triton_server_backend.py @@ -1,19 +1,20 @@ # Copyright (c) OpenMMLab. All rights reserved. import os -import threading from functools import partial from typing import Sequence import gradio as gr from lmdeploy.serve.gradio.constants import CSS, THEME, disable_btn, enable_btn -from lmdeploy.serve.openai.api_server import ip2id from lmdeploy.serve.turbomind.chatbot import Chatbot +class InterFace: + global_session_id: int = 0 + + def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot, - cancel_btn: gr.Button, reset_btn: gr.Button, - request: gr.Request): + cancel_btn: gr.Button, reset_btn: gr.Button, session_id: int): """Chat with AI assistant. Args: @@ -25,9 +26,6 @@ def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot, request (gr.Request): the request from a user """ instruction = state_chatbot[-1][0] - session_id = threading.current_thread().ident - if request is not None: - session_id = ip2id(request.kwargs['client']['host']) bot_response = llama_chatbot.stream_infer( session_id, instruction, f'{session_id}-{len(state_chatbot)}') @@ -92,6 +90,7 @@ def run_triton_server(triton_server_addr: str, llama_chatbot = gr.State( Chatbot(triton_server_addr, log_level=log_level, display=True)) state_chatbot = gr.State([]) + state_session_id = gr.State(0) model_name = llama_chatbot.value.model_name reset_all = partial(reset_all_func, model_name=model_name, @@ -110,10 +109,10 @@ def run_triton_server(triton_server_addr: str, send_event = instruction_txtbox.submit( add_instruction, [instruction_txtbox, state_chatbot], - [instruction_txtbox, state_chatbot]).then( - chat_stream, - [state_chatbot, llama_chatbot, cancel_btn, reset_btn], - [state_chatbot, chatbot, cancel_btn, reset_btn]) + [instruction_txtbox, state_chatbot]).then(chat_stream, [ + state_chatbot, llama_chatbot, cancel_btn, reset_btn, + state_session_id + ], [state_chatbot, chatbot, cancel_btn, reset_btn]) cancel_btn.click(cancel_func, [state_chatbot, llama_chatbot, cancel_btn, reset_btn], @@ -125,6 +124,13 @@ def run_triton_server(triton_server_addr: str, [llama_chatbot, state_chatbot, chatbot, instruction_txtbox], cancels=[send_event]) + def init(): + InterFace.global_session_id += 1 + new_session_id = InterFace.global_session_id + return new_session_id + + demo.load(init, inputs=None, outputs=[state_session_id]) + print(f'server is gonna mount on: http://{server_name}:{server_port}') demo.queue(concurrency_count=4, max_size=100, api_open=True).launch( max_threads=10, diff --git a/lmdeploy/serve/gradio/turbomind_coupled.py b/lmdeploy/serve/gradio/turbomind_coupled.py index 7bff73eb6a..2efdb7db33 100644 --- a/lmdeploy/serve/gradio/turbomind_coupled.py +++ b/lmdeploy/serve/gradio/turbomind_coupled.py @@ -30,7 +30,7 @@ async def chat_stream_local( """ state_chatbot = state_chatbot + [(instruction, None)] - yield (state_chatbot, state_chatbot, disable_btn, enable_btn, session_id) + yield (state_chatbot, state_chatbot, disable_btn, enable_btn) async for outputs in InterFace.async_engine.generate( instruction, @@ -51,10 +51,9 @@ async def chat_stream_local( state_chatbot[-1] = (state_chatbot[-1][0], state_chatbot[-1][1] + response ) # piece by piece - yield (state_chatbot, state_chatbot, enable_btn, disable_btn, - session_id) + yield (state_chatbot, state_chatbot, enable_btn, disable_btn) - yield (state_chatbot, state_chatbot, disable_btn, enable_btn, session_id) + yield (state_chatbot, state_chatbot, disable_btn, enable_btn) async def reset_local_func(instruction_txtbox: gr.Textbox, @@ -75,15 +74,7 @@ async def reset_local_func(instruction_txtbox: gr.Textbox, sequence_start=False, sequence_end=True): pass - InterFace.global_session_id += 1 - new_session_id = InterFace.global_session_id - return ( - state_chatbot, - state_chatbot, - gr.Textbox.update(value=''), - new_session_id, - new_session_id, - ) + return (state_chatbot, state_chatbot, gr.Textbox.update(value='')) async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button, @@ -97,7 +88,7 @@ async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button, reset_btn (gr.Button): the reset button session_id (int): the session id """ - yield (state_chatbot, disable_btn, enable_btn, session_id) + yield (state_chatbot, disable_btn, enable_btn) async for out in InterFace.async_engine.generate('', session_id, request_output_len=0, @@ -118,7 +109,7 @@ async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button, sequence_start=True, sequence_end=False): pass - yield (state_chatbot, disable_btn, enable_btn, session_id) + yield (state_chatbot, disable_btn, enable_btn) def run_local(model_path: str, @@ -141,7 +132,7 @@ def run_local(model_path: str, with gr.Blocks(css=CSS, theme=THEME) as demo: state_chatbot = gr.State([]) - state_session_id = gr.State(-1) + state_session_id = gr.State(0) with gr.Column(elem_id='container'): gr.Markdown('## LMDeploy Playground') @@ -159,7 +150,7 @@ def run_local(model_path: str, send_event = instruction_txtbox.submit(chat_stream_local, [ instruction_txtbox, state_chatbot, cancel_btn, reset_btn, state_session_id - ], [state_chatbot, chatbot, cancel_btn, reset_btn, state_session_id]) + ], [state_chatbot, chatbot, cancel_btn, reset_btn]) instruction_txtbox.submit( lambda: gr.Textbox.update(value=''), [], @@ -168,19 +159,18 @@ def run_local(model_path: str, cancel_btn.click( cancel_local_func, [state_chatbot, cancel_btn, reset_btn, state_session_id], - [state_chatbot, cancel_btn, reset_btn, state_session_id], + [state_chatbot, cancel_btn, reset_btn], cancels=[send_event]) - reset_btn.click( - reset_local_func, - [instruction_txtbox, state_chatbot, state_session_id], - [state_chatbot, chatbot, instruction_txtbox, state_session_id], - cancels=[send_event]) + reset_btn.click(reset_local_func, + [instruction_txtbox, state_chatbot, state_session_id], + [state_chatbot, chatbot, instruction_txtbox], + cancels=[send_event]) def init(): InterFace.global_session_id += 1 new_session_id = InterFace.global_session_id - return [new_session_id, new_session_id] + return new_session_id demo.load(init, inputs=None, outputs=[state_session_id]) From 0427695dc4e53ef8cdcee6c7f04443aca08075d2 Mon Sep 17 00:00:00 2001 From: AllentDan Date: Mon, 6 Nov 2023 11:53:02 +0800 Subject: [PATCH 10/11] update docstring --- lmdeploy/serve/gradio/api_server_backend.py | 6 +++--- lmdeploy/serve/gradio/triton_server_backend.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lmdeploy/serve/gradio/api_server_backend.py b/lmdeploy/serve/gradio/api_server_backend.py index 1c397799e1..acf540d90f 100644 --- a/lmdeploy/serve/gradio/api_server_backend.py +++ b/lmdeploy/serve/gradio/api_server_backend.py @@ -22,7 +22,7 @@ def chat_stream_restful(instruction: str, state_chatbot: Sequence, Args: instruction (str): user's prompt state_chatbot (Sequence): the chatting history - request (gr.Request): the request from a user + session_id (int): the session id """ state_chatbot = state_chatbot + [(instruction, None)] @@ -58,7 +58,7 @@ def reset_restful_func(instruction_txtbox: gr.Textbox, state_chatbot: gr.State, Args: instruction_txtbox (str): user's prompt state_chatbot (Sequence): the chatting history - request (gr.Request): the request from a user + session_id (int): the session id """ state_chatbot = [] # end the session @@ -84,7 +84,7 @@ def cancel_restful_func(state_chatbot: gr.State, cancel_btn: gr.Button, Args: instruction_txtbox (str): user's prompt state_chatbot (Sequence): the chatting history - request (gr.Request): the request from a user + session_id (int): the session id """ yield (state_chatbot, disable_btn, disable_btn) # end the session diff --git a/lmdeploy/serve/gradio/triton_server_backend.py b/lmdeploy/serve/gradio/triton_server_backend.py index d8371ee4aa..479f0c9503 100644 --- a/lmdeploy/serve/gradio/triton_server_backend.py +++ b/lmdeploy/serve/gradio/triton_server_backend.py @@ -23,7 +23,7 @@ def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot, llama_chatbot (Chatbot): the instance of a chatbot cancel_btn (bool): enable the cancel button or not reset_btn (bool): enable the reset button or not - request (gr.Request): the request from a user + session_id (int): the session id """ instruction = state_chatbot[-1][0] From a9459e63095f360c89b555df3212252ced7b011d Mon Sep 17 00:00:00 2001 From: AllentDan Date: Mon, 6 Nov 2023 14:23:21 +0800 Subject: [PATCH 11/11] add lock for safety --- lmdeploy/serve/gradio/api_server_backend.py | 5 ++++- lmdeploy/serve/gradio/triton_server_backend.py | 5 ++++- lmdeploy/serve/gradio/turbomind_coupled.py | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lmdeploy/serve/gradio/api_server_backend.py b/lmdeploy/serve/gradio/api_server_backend.py index acf540d90f..8dd92fa0fd 100644 --- a/lmdeploy/serve/gradio/api_server_backend.py +++ b/lmdeploy/serve/gradio/api_server_backend.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import time +from threading import Lock from typing import Sequence import gradio as gr @@ -12,6 +13,7 @@ class InterFace: api_server_url: str = None global_session_id: int = 0 + lock = Lock() def chat_stream_restful(instruction: str, state_chatbot: Sequence, @@ -167,7 +169,8 @@ def run_api_server(api_server_url: str, cancels=[send_event]) def init(): - InterFace.global_session_id += 1 + with InterFace.lock: + InterFace.global_session_id += 1 new_session_id = InterFace.global_session_id return new_session_id diff --git a/lmdeploy/serve/gradio/triton_server_backend.py b/lmdeploy/serve/gradio/triton_server_backend.py index 479f0c9503..9148903cc5 100644 --- a/lmdeploy/serve/gradio/triton_server_backend.py +++ b/lmdeploy/serve/gradio/triton_server_backend.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. import os from functools import partial +from threading import Lock from typing import Sequence import gradio as gr @@ -11,6 +12,7 @@ class InterFace: global_session_id: int = 0 + lock = Lock() def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot, @@ -125,7 +127,8 @@ def run_triton_server(triton_server_addr: str, cancels=[send_event]) def init(): - InterFace.global_session_id += 1 + with InterFace.lock: + InterFace.global_session_id += 1 new_session_id = InterFace.global_session_id return new_session_id diff --git a/lmdeploy/serve/gradio/turbomind_coupled.py b/lmdeploy/serve/gradio/turbomind_coupled.py index 2efdb7db33..e344abcbda 100644 --- a/lmdeploy/serve/gradio/turbomind_coupled.py +++ b/lmdeploy/serve/gradio/turbomind_coupled.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +from threading import Lock from typing import Sequence import gradio as gr @@ -10,6 +11,7 @@ class InterFace: async_engine: AsyncEngine = None global_session_id: int = 0 + lock = Lock() async def chat_stream_local( @@ -168,7 +170,8 @@ def run_local(model_path: str, cancels=[send_event]) def init(): - InterFace.global_session_id += 1 + with InterFace.lock: + InterFace.global_session_id += 1 new_session_id = InterFace.global_session_id return new_session_id