From 37d5b7e39948c2cf5d74d7eff3c071328d3d11d5 Mon Sep 17 00:00:00 2001
From: aisensiy <aisensiy@163.com>
Date: Thu, 12 Oct 2023 17:32:37 +0800
Subject: [PATCH 01/11] Use session id from gradio state

---
 lmdeploy/serve/gradio/app.py | 42 ++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py
index 71db7a2749..0023a78a29 100644
--- a/lmdeploy/serve/gradio/app.py
+++ b/lmdeploy/serve/gradio/app.py
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import os
+import random
 import threading
 import time
 from functools import partial
@@ -336,7 +337,7 @@ async def chat_stream_local(
     state_chatbot: Sequence,
     cancel_btn: gr.Button,
     reset_btn: gr.Button,
-    request: gr.Request,
+    session_id: int,
 ):
     """Chat with AI assistant.
 
@@ -345,14 +346,10 @@ async def chat_stream_local(
         state_chatbot (Sequence): the chatting history
         request (gr.Request): the request from a user
     """
-    session_id = threading.current_thread().ident
-    if request is not None:
-        session_id = ip2id(request.kwargs['client']['host'])
-    bot_summarized_response = ''
     state_chatbot = state_chatbot + [(instruction, None)]
 
     yield (state_chatbot, state_chatbot, disable_btn, enable_btn,
-           f'{bot_summarized_response}'.strip())
+           session_id)
 
     async for outputs in InterFace.async_engine.generate(
             instruction,
@@ -373,14 +370,15 @@ async def chat_stream_local(
                                  state_chatbot[-1][1] + response
                                  )  # piece by piece
         yield (state_chatbot, state_chatbot, enable_btn, disable_btn,
-               f'{bot_summarized_response}'.strip())
+               session_id)
 
     yield (state_chatbot, state_chatbot, disable_btn, enable_btn,
-           f'{bot_summarized_response}'.strip())
+           session_id)
 
 
 async def reset_local_func(instruction_txtbox: gr.Textbox,
-                           state_chatbot: gr.State, request: gr.Request):
+                           state_chatbot: gr.State, 
+                           session_id: int):
     """reset the session.
 
     Args:
@@ -389,10 +387,6 @@ async def reset_local_func(instruction_txtbox: gr.Textbox,
         request (gr.Request): the request from a user
     """
     state_chatbot = []
-
-    session_id = threading.current_thread().ident
-    if request is not None:
-        session_id = ip2id(request.kwargs['client']['host'])
     # end the session
     async for out in InterFace.async_engine.generate('',
                                                      session_id,
@@ -406,11 +400,12 @@ async def reset_local_func(instruction_txtbox: gr.Textbox,
         state_chatbot,
         state_chatbot,
         gr.Textbox.update(value=''),
+        session_id
     )
 
 
 async def cancel_local_func(state_chatbot: gr.State, cancel_btn: gr.Button,
-                            reset_btn: gr.Button, request: gr.Request):
+                            reset_btn: gr.Button, session_id: int):
     """stop the session.
 
     Args:
@@ -418,9 +413,6 @@ async def cancel_local_func(state_chatbot: gr.State, cancel_btn: gr.Button,
         state_chatbot (Sequence): the chatting history
         request (gr.Request): the request from a user
     """
-    session_id = threading.current_thread().ident
-    if request is not None:
-        session_id = ip2id(request.kwargs['client']['host'])
     # end the session
     async for out in InterFace.async_engine.generate('',
                                                      session_id,
@@ -442,7 +434,7 @@ async def cancel_local_func(state_chatbot: gr.State, cancel_btn: gr.Button,
                                                      sequence_start=True,
                                                      sequence_end=False):
         pass
-    return (state_chatbot, disable_btn, enable_btn)
+    return (state_chatbot, disable_btn, enable_btn, session_id)
 
 
 def run_local(model_path: str,
@@ -465,6 +457,7 @@ def run_local(model_path: str,
 
     with gr.Blocks(css=CSS, theme=THEME) as demo:
         state_chatbot = gr.State([])
+        session_id = gr.State(random.randint(0,100000))
 
         with gr.Column(elem_id='container'):
             gr.Markdown('## LMDeploy Playground')
@@ -481,20 +474,21 @@ def run_local(model_path: str,
 
         send_event = instruction_txtbox.submit(
             chat_stream_local,
-            [instruction_txtbox, state_chatbot, cancel_btn, reset_btn],
-            [state_chatbot, chatbot, cancel_btn, reset_btn])
+            [instruction_txtbox, state_chatbot, cancel_btn, reset_btn, session_id],
+            [state_chatbot, chatbot, cancel_btn, reset_btn, session_id])
         instruction_txtbox.submit(
             lambda: gr.Textbox.update(value=''),
             [],
             [instruction_txtbox],
         )
         cancel_btn.click(cancel_local_func,
-                         [state_chatbot, cancel_btn, reset_btn],
-                         [state_chatbot, cancel_btn, reset_btn],
+                         [state_chatbot, cancel_btn, reset_btn, session_id],
+                         [state_chatbot, cancel_btn, reset_btn, session_id],
                          cancels=[send_event])
 
-        reset_btn.click(reset_local_func, [instruction_txtbox, state_chatbot],
-                        [state_chatbot, chatbot, instruction_txtbox],
+        reset_btn.click(reset_local_func, 
+                        [instruction_txtbox, state_chatbot, session_id],
+                        [state_chatbot, chatbot, instruction_txtbox, session_id],
                         cancels=[send_event])
 
     print(f'server is gonna mount on: http://{server_name}:{server_port}')

From 4931e68f661223079899016911533f952766e420 Mon Sep 17 00:00:00 2001
From: aisensiy <aisensiy@163.com>
Date: Thu, 12 Oct 2023 22:53:35 +0800
Subject: [PATCH 02/11] use a new session id after reset

---
 lmdeploy/serve/gradio/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py
index 0023a78a29..0c61254e33 100644
--- a/lmdeploy/serve/gradio/app.py
+++ b/lmdeploy/serve/gradio/app.py
@@ -400,7 +400,7 @@ async def reset_local_func(instruction_txtbox: gr.Textbox,
         state_chatbot,
         state_chatbot,
         gr.Textbox.update(value=''),
-        session_id
+        random.randint(0, 100000),
     )
 
 

From d0df11af580f50fb7f2f96073f3acd3dd6fb20ae Mon Sep 17 00:00:00 2001
From: aisensiy <aisensiy@163.com>
Date: Thu, 12 Oct 2023 22:54:01 +0800
Subject: [PATCH 03/11] rename session_id to state_session_id, matching state_chatbot

---
 lmdeploy/serve/gradio/app.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py
index 0c61254e33..c1569ca00a 100644
--- a/lmdeploy/serve/gradio/app.py
+++ b/lmdeploy/serve/gradio/app.py
@@ -457,7 +457,7 @@ def run_local(model_path: str,
 
     with gr.Blocks(css=CSS, theme=THEME) as demo:
         state_chatbot = gr.State([])
-        session_id = gr.State(random.randint(0,100000))
+        state_session_id = gr.State(random.randint(0,100000))
 
         with gr.Column(elem_id='container'):
             gr.Markdown('## LMDeploy Playground')
@@ -474,21 +474,21 @@ def run_local(model_path: str,
 
         send_event = instruction_txtbox.submit(
             chat_stream_local,
-            [instruction_txtbox, state_chatbot, cancel_btn, reset_btn, session_id],
-            [state_chatbot, chatbot, cancel_btn, reset_btn, session_id])
+            [instruction_txtbox, state_chatbot, cancel_btn, reset_btn, state_session_id],
+            [state_chatbot, chatbot, cancel_btn, reset_btn, state_session_id])
         instruction_txtbox.submit(
             lambda: gr.Textbox.update(value=''),
             [],
             [instruction_txtbox],
         )
         cancel_btn.click(cancel_local_func,
-                         [state_chatbot, cancel_btn, reset_btn, session_id],
-                         [state_chatbot, cancel_btn, reset_btn, session_id],
+                         [state_chatbot, cancel_btn, reset_btn, state_session_id],
+                         [state_chatbot, cancel_btn, reset_btn, state_session_id],
                          cancels=[send_event])
 
         reset_btn.click(reset_local_func, 
-                        [instruction_txtbox, state_chatbot, session_id],
-                        [state_chatbot, chatbot, instruction_txtbox, session_id],
+                        [instruction_txtbox, state_chatbot, state_session_id],
+                        [state_chatbot, chatbot, instruction_txtbox, state_session_id],
                         cancels=[send_event])
 
     print(f'server is gonna mount on: http://{server_name}:{server_port}')

From e084725abb834d22d90ba4d1f32c99b8ca0b3da1 Mon Sep 17 00:00:00 2001
From: aisensiy <aisensiy@163.com>
Date: Thu, 12 Oct 2023 23:02:24 +0800
Subject: [PATCH 04/11] update comments

---
 lmdeploy/serve/gradio/app.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py
index c1569ca00a..4e46fd4816 100644
--- a/lmdeploy/serve/gradio/app.py
+++ b/lmdeploy/serve/gradio/app.py
@@ -344,7 +344,9 @@ async def chat_stream_local(
     Args:
         instruction (str): user's prompt
         state_chatbot (Sequence): the chatting history
-        request (gr.Request): the request from a user
+        cancel_btn (gr.Button): the cancel button
+        reset_btn (gr.Button): the reset button
+        session_id (int): the session id
     """
     state_chatbot = state_chatbot + [(instruction, None)]
 
@@ -384,7 +386,7 @@ async def reset_local_func(instruction_txtbox: gr.Textbox,
     Args:
         instruction_txtbox (str): user's prompt
         state_chatbot (Sequence): the chatting history
-        request (gr.Request): the request from a user
+        session_id (int): the session id
     """
     state_chatbot = []
     # end the session
@@ -404,14 +406,16 @@ async def reset_local_func(instruction_txtbox: gr.Textbox,
     )
 
 
-async def cancel_local_func(state_chatbot: gr.State, cancel_btn: gr.Button,
+async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
                             reset_btn: gr.Button, session_id: int):
     """stop the session.
 
     Args:
         instruction_txtbox (str): user's prompt
         state_chatbot (Sequence): the chatting history
-        request (gr.Request): the request from a user
+        cancel_btn (gr.Button): the cancel button
+        reset_btn (gr.Button): the reset button
+        session_id (int): the session id
     """
     # end the session
     async for out in InterFace.async_engine.generate('',

From 67ec14511d063f905ec61ad63709fbf885739bf6 Mon Sep 17 00:00:00 2001
From: aisensiy <aisensiy@163.com>
Date: Fri, 13 Oct 2023 10:45:59 +0800
Subject: [PATCH 05/11] reformat files

---
 lmdeploy/serve/gradio/app.py | 37 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py
index 4e46fd4816..67855b34d1 100644
--- a/lmdeploy/serve/gradio/app.py
+++ b/lmdeploy/serve/gradio/app.py
@@ -350,8 +350,7 @@ async def chat_stream_local(
     """
     state_chatbot = state_chatbot + [(instruction, None)]
 
-    yield (state_chatbot, state_chatbot, disable_btn, enable_btn,
-           session_id)
+    yield (state_chatbot, state_chatbot, disable_btn, enable_btn, session_id)
 
     async for outputs in InterFace.async_engine.generate(
             instruction,
@@ -374,13 +373,11 @@ async def chat_stream_local(
         yield (state_chatbot, state_chatbot, enable_btn, disable_btn,
                session_id)
 
-    yield (state_chatbot, state_chatbot, disable_btn, enable_btn,
-           session_id)
+    yield (state_chatbot, state_chatbot, disable_btn, enable_btn, session_id)
 
 
 async def reset_local_func(instruction_txtbox: gr.Textbox,
-                           state_chatbot: gr.State, 
-                           session_id: int):
+                           state_chatbot: gr.State, session_id: int):
     """reset the session.
 
     Args:
@@ -461,7 +458,7 @@ def run_local(model_path: str,
 
     with gr.Blocks(css=CSS, theme=THEME) as demo:
         state_chatbot = gr.State([])
-        state_session_id = gr.State(random.randint(0,100000))
+        state_session_id = gr.State(random.randint(0, 100000))
 
         with gr.Column(elem_id='container'):
             gr.Markdown('## LMDeploy Playground')
@@ -476,24 +473,26 @@ def run_local(model_path: str,
                 cancel_btn = gr.Button(value='Cancel', interactive=False)
                 reset_btn = gr.Button(value='Reset')
 
-        send_event = instruction_txtbox.submit(
-            chat_stream_local,
-            [instruction_txtbox, state_chatbot, cancel_btn, reset_btn, state_session_id],
-            [state_chatbot, chatbot, cancel_btn, reset_btn, state_session_id])
+        send_event = instruction_txtbox.submit(chat_stream_local, [
+            instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
+            state_session_id
+        ], [state_chatbot, chatbot, cancel_btn, reset_btn, state_session_id])
         instruction_txtbox.submit(
             lambda: gr.Textbox.update(value=''),
             [],
             [instruction_txtbox],
         )
-        cancel_btn.click(cancel_local_func,
-                         [state_chatbot, cancel_btn, reset_btn, state_session_id],
-                         [state_chatbot, cancel_btn, reset_btn, state_session_id],
-                         cancels=[send_event])
+        cancel_btn.click(
+            cancel_local_func,
+            [state_chatbot, cancel_btn, reset_btn, state_session_id],
+            [state_chatbot, cancel_btn, reset_btn, state_session_id],
+            cancels=[send_event])
 
-        reset_btn.click(reset_local_func, 
-                        [instruction_txtbox, state_chatbot, state_session_id],
-                        [state_chatbot, chatbot, instruction_txtbox, state_session_id],
-                        cancels=[send_event])
+        reset_btn.click(
+            reset_local_func,
+            [instruction_txtbox, state_chatbot, state_session_id],
+            [state_chatbot, chatbot, instruction_txtbox, state_session_id],
+            cancels=[send_event])
 
     print(f'server is gonna mount on: http://{server_name}:{server_port}')
     demo.queue(concurrency_count=batch_size, max_size=100,

From c2d22efc1c9872daf18eee41429399e811329f79 Mon Sep 17 00:00:00 2001
From: aisensiy <aisensiy@163.com>
Date: Mon, 16 Oct 2023 14:28:19 +0800
Subject: [PATCH 06/11] init session id when the Blocks page loads

---
 lmdeploy/serve/gradio/app.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py
index 67855b34d1..33e18bb541 100644
--- a/lmdeploy/serve/gradio/app.py
+++ b/lmdeploy/serve/gradio/app.py
@@ -395,11 +395,13 @@ async def reset_local_func(instruction_txtbox: gr.Textbox,
                                                      sequence_end=True):
         pass
 
+    new_session_id = random.randint(0, 100000)
     return (
         state_chatbot,
         state_chatbot,
         gr.Textbox.update(value=''),
-        random.randint(0, 100000),
+        new_session_id,
+        new_session_id,
     )
 
 
@@ -458,11 +460,13 @@ def run_local(model_path: str,
 
     with gr.Blocks(css=CSS, theme=THEME) as demo:
         state_chatbot = gr.State([])
-        state_session_id = gr.State(random.randint(0, 100000))
+        state_session_id = gr.State(-1)
 
         with gr.Column(elem_id='container'):
             gr.Markdown('## LMDeploy Playground')
 
+            session_id_box = gr.Number(label='Session ID')
+
             chatbot = gr.Chatbot(
                 elem_id='chatbot',
                 label=InterFace.async_engine.tm_model.model_name)
@@ -488,11 +492,21 @@ def run_local(model_path: str,
             [state_chatbot, cancel_btn, reset_btn, state_session_id],
             cancels=[send_event])
 
-        reset_btn.click(
-            reset_local_func,
-            [instruction_txtbox, state_chatbot, state_session_id],
-            [state_chatbot, chatbot, instruction_txtbox, state_session_id],
-            cancels=[send_event])
+        reset_btn.click(reset_local_func,
+                        [instruction_txtbox, state_chatbot, state_session_id],
+                        [
+                            state_chatbot, chatbot, instruction_txtbox,
+                            session_id_box, state_session_id
+                        ],
+                        cancels=[send_event])
+
+        def init():
+            new_session_id = random.randint(0, 100000)
+            return [new_session_id, new_session_id]
+
+        demo.load(init,
+                  inputs=None,
+                  outputs=[state_session_id, session_id_box])
 
     print(f'server is gonna mount on: http://{server_name}:{server_port}')
     demo.queue(concurrency_count=batch_size, max_size=100,

From af8c449ed5e5e5c763c8b4ff23f0cb7cb69ff43c Mon Sep 17 00:00:00 2001
From: aisensiy <aisensiy@163.com>
Date: Fri, 20 Oct 2023 14:20:36 +0800
Subject: [PATCH 07/11] use auto-incremented session id

---
 lmdeploy/serve/gradio/app.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/lmdeploy/serve/gradio/app.py b/lmdeploy/serve/gradio/app.py
index 33e18bb541..bc58cea5b9 100644
--- a/lmdeploy/serve/gradio/app.py
+++ b/lmdeploy/serve/gradio/app.py
@@ -1,6 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import os
-import random
 import threading
 import time
 from functools import partial
@@ -25,6 +24,13 @@
 disable_btn = gr.Button.update(interactive=False)
 
 
+# an IO interface managing variables
+class InterFace:
+    async_engine: AsyncEngine = None  # for run_local
+    restful_api_url: str = None  # for run_restful
+    global_session_id: int = 0
+
+
 def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot,
                 request: gr.Request):
     """Chat with AI assistant.
@@ -146,12 +152,6 @@ def run_server(triton_server_addr: str,
     )
 
 
-# a IO interface mananing variables
-class InterFace:
-    async_engine: AsyncEngine = None  # for run_local
-    restful_api_url: str = None  # for run_restful
-
-
 def chat_stream_restful(
     instruction: str,
     state_chatbot: Sequence,
@@ -377,7 +377,7 @@ async def chat_stream_local(
 
 
 async def reset_local_func(instruction_txtbox: gr.Textbox,
-                           state_chatbot: gr.State, session_id: int):
+                           state_chatbot: Sequence, session_id: int):
     """reset the session.
 
     Args:
@@ -394,8 +394,8 @@ async def reset_local_func(instruction_txtbox: gr.Textbox,
                                                      sequence_start=False,
                                                      sequence_end=True):
         pass
-
-    new_session_id = random.randint(0, 100000)
+    InterFace.global_session_id += 1
+    new_session_id = InterFace.global_session_id
     return (
         state_chatbot,
         state_chatbot,
@@ -501,7 +501,8 @@ def run_local(model_path: str,
                         cancels=[send_event])
 
         def init():
-            new_session_id = random.randint(0, 100000)
+            InterFace.global_session_id += 1
+            new_session_id = InterFace.global_session_id
             return [new_session_id, new_session_id]
 
         demo.load(init,

From f62b0c5867aad9ac25d83350d8488558fd50317b Mon Sep 17 00:00:00 2001
From: aisensiy <aisensiy@163.com>
Date: Sat, 4 Nov 2023 00:25:14 +0800
Subject: [PATCH 08/11] remove session id textbox

---
 lmdeploy/serve/gradio/turbomind_coupled.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/lmdeploy/serve/gradio/turbomind_coupled.py b/lmdeploy/serve/gradio/turbomind_coupled.py
index 8aed8eb737..7bff73eb6a 100644
--- a/lmdeploy/serve/gradio/turbomind_coupled.py
+++ b/lmdeploy/serve/gradio/turbomind_coupled.py
@@ -146,8 +146,6 @@ def run_local(model_path: str,
         with gr.Column(elem_id='container'):
             gr.Markdown('## LMDeploy Playground')
 
-            session_id_box = gr.Number(label='Session ID')
-
             chatbot = gr.Chatbot(
                 elem_id='chatbot',
                 label=InterFace.async_engine.tm_model.model_name)
@@ -173,22 +171,18 @@ def run_local(model_path: str,
             [state_chatbot, cancel_btn, reset_btn, state_session_id],
             cancels=[send_event])
 
-        reset_btn.click(reset_local_func,
-                        [instruction_txtbox, state_chatbot, state_session_id],
-                        [
-                            state_chatbot, chatbot, instruction_txtbox,
-                            session_id_box, state_session_id
-                        ],
-                        cancels=[send_event])
+        reset_btn.click(
+            reset_local_func,
+            [instruction_txtbox, state_chatbot, state_session_id],
+            [state_chatbot, chatbot, instruction_txtbox, state_session_id],
+            cancels=[send_event])
 
         def init():
             InterFace.global_session_id += 1
             new_session_id = InterFace.global_session_id
             return [new_session_id, new_session_id]
 
-        demo.load(init,
-                  inputs=None,
-                  outputs=[state_session_id, session_id_box])
+        demo.load(init, inputs=None, outputs=[state_session_id])
 
     print(f'server is gonna mount on: http://{server_name}:{server_port}')
     demo.queue(concurrency_count=batch_size, max_size=100,

From b4529b05a82bab532792ce975f42277eae0276ff Mon Sep 17 00:00:00 2001
From: AllentDan <AllentDan@yeah.net>
Date: Mon, 6 Nov 2023 10:30:39 +0800
Subject: [PATCH 09/11] apply to api_server and tritonserver

---
 lmdeploy/serve/gradio/api_server_backend.py   | 64 ++++++++-----------
 .../serve/gradio/triton_server_backend.py     | 28 ++++----
 lmdeploy/serve/gradio/turbomind_coupled.py    | 38 ++++-------
 3 files changed, 58 insertions(+), 72 deletions(-)

diff --git a/lmdeploy/serve/gradio/api_server_backend.py b/lmdeploy/serve/gradio/api_server_backend.py
index ce64508795..1c397799e1 100644
--- a/lmdeploy/serve/gradio/api_server_backend.py
+++ b/lmdeploy/serve/gradio/api_server_backend.py
@@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import threading
 import time
 from typing import Sequence
 
@@ -8,20 +7,16 @@
 from lmdeploy.serve.gradio.constants import CSS, THEME, disable_btn, enable_btn
 from lmdeploy.serve.openai.api_client import (get_model_list,
                                               get_streaming_response)
-from lmdeploy.serve.openai.api_server import ip2id
 
 
 class InterFace:
     api_server_url: str = None
+    global_session_id: int = 0
 
 
-def chat_stream_restful(
-    instruction: str,
-    state_chatbot: Sequence,
-    cancel_btn: gr.Button,
-    reset_btn: gr.Button,
-    request: gr.Request,
-):
+def chat_stream_restful(instruction: str, state_chatbot: Sequence,
+                        cancel_btn: gr.Button, reset_btn: gr.Button,
+                        session_id: int):
     """Chat with AI assistant.
 
     Args:
@@ -29,14 +24,9 @@ def chat_stream_restful(
         state_chatbot (Sequence): the chatting history
         request (gr.Request): the request from a user
     """
-    session_id = threading.current_thread().ident
-    if request is not None:
-        session_id = ip2id(request.kwargs['client']['host'])
-    bot_summarized_response = ''
     state_chatbot = state_chatbot + [(instruction, None)]
 
-    yield (state_chatbot, state_chatbot, disable_btn, enable_btn,
-           f'{bot_summarized_response}'.strip())
+    yield (state_chatbot, state_chatbot, disable_btn, enable_btn)
 
     for response, tokens, finish_reason in get_streaming_response(
             instruction,
@@ -56,15 +46,13 @@ def chat_stream_restful(
             state_chatbot[-1] = (state_chatbot[-1][0],
                                  state_chatbot[-1][1] + response
                                  )  # piece by piece
-        yield (state_chatbot, state_chatbot, enable_btn, disable_btn,
-               f'{bot_summarized_response}'.strip())
+        yield (state_chatbot, state_chatbot, enable_btn, disable_btn)
 
-    yield (state_chatbot, state_chatbot, disable_btn, enable_btn,
-           f'{bot_summarized_response}'.strip())
+    yield (state_chatbot, state_chatbot, disable_btn, enable_btn)
 
 
 def reset_restful_func(instruction_txtbox: gr.Textbox, state_chatbot: gr.State,
-                       request: gr.Request):
+                       session_id: int):
     """reset the session.
 
     Args:
@@ -73,10 +61,6 @@ def reset_restful_func(instruction_txtbox: gr.Textbox, state_chatbot: gr.State,
         request (gr.Request): the request from a user
     """
     state_chatbot = []
-
-    session_id = threading.current_thread().ident
-    if request is not None:
-        session_id = ip2id(request.kwargs['client']['host'])
     # end the session
     for response, tokens, finish_reason in get_streaming_response(
             '',
@@ -94,7 +78,7 @@ def reset_restful_func(instruction_txtbox: gr.Textbox, state_chatbot: gr.State,
 
 
 def cancel_restful_func(state_chatbot: gr.State, cancel_btn: gr.Button,
-                        reset_btn: gr.Button, request: gr.Request):
+                        reset_btn: gr.Button, session_id: int):
     """stop the session.
 
     Args:
@@ -103,9 +87,6 @@ def cancel_restful_func(state_chatbot: gr.State, cancel_btn: gr.Button,
         request (gr.Request): the request from a user
     """
     yield (state_chatbot, disable_btn, disable_btn)
-    session_id = threading.current_thread().ident
-    if request is not None:
-        session_id = ip2id(request.kwargs['client']['host'])
     # end the session
     for out in get_streaming_response(
             '',
@@ -152,6 +133,7 @@ def run_api_server(api_server_url: str,
 
     with gr.Blocks(css=CSS, theme=THEME) as demo:
         state_chatbot = gr.State([])
+        state_session_id = gr.State(0)
 
         with gr.Column(elem_id='container'):
             gr.Markdown('## LMDeploy Playground')
@@ -164,25 +146,33 @@ def run_api_server(api_server_url: str,
                 cancel_btn = gr.Button(value='Cancel', interactive=False)
                 reset_btn = gr.Button(value='Reset')
 
-        send_event = instruction_txtbox.submit(
-            chat_stream_restful,
-            [instruction_txtbox, state_chatbot, cancel_btn, reset_btn],
-            [state_chatbot, chatbot, cancel_btn, reset_btn])
+        send_event = instruction_txtbox.submit(chat_stream_restful, [
+            instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
+            state_session_id
+        ], [state_chatbot, chatbot, cancel_btn, reset_btn])
         instruction_txtbox.submit(
             lambda: gr.Textbox.update(value=''),
             [],
             [instruction_txtbox],
         )
-        cancel_btn.click(cancel_restful_func,
-                         [state_chatbot, cancel_btn, reset_btn],
-                         [state_chatbot, cancel_btn, reset_btn],
-                         cancels=[send_event])
+        cancel_btn.click(
+            cancel_restful_func,
+            [state_chatbot, cancel_btn, reset_btn, state_session_id],
+            [state_chatbot, cancel_btn, reset_btn],
+            cancels=[send_event])
 
         reset_btn.click(reset_restful_func,
-                        [instruction_txtbox, state_chatbot],
+                        [instruction_txtbox, state_chatbot, state_session_id],
                         [state_chatbot, chatbot, instruction_txtbox],
                         cancels=[send_event])
 
+        def init():
+            InterFace.global_session_id += 1
+            new_session_id = InterFace.global_session_id
+            return new_session_id
+
+        demo.load(init, inputs=None, outputs=[state_session_id])
+
     print(f'server is gonna mount on: http://{server_name}:{server_port}')
     demo.queue(concurrency_count=batch_size, max_size=100,
                api_open=True).launch(
diff --git a/lmdeploy/serve/gradio/triton_server_backend.py b/lmdeploy/serve/gradio/triton_server_backend.py
index 5936f4ba5f..d8371ee4aa 100644
--- a/lmdeploy/serve/gradio/triton_server_backend.py
+++ b/lmdeploy/serve/gradio/triton_server_backend.py
@@ -1,19 +1,20 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import os
-import threading
 from functools import partial
 from typing import Sequence
 
 import gradio as gr
 
 from lmdeploy.serve.gradio.constants import CSS, THEME, disable_btn, enable_btn
-from lmdeploy.serve.openai.api_server import ip2id
 from lmdeploy.serve.turbomind.chatbot import Chatbot
 
 
+class InterFace:
+    global_session_id: int = 0
+
+
 def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot,
-                cancel_btn: gr.Button, reset_btn: gr.Button,
-                request: gr.Request):
+                cancel_btn: gr.Button, reset_btn: gr.Button, session_id: int):
     """Chat with AI assistant.
 
     Args:
@@ -25,9 +26,6 @@ def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot,
         request (gr.Request): the request from a user
     """
     instruction = state_chatbot[-1][0]
-    session_id = threading.current_thread().ident
-    if request is not None:
-        session_id = ip2id(request.kwargs['client']['host'])
 
     bot_response = llama_chatbot.stream_infer(
         session_id, instruction, f'{session_id}-{len(state_chatbot)}')
@@ -92,6 +90,7 @@ def run_triton_server(triton_server_addr: str,
         llama_chatbot = gr.State(
             Chatbot(triton_server_addr, log_level=log_level, display=True))
         state_chatbot = gr.State([])
+        state_session_id = gr.State(0)
         model_name = llama_chatbot.value.model_name
         reset_all = partial(reset_all_func,
                             model_name=model_name,
@@ -110,10 +109,10 @@ def run_triton_server(triton_server_addr: str,
 
         send_event = instruction_txtbox.submit(
             add_instruction, [instruction_txtbox, state_chatbot],
-            [instruction_txtbox, state_chatbot]).then(
-                chat_stream,
-                [state_chatbot, llama_chatbot, cancel_btn, reset_btn],
-                [state_chatbot, chatbot, cancel_btn, reset_btn])
+            [instruction_txtbox, state_chatbot]).then(chat_stream, [
+                state_chatbot, llama_chatbot, cancel_btn, reset_btn,
+                state_session_id
+            ], [state_chatbot, chatbot, cancel_btn, reset_btn])
 
         cancel_btn.click(cancel_func,
                          [state_chatbot, llama_chatbot, cancel_btn, reset_btn],
@@ -125,6 +124,13 @@ def run_triton_server(triton_server_addr: str,
             [llama_chatbot, state_chatbot, chatbot, instruction_txtbox],
             cancels=[send_event])
 
+        def init():
+            InterFace.global_session_id += 1
+            new_session_id = InterFace.global_session_id
+            return new_session_id
+
+        demo.load(init, inputs=None, outputs=[state_session_id])
+
     print(f'server is gonna mount on: http://{server_name}:{server_port}')
     demo.queue(concurrency_count=4, max_size=100, api_open=True).launch(
         max_threads=10,
diff --git a/lmdeploy/serve/gradio/turbomind_coupled.py b/lmdeploy/serve/gradio/turbomind_coupled.py
index 7bff73eb6a..2efdb7db33 100644
--- a/lmdeploy/serve/gradio/turbomind_coupled.py
+++ b/lmdeploy/serve/gradio/turbomind_coupled.py
@@ -30,7 +30,7 @@ async def chat_stream_local(
     """
     state_chatbot = state_chatbot + [(instruction, None)]
 
-    yield (state_chatbot, state_chatbot, disable_btn, enable_btn, session_id)
+    yield (state_chatbot, state_chatbot, disable_btn, enable_btn)
 
     async for outputs in InterFace.async_engine.generate(
             instruction,
@@ -51,10 +51,9 @@ async def chat_stream_local(
             state_chatbot[-1] = (state_chatbot[-1][0],
                                  state_chatbot[-1][1] + response
                                  )  # piece by piece
-        yield (state_chatbot, state_chatbot, enable_btn, disable_btn,
-               session_id)
+        yield (state_chatbot, state_chatbot, enable_btn, disable_btn)
 
-    yield (state_chatbot, state_chatbot, disable_btn, enable_btn, session_id)
+    yield (state_chatbot, state_chatbot, disable_btn, enable_btn)
 
 
 async def reset_local_func(instruction_txtbox: gr.Textbox,
@@ -75,15 +74,7 @@ async def reset_local_func(instruction_txtbox: gr.Textbox,
                                                      sequence_start=False,
                                                      sequence_end=True):
         pass
-    InterFace.global_session_id += 1
-    new_session_id = InterFace.global_session_id
-    return (
-        state_chatbot,
-        state_chatbot,
-        gr.Textbox.update(value=''),
-        new_session_id,
-        new_session_id,
-    )
+    return (state_chatbot, state_chatbot, gr.Textbox.update(value=''))
 
 
 async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
@@ -97,7 +88,7 @@ async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
         reset_btn (gr.Button): the reset button
         session_id (int): the session id
     """
-    yield (state_chatbot, disable_btn, enable_btn, session_id)
+    yield (state_chatbot, disable_btn, enable_btn)
     async for out in InterFace.async_engine.generate('',
                                                      session_id,
                                                      request_output_len=0,
@@ -118,7 +109,7 @@ async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
                                                      sequence_start=True,
                                                      sequence_end=False):
         pass
-    yield (state_chatbot, disable_btn, enable_btn, session_id)
+    yield (state_chatbot, disable_btn, enable_btn)
 
 
 def run_local(model_path: str,
@@ -141,7 +132,7 @@ def run_local(model_path: str,
 
     with gr.Blocks(css=CSS, theme=THEME) as demo:
         state_chatbot = gr.State([])
-        state_session_id = gr.State(-1)
+        state_session_id = gr.State(0)
 
         with gr.Column(elem_id='container'):
             gr.Markdown('## LMDeploy Playground')
@@ -159,7 +150,7 @@ def run_local(model_path: str,
         send_event = instruction_txtbox.submit(chat_stream_local, [
             instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
             state_session_id
-        ], [state_chatbot, chatbot, cancel_btn, reset_btn, state_session_id])
+        ], [state_chatbot, chatbot, cancel_btn, reset_btn])
         instruction_txtbox.submit(
             lambda: gr.Textbox.update(value=''),
             [],
@@ -168,19 +159,18 @@ def run_local(model_path: str,
         cancel_btn.click(
             cancel_local_func,
             [state_chatbot, cancel_btn, reset_btn, state_session_id],
-            [state_chatbot, cancel_btn, reset_btn, state_session_id],
+            [state_chatbot, cancel_btn, reset_btn],
             cancels=[send_event])
 
-        reset_btn.click(
-            reset_local_func,
-            [instruction_txtbox, state_chatbot, state_session_id],
-            [state_chatbot, chatbot, instruction_txtbox, state_session_id],
-            cancels=[send_event])
+        reset_btn.click(reset_local_func,
+                        [instruction_txtbox, state_chatbot, state_session_id],
+                        [state_chatbot, chatbot, instruction_txtbox],
+                        cancels=[send_event])
 
         def init():
             InterFace.global_session_id += 1
             new_session_id = InterFace.global_session_id
-            return [new_session_id, new_session_id]
+            return new_session_id
 
         demo.load(init, inputs=None, outputs=[state_session_id])
 

From 0427695dc4e53ef8cdcee6c7f04443aca08075d2 Mon Sep 17 00:00:00 2001
From: AllentDan <AllentDan@yeah.net>
Date: Mon, 6 Nov 2023 11:53:02 +0800
Subject: [PATCH 10/11] update docstring

---
 lmdeploy/serve/gradio/api_server_backend.py    | 6 +++---
 lmdeploy/serve/gradio/triton_server_backend.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lmdeploy/serve/gradio/api_server_backend.py b/lmdeploy/serve/gradio/api_server_backend.py
index 1c397799e1..acf540d90f 100644
--- a/lmdeploy/serve/gradio/api_server_backend.py
+++ b/lmdeploy/serve/gradio/api_server_backend.py
@@ -22,7 +22,7 @@ def chat_stream_restful(instruction: str, state_chatbot: Sequence,
     Args:
         instruction (str): user's prompt
         state_chatbot (Sequence): the chatting history
-        request (gr.Request): the request from a user
+        session_id (int): the session id
     """
     state_chatbot = state_chatbot + [(instruction, None)]
 
@@ -58,7 +58,7 @@ def reset_restful_func(instruction_txtbox: gr.Textbox, state_chatbot: gr.State,
     Args:
         instruction_txtbox (str): user's prompt
         state_chatbot (Sequence): the chatting history
-        request (gr.Request): the request from a user
+        session_id (int): the session id
     """
     state_chatbot = []
     # end the session
@@ -84,7 +84,7 @@ def cancel_restful_func(state_chatbot: gr.State, cancel_btn: gr.Button,
     Args:
         instruction_txtbox (str): user's prompt
         state_chatbot (Sequence): the chatting history
-        request (gr.Request): the request from a user
+        session_id (int): the session id
     """
     yield (state_chatbot, disable_btn, disable_btn)
     # end the session
diff --git a/lmdeploy/serve/gradio/triton_server_backend.py b/lmdeploy/serve/gradio/triton_server_backend.py
index d8371ee4aa..479f0c9503 100644
--- a/lmdeploy/serve/gradio/triton_server_backend.py
+++ b/lmdeploy/serve/gradio/triton_server_backend.py
@@ -23,7 +23,7 @@ def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot,
         llama_chatbot (Chatbot): the instance of a chatbot
         cancel_btn (bool): enable the cancel button or not
         reset_btn (bool): enable the reset button or not
-        request (gr.Request): the request from a user
+        session_id (int): the session id
     """
     instruction = state_chatbot[-1][0]
 

From a9459e63095f360c89b555df3212252ced7b011d Mon Sep 17 00:00:00 2001
From: AllentDan <AllentDan@yeah.net>
Date: Mon, 6 Nov 2023 14:23:21 +0800
Subject: [PATCH 11/11] add lock for safety

---
 lmdeploy/serve/gradio/api_server_backend.py    | 5 ++++-
 lmdeploy/serve/gradio/triton_server_backend.py | 5 ++++-
 lmdeploy/serve/gradio/turbomind_coupled.py     | 5 ++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lmdeploy/serve/gradio/api_server_backend.py b/lmdeploy/serve/gradio/api_server_backend.py
index acf540d90f..8dd92fa0fd 100644
--- a/lmdeploy/serve/gradio/api_server_backend.py
+++ b/lmdeploy/serve/gradio/api_server_backend.py
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import time
+from threading import Lock
 from typing import Sequence
 
 import gradio as gr
@@ -12,6 +13,7 @@
 class InterFace:
     api_server_url: str = None
     global_session_id: int = 0
+    lock = Lock()
 
 
 def chat_stream_restful(instruction: str, state_chatbot: Sequence,
@@ -167,7 +169,10 @@ def run_api_server(api_server_url: str,
                         cancels=[send_event])
 
         def init():
-            InterFace.global_session_id += 1
-            new_session_id = InterFace.global_session_id
+            with InterFace.lock:
+                # Read the id while still holding the lock so that two
+                # concurrent page loads can never observe the same value.
+                InterFace.global_session_id += 1
+                new_session_id = InterFace.global_session_id
             return new_session_id
 
diff --git a/lmdeploy/serve/gradio/triton_server_backend.py b/lmdeploy/serve/gradio/triton_server_backend.py
index 479f0c9503..9148903cc5 100644
--- a/lmdeploy/serve/gradio/triton_server_backend.py
+++ b/lmdeploy/serve/gradio/triton_server_backend.py
@@ -1,6 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import os
 from functools import partial
+from threading import Lock
 from typing import Sequence
 
 import gradio as gr
@@ -11,6 +12,7 @@
 
 class InterFace:
     global_session_id: int = 0
+    lock = Lock()
 
 
 def chat_stream(state_chatbot: Sequence, llama_chatbot: Chatbot,
@@ -125,7 +127,10 @@ def run_triton_server(triton_server_addr: str,
             cancels=[send_event])
 
         def init():
-            InterFace.global_session_id += 1
-            new_session_id = InterFace.global_session_id
+            with InterFace.lock:
+                # Read the id while still holding the lock so that two
+                # concurrent page loads can never observe the same value.
+                InterFace.global_session_id += 1
+                new_session_id = InterFace.global_session_id
             return new_session_id
 
diff --git a/lmdeploy/serve/gradio/turbomind_coupled.py b/lmdeploy/serve/gradio/turbomind_coupled.py
index 2efdb7db33..e344abcbda 100644
--- a/lmdeploy/serve/gradio/turbomind_coupled.py
+++ b/lmdeploy/serve/gradio/turbomind_coupled.py
@@ -1,4 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from threading import Lock
 from typing import Sequence
 
 import gradio as gr
@@ -10,6 +11,7 @@
 class InterFace:
     async_engine: AsyncEngine = None
     global_session_id: int = 0
+    lock = Lock()
 
 
 async def chat_stream_local(
@@ -168,7 +170,10 @@ def run_local(model_path: str,
                         cancels=[send_event])
 
         def init():
-            InterFace.global_session_id += 1
-            new_session_id = InterFace.global_session_id
+            with InterFace.lock:
+                # Read the id while still holding the lock so that two
+                # concurrent page loads can never observe the same value.
+                InterFace.global_session_id += 1
+                new_session_id = InterFace.global_session_id
             return new_session_id