From 824ef42c6ceb8724c3cbb2cec913387b3c4a18f7 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Tue, 21 May 2024 12:41:07 +0800 Subject: [PATCH 01/12] Add function call for oai_std, prepare to access the graphical dialogue --- request_llms/oai_std_model_template.py | 20 ++++++++++++++------ shared_utils/cookie_manager.py | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index 648dbe41c..f8a89a805 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -66,7 +66,7 @@ def decode_chunk(chunk): return respose, finish_reason -def generate_message(input, model, key, history, max_output_token, system_prompt, temperature): +def generate_message(chatbot, input, model, key, history, max_output_token, system_prompt, temperature): """ 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 """ @@ -115,7 +115,9 @@ def generate_message(input, model, key, history, max_output_token, system_prompt def get_predict_function( api_key_conf_name, max_output_token, - disable_proxy = False + disable_proxy = False, + encode_call = generate_message, + decode_call = decode_chunk ): """ 为openai格式的API生成响应函数,其中传入参数: @@ -126,6 +128,10 @@ def get_predict_function( ⚠️请不要与模型的最大token数量相混淆。 disable_proxy: 是否使用代理,True为不使用,False为使用。 + encode_call: + 是否使用自定义的encode函数,如果不是,则使用默认的generate_message + decode_call: + 是否使用自定义的decode函数,如果不是,则使用默认的decode_chunk """ APIKEY = get_conf(api_key_conf_name) @@ -156,7 +162,8 @@ def predict_no_ui_long_connection( raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}") if inputs == "": inputs = "你好👋" - headers, playload = generate_message( + headers, playload = encode_call( + chatbot=None, input=inputs, model=llm_kwargs["llm_model"], key=APIKEY, @@ -206,7 +213,7 @@ def predict_no_ui_long_connection( break except requests.exceptions.ConnectionError: chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 - response_text, finish_reason = decode_chunk(chunk) + response_text, finish_reason = decode_call(chunk) # 返回的数据流第一次为空,继续等待 if response_text == "" and finish_reason != "False": continue @@ -289,7 +296,8 @@ def predict( ) # 刷新界面 time.sleep(2) - headers, playload = generate_message( + headers, playload = encode_call( + chatbot=chatbot, input=inputs, model=llm_kwargs["llm_model"], key=APIKEY, @@ -347,7 +355,7 @@ def predict( break except requests.exceptions.ConnectionError: chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 - response_text, finish_reason = decode_chunk(chunk) + response_text, finish_reason = decode_call(chunk) # 返回的数据流第一次为空,继续等待 if response_text == "" and finish_reason != "False": continue diff --git a/shared_utils/cookie_manager.py b/shared_utils/cookie_manager.py index c0994324c..b5df1e241 100644 --- a/shared_utils/cookie_manager.py +++ b/shared_utils/cookie_manager.py @@ -102,7 +102,7 @@ def process_history_cache(history_cache): output_list = [txt, txtx] input_name_list = ["txt(input)", "txtx(input)"] output_name_list = ["txt", "txtx"] - js_callback = """(txt, txtx)=>{console.log(txt); console.log(txtx);}""" + js_callback = "(txt, txtx)=>{console.log(txt); console.log(txtx);}" def function(txt, txtx): return "booo", "goooo" create_button_with_javascript_callback(btn_value, elem_id, variant, js_callback, input_list, output_list, function, input_name_list, output_name_list) From 524775502986a1322d7e9de73197dcfcade8fe76 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Tue, 21 May 2024 13:27:20 +0800 Subject: [PATCH 02/12] Update qwenapi --- request_llms/bridge_all.py | 51 
++++++++++---- request_llms/bridge_qwen.py | 131 +++++++++++++++++++++--------------- 2 files changed, 114 insertions(+), 68 deletions(-) diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 06e695835..6e1527c37 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -72,6 +72,7 @@ def decode(self, *args, **kwargs): ollama_endpoint = "http://localhost:11434/api/chat" yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions" deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions" +qwenapi_endpoint = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions" if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/' azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' @@ -93,6 +94,7 @@ def decode(self, *args, **kwargs): if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollama_endpoint] if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint] if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint] +if qwenapi_endpoint in API_URL_REDIRECT: qwenapi_endpoint = API_URL_REDIRECT[qwenapi_endpoint] # 获取tokenizer tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo") @@ -640,38 +642,61 @@ def decode(self, *args, **kwargs): except: print(trimmed_format_exc()) # -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=- -if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai +qwen_models = ["qwen-turbo","qwen-plus","qwen-max","qwen-max-longcontext","qwen-long"] +if any(item in qwen_models for item in AVAIL_LLM_MODELS): try: - from .bridge_qwen import predict_no_ui_long_connection as qwen_noui - from .bridge_qwen import predict as qwen_ui + qwen_1500_noui, qwen_1500_ui = get_predict_function( + api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=1500, disable_proxy=False + ) + qwen_2000_noui, qwen_2000_ui = get_predict_function( + api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=2000, disable_proxy=False + ) model_info.update({ "qwen-turbo": { - "fn_with_ui": qwen_ui, - "fn_without_ui": qwen_noui, + "fn_with_ui": qwen_1500_ui, + "fn_without_ui": qwen_1500_noui, "can_multi_thread": True, - "endpoint": None, + "endpoint": qwenapi_endpoint, "max_token": 6144, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, "qwen-plus": { - "fn_with_ui": qwen_ui, - "fn_without_ui": qwen_noui, + "fn_with_ui": qwen_2000_ui, + "fn_without_ui": qwen_2000_noui, "can_multi_thread": True, - "endpoint": None, + "endpoint": qwenapi_endpoint, "max_token": 30720, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, "qwen-max": { - "fn_with_ui": qwen_ui, - "fn_without_ui": qwen_noui, + "fn_with_ui": qwen_2000_ui, + "fn_without_ui": qwen_2000_noui, "can_multi_thread": True, - "endpoint": None, + "endpoint": qwenapi_endpoint, + "max_token": 6144, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen-max-longcontext": { + "fn_with_ui": qwen_2000_ui, + "fn_without_ui": qwen_2000_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, "max_token": 28672, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, - } + }, + "qwen-long": { + "fn_with_ui": qwen_2000_ui, + "fn_without_ui": qwen_2000_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 1000000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, }) except: 
print(trimmed_format_exc()) diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py index 2b1eeed27..7ee4524bd 100644 --- a/request_llms/bridge_qwen.py +++ b/request_llms/bridge_qwen.py @@ -1,66 +1,87 @@ -import time -import os -from toolbox import update_ui, get_conf, update_ui_lastest_msg -from toolbox import check_packages, report_exception +import json -model_name = 'Qwen' +timeout_bot_msg = ( + "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." + + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" +) -def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[], sys_prompt:str="", - observe_window:list=[], console_slience:bool=False): +def decode_chunk(chunk): """ - ⭐多线程方法 - 函数的说明请见 request_llms/bridge_all.py + 用于解读"content"和"finish_reason"的内容 """ - watch_dog_patience = 5 - response = "" - - from .com_qwenapi import QwenRequestInstance - sri = QwenRequestInstance() - for response in sri.generate(inputs, llm_kwargs, history, sys_prompt): - if len(observe_window) >= 1: - observe_window[0] = response - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。") - return response - -def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): - """ - ⭐单线程方法 - 函数的说明请见 request_llms/bridge_all.py - """ - chatbot.append((inputs, "")) - yield from update_ui(chatbot=chatbot, history=history) + chunk = chunk.decode() + respose = "" + finish_reason = "False" + try: + chunk = json.loads(chunk[6:]) + except: + finish_reason = "JSON_ERROR" + # 错误处理部分 + if "error" in chunk: + respose = "API_ERROR" + try: + chunk = json.loads(chunk) + finish_reason = chunk["error"]["code"] + except: + finish_reason = "API_ERROR" + return respose, finish_reason - # 尝试导入依赖,如果缺少依赖,则给出安装建议 try: - check_packages(["dashscope"]) + respose = chunk["choices"][0]["delta"]["content"] except: - yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。", - chatbot=chatbot, history=history, delay=0) - return + pass + try: + finish_reason = chunk["choices"][0]["finish_reason"] + except: + pass + return respose, finish_reason + - # 检查DASHSCOPE_API_KEY - if get_conf("DASHSCOPE_API_KEY") == "": - yield from update_ui_lastest_msg(f"请配置 DASHSCOPE_API_KEY。", - chatbot=chatbot, history=history, delay=0) - return +def generate_message(chatbot, input, model, key, history, max_output_token, system_prompt, temperature): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + api_key = f"Bearer {key}" - if additional_fn is not None: - from core_functional import handle_core_functionality - inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) - chatbot[-1] = (inputs, "") - yield from update_ui(chatbot=chatbot, history=history) + headers = {"Content-Type": "application/json", "Authorization": api_key} - # 开始接收回复 - from .com_qwenapi import QwenRequestInstance - sri = QwenRequestInstance() - response = f"[Local Message] 等待{model_name}响应中 ..." - for response in sri.generate(inputs, llm_kwargs, history, system_prompt): - chatbot[-1] = (inputs, response) - yield from update_ui(chatbot=chatbot, history=history) + conversation_cnt = len(history) // 2 - # 总结输出 - if response == f"[Local Message] 等待{model_name}响应中 ...": - response = f"[Local Message] {model_name}响应异常 ..." 
- history.extend([inputs, response]) - yield from update_ui(chatbot=chatbot, history=history) \ No newline at end of file + messages = [{"role": "system", "content": system_prompt}] + if conversation_cnt: + for index in range(0, 2 * conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index + 1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": + continue + if what_gpt_answer["content"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]["content"] = what_gpt_answer["content"] + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = input + messages.append(what_i_ask_now) + if temperature == 2: temperature -= 1e-5 + playload = { + "model": model, + "input": messages, + "parameters":{ + "result_format": "message", + "temperature": temperature, + "incremental_output": True, + "max_tokens": max_output_token, + } + } + try: + print(f" {model} : {conversation_cnt} : {input[:100]} ..........") + except: + print("输入中可能存在乱码。") + return headers, playload \ No newline at end of file From 3b438df55f55756c3a0c0a3c29053a135bf7fd52 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Tue, 21 May 2024 13:27:40 +0800 Subject: [PATCH 03/12] No need now --- request_llms/bridge_qwen.py | 87 -------------------------------- request_llms/com_qwenapi.py | 98 ------------------------------------- 2 files changed, 185 deletions(-) delete mode 100644 request_llms/bridge_qwen.py delete mode 100644 request_llms/com_qwenapi.py diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py deleted file mode 100644 index 7ee4524bd..000000000 --- a/request_llms/bridge_qwen.py +++ /dev/null @@ -1,87 +0,0 @@ -import json - -timeout_bot_msg = ( - "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." 
- + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" -) - -def decode_chunk(chunk): - """ - 用于解读"content"和"finish_reason"的内容 - """ - chunk = chunk.decode() - respose = "" - finish_reason = "False" - try: - chunk = json.loads(chunk[6:]) - except: - finish_reason = "JSON_ERROR" - # 错误处理部分 - if "error" in chunk: - respose = "API_ERROR" - try: - chunk = json.loads(chunk) - finish_reason = chunk["error"]["code"] - except: - finish_reason = "API_ERROR" - return respose, finish_reason - - try: - respose = chunk["choices"][0]["delta"]["content"] - except: - pass - try: - finish_reason = chunk["choices"][0]["finish_reason"] - except: - pass - return respose, finish_reason - - -def generate_message(chatbot, input, model, key, history, max_output_token, system_prompt, temperature): - """ - 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 - """ - api_key = f"Bearer {key}" - - headers = {"Content-Type": "application/json", "Authorization": api_key} - - conversation_cnt = len(history) // 2 - - messages = [{"role": "system", "content": system_prompt}] - if conversation_cnt: - for index in range(0, 2 * conversation_cnt, 2): - what_i_have_asked = {} - what_i_have_asked["role"] = "user" - what_i_have_asked["content"] = history[index] - what_gpt_answer = {} - what_gpt_answer["role"] = "assistant" - what_gpt_answer["content"] = history[index + 1] - if what_i_have_asked["content"] != "": - if what_gpt_answer["content"] == "": - continue - if what_gpt_answer["content"] == timeout_bot_msg: - continue - messages.append(what_i_have_asked) - messages.append(what_gpt_answer) - else: - messages[-1]["content"] = what_gpt_answer["content"] - what_i_ask_now = {} - what_i_ask_now["role"] = "user" - what_i_ask_now["content"] = input - messages.append(what_i_ask_now) - if temperature == 2: temperature -= 1e-5 - playload = { - "model": model, - "input": messages, - "parameters":{ - "result_format": "message", - "temperature": temperature, - "incremental_output": True, - "max_tokens": max_output_token, - } - } - try: - print(f" {model} : {conversation_cnt} : {input[:100]} ..........") - except: - print("输入中可能存在乱码。") - return headers, playload \ No newline at end of file diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py deleted file mode 100644 index 2cde52c1b..000000000 --- a/request_llms/com_qwenapi.py +++ /dev/null @@ -1,98 +0,0 @@ -from http import HTTPStatus -from toolbox import get_conf -import threading -import logging - -timeout_bot_msg = '[Local Message] Request timeout. Network error.' 
- -class QwenRequestInstance(): - def __init__(self): - import dashscope - self.time_to_yield_event = threading.Event() - self.time_to_exit_event = threading.Event() - self.result_buf = "" - - def validate_key(): - DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY") - if DASHSCOPE_API_KEY == '': return False - return True - - if not validate_key(): - raise RuntimeError('请配置 DASHSCOPE_API_KEY') - dashscope.api_key = get_conf("DASHSCOPE_API_KEY") - - - def generate(self, inputs, llm_kwargs, history, system_prompt): - # import _thread as thread - from dashscope import Generation - QWEN_MODEL = { - 'qwen-turbo': Generation.Models.qwen_turbo, - 'qwen-plus': Generation.Models.qwen_plus, - 'qwen-max': Generation.Models.qwen_max, - }[llm_kwargs['llm_model']] - top_p = llm_kwargs.get('top_p', 0.8) - if top_p == 0: top_p += 1e-5 - if top_p == 1: top_p -= 1e-5 - - self.result_buf = "" - responses = Generation.call( - model=QWEN_MODEL, - messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt), - top_p=top_p, - temperature=llm_kwargs.get('temperature', 1.0), - result_format='message', - stream=True, - incremental_output=True - ) - - for response in responses: - if response.status_code == HTTPStatus.OK: - if response.output.choices[0].finish_reason == 'stop': - try: - self.result_buf += response.output.choices[0].message.content - except: - pass - yield self.result_buf - break - elif response.output.choices[0].finish_reason == 'length': - self.result_buf += "[Local Message] 生成长度过长,后续输出被截断" - yield self.result_buf - break - else: - self.result_buf += response.output.choices[0].message.content - yield self.result_buf - else: - self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}" - yield self.result_buf - break - logging.info(f'[raw_input] {inputs}') - logging.info(f'[response] {self.result_buf}') - return self.result_buf - - -def generate_message_payload(inputs, llm_kwargs, history, system_prompt): - conversation_cnt = len(history) // 2 - if system_prompt == '': system_prompt = 'Hello!' 
- messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}] - if conversation_cnt: - for index in range(0, 2*conversation_cnt, 2): - what_i_have_asked = {} - what_i_have_asked["role"] = "user" - what_i_have_asked["content"] = history[index] - what_gpt_answer = {} - what_gpt_answer["role"] = "assistant" - what_gpt_answer["content"] = history[index+1] - if what_i_have_asked["content"] != "": - if what_gpt_answer["content"] == "": - continue - if what_gpt_answer["content"] == timeout_bot_msg: - continue - messages.append(what_i_have_asked) - messages.append(what_gpt_answer) - else: - messages[-1]['content'] = what_gpt_answer['content'] - what_i_ask_now = {} - what_i_ask_now["role"] = "user" - what_i_ask_now["content"] = inputs - messages.append(what_i_ask_now) - return messages From 0d775f2fcafa6c4c4ebfd280afb6bd4168ed3f48 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Tue, 21 May 2024 13:31:45 +0800 Subject: [PATCH 04/12] Update config --- config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index ad206d022..1866b2a36 100644 --- a/config.py +++ b/config.py @@ -43,7 +43,7 @@ # AVAIL_LLM_MODELS = [ # "qianfan", "deepseekcoder", # "spark", "sparkv2", "sparkv3", "sparkv3.5", -# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local", +# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext", "qwen-long", "qwen-local", # "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k", # "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125", "gpt-4o-2024-05-13" # "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", @@ -127,7 +127,7 @@ QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8" -# 接入通义千问在线大模型 https://dashscope.console.aliyun.com/ +# 接入通义千问在线大模型 https://bailian.console.aliyun.com/ DASHSCOPE_API_KEY = "" # 阿里灵积云API_KEY From 661da50c168cdb35898367d19d070e737f026f42 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 15:24:57 +0800 Subject: [PATCH 05/12] Add support for one-api version model --- request_llms/bridge_all.py | 30 +++++++++ request_llms/oai_version_std.py | 105 ++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 request_llms/oai_version_std.py diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 6e1527c37..3068a861a 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -940,6 +940,36 @@ def decode(self, *args, **kwargs): "token_cnt": get_token_num_gpt35, }, }) +# -=-=-=-=-=-=- one-api-version 对齐支持 -=-=-=-=-=-=- +for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-version-")]: + # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] + # 其中 + # "one-api-version-" 是前缀(必要) + # "gpt-4o" 是模型名(必要) + # "(max_token=32000)" 是配置(非必要) + try: + _, max_token_tmp = read_one_api_model_name(model) + except: + print(f"one-api-version模型 {model} 的 max_token 配置不是整数,请检查配置文件。") + continue + try: + from .oai_version_std import generate_message_version + one_api_version_noui, one_api_version_ui = get_predict_function( + api_key_conf_name="API_KEY", max_output_token=4000, disable_proxy=False, encode_call=generate_message_version + ) + model_info.update({ + model: { + "fn_with_ui": one_api_version_ui, + "fn_without_ui": one_api_version_noui, + "can_multi_thread": True, + "endpoint": openai_endpoint, + "max_token": max_token_tmp, + "tokenizer": tokenizer_gpt35, + "token_cnt": 
get_token_num_gpt35, + }, + }) + except: + print(trimmed_format_exc()) # -=-=-=-=-=-=- vllm 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]: # 为了更灵活地接入vllm多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["vllm-/home/hmp/llm/cache/Qwen1___5-32B-Chat(max_token=6666)"] diff --git a/request_llms/oai_version_std.py b/request_llms/oai_version_std.py new file mode 100644 index 000000000..7bbae0c29 --- /dev/null +++ b/request_llms/oai_version_std.py @@ -0,0 +1,105 @@ +from toolbox import update_ui, encode_image, every_image_file_in_path ,read_one_api_model_name +import os + +timeout_bot_msg = ( + "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." + + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" +) + + +def multiple_picture_types(image_paths): + """ + 根据图片类型返回image/jpeg, image/png, image/gif, image/webp,无法判断则返回image/jpeg + """ + for image_path in image_paths: + if image_path.endswith(".jpeg") or image_path.endswith(".jpg"): + return "image/jpeg" + elif image_path.endswith(".png"): + return "image/png" + elif image_path.endswith(".gif"): + return "image/gif" + elif image_path.endswith(".webp"): + return "image/webp" + return "image/jpeg" + + +def generate_message_version( + chatbot, input, model, key, history, max_output_token, system_prompt, temperature +): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + if chatbot != None: + have_recent_file, image_paths = every_image_file_in_path(chatbot) + else: + have_recent_file = False + image_paths = [] + conversation_cnt = len(history) // 2 + messages = [ + {"role": "system", "content": [{"type": "text", "text": system_prompt}]} + ] + + def make_media_input(inputs, image_paths): + for image_path in image_paths: + inputs = ( + inputs + + f'

' + ) + return inputs + + if have_recent_file and chatbot != None: + chatbot.append((make_media_input(input, image_paths), "")) + if conversation_cnt: + for index in range(0, 2 * conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = [{"type": "text", "text": history[index]}] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = [{"type": "text", "text": history[index + 1]}] + if what_i_have_asked["content"][0]["text"] != "": + if what_i_have_asked["content"][0]["text"] == "": + continue + if what_i_have_asked["content"][0]["text"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]["content"][0]["text"] = what_gpt_answer["content"][0][ + "text" + ] + + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = [] + if have_recent_file: + for image_path in image_paths: + what_i_ask_now["content"].append( + { + "type": "image_url", + "image_url": { + "url": f"data:{multiple_picture_types(image_path)};base64,{encode_image(image_path)}" + }, + } + ) + what_i_ask_now["content"].append({"type": "text", "text": input}) + + messages.append(what_i_ask_now) + # 开始整理headers与message + api_key = f"Bearer {key}" + headers = {"Content-Type": "application/json", "Authorization": api_key} + if model.startswith("one-api-version-"): + model,_ = read_one_api_model_name(model) + model = model.replace("one-api-version-", "") + playload = { + "model": model, + "messages": messages, + "temperature": temperature, + "stream": True, + "max_tokens": max_output_token, + } + try: + print(f" {model} : {conversation_cnt} : {input[:100]} ..........") + except: + print("输入中可能存在乱码。") + return headers, playload From 69b64948caf2176aa6e7effae83c562908da8fd8 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 16:37:02 +0800 Subject: [PATCH 06/12] Solve the problem input will show twice... --- request_llms/oai_version_std.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/request_llms/oai_version_std.py b/request_llms/oai_version_std.py index 7bbae0c29..9d53149d2 100644 --- a/request_llms/oai_version_std.py +++ b/request_llms/oai_version_std.py @@ -39,16 +39,16 @@ def generate_message_version( {"role": "system", "content": [{"type": "text", "text": system_prompt}]} ] - def make_media_input(inputs, image_paths): - for image_path in image_paths: - inputs = ( - inputs - + f'

' - ) - return inputs + # def make_media_input(inputs, image_paths): + # for image_path in image_paths: + # inputs = ( + # inputs + # + f'

' + # ) + # return inputs - if have_recent_file and chatbot != None: - chatbot.append((make_media_input(input, image_paths), "")) + # if have_recent_file and chatbot != None: + # chatbot.append((make_media_input(input, image_paths), "")) if conversation_cnt: for index in range(0, 2 * conversation_cnt, 2): what_i_have_asked = {} From 829f07f0a2ed6d2c738a29d74b3e99c94dc8d244 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 17:15:02 +0800 Subject: [PATCH 07/12] Add support for groq --- config.py | 9 +++++++ request_llms/bridge_all.py | 50 +++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 1866b2a36..d278b0f9e 100644 --- a/config.py +++ b/config.py @@ -49,12 +49,15 @@ # "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama", # "deepseek-chat" ,"deepseek-coder", +# "llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192", # "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview", # ] # --- --- --- --- # 此外,您还可以在接入one-api/vllm/ollama时, # 使用"one-api-*","vllm-*","ollama-*"前缀直接使用非标准方式接入的模型,例如 # AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)"] +# 在接入多模态模型时,可以使用"one-api-version-*"前缀接入,例如 +# AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] # --- --- --- --- @@ -229,14 +232,20 @@ # 零一万物(Yi Model) API KEY YIMODEL_API_KEY = "" + # 深度求索(DeepSeek) API KEY,默认请求地址为"https://api.deepseek.com/v1/chat/completions" DEEPSEEK_API_KEY = "" + # Mathpix 拥有执行PDF的OCR功能,但是需要注册账号 MATHPIX_APPID = "" MATHPIX_APPKEY = "" +# Groq API KEY,默认请求地址为"https://api.groq.com/openai/v1/chat/completions" +GROQ_API_KEY = "" + + # DOC2X的PDF解析服务,注册账号并获取API KEY: https://doc2x.noedgeai.com/login DOC2X_API_KEY = "" diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 3068a861a..21b4a526e 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -73,6 +73,7 @@ def decode(self, *args, **kwargs): yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions" deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions" qwenapi_endpoint = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions" +groq_endpoint = "https://api.groq.com/openai/v1/chat/completions" if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/' azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' @@ -95,6 +96,7 @@ def decode(self, *args, **kwargs): if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint] if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint] if qwenapi_endpoint in API_URL_REDIRECT: qwenapi_endpoint = API_URL_REDIRECT[qwenapi_endpoint] +if groq_endpoint in API_URL_REDIRECT: groq_endpoint = API_URL_REDIRECT[groq_endpoint] # 获取tokenizer tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo") @@ -917,6 +919,52 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- groq -=-=-=-=-=-=- +groq_models = ["llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192"] +if any(item in groq_models for item in AVAIL_LLM_MODELS): + try: + groq_8k_noui, groq_8k_ui = get_predict_function( + api_key_conf_name="GROQ_API_KEY", 
max_output_token=8192, disable_proxy=False + ) + groq_32k_noui, groq_32k_ui = get_predict_function( + api_key_conf_name="GROQ_API_KEY", max_output_token=32768, disable_proxy=False + ) + model_info.update({ + "llama3-8b-8192": { + "fn_with_ui": groq_8k_ui, + "fn_without_ui": groq_8k_noui, + "endpoint": groq_endpoint, + "max_token": 8192, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "gemma-7b-it": { + "fn_with_ui": groq_8k_ui, + "fn_without_ui": groq_8k_noui, + "endpoint": groq_endpoint, + "max_token": 8192, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "mixtral-8x7b-32768": { + "fn_with_ui": groq_32k_ui, + "fn_without_ui": groq_32k_noui, + "endpoint": groq_endpoint, + "max_token": 32768, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "llama3-70b-8192": { + "fn_with_ui": groq_8k_ui, + "fn_without_ui": groq_8k_noui, + "endpoint": groq_endpoint, + "max_token": 8192, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + }) + except: + print(trimmed_format_exc()) # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]: # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"] @@ -942,7 +990,7 @@ def decode(self, *args, **kwargs): }) # -=-=-=-=-=-=- one-api-version 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-version-")]: - # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] + # 为了更灵活地接入one-api多模型管理界面中的多模态模型,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] # 其中 # "one-api-version-" 是前缀(必要) # "gpt-4o" 是模型名(必要) From 0dd8647466557dac862da8ad2d6ad31f0705087a Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 17:26:23 +0800 Subject: [PATCH 08/12] Add support for yi-vision --- config.py | 2 +- request_llms/bridge_all.py | 19 ++++++- request_llms/bridge_yi_version.py | 88 +++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 request_llms/bridge_yi_version.py diff --git a/config.py b/config.py index d278b0f9e..14bafb494 100644 --- a/config.py +++ b/config.py @@ -50,7 +50,7 @@ # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama", # "deepseek-chat" ,"deepseek-coder", # "llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192", -# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview", +# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision" # ] # --- --- --- --- # 此外,您还可以在接入one-api/vllm/ollama时, diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 21b4a526e..30c25940d 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -703,7 +703,7 @@ def decode(self, *args, **kwargs): except: print(trimmed_format_exc()) # -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=- -yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"] +yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision"] if any(item in yi_models for item in AVAIL_LLM_MODELS): try: yimodel_4k_noui, yimodel_4k_ui = get_predict_function( @@ -715,6 +715,23 @@ def decode(self, *args, **kwargs): yimodel_200k_noui, yimodel_200k_ui = get_predict_function( 
api_key_conf_name="YIMODEL_API_KEY", max_output_token=4096, disable_proxy=False ) + if "yi-vision" in AVAIL_LLM_MODELS: + from .bridge_yi_version import yi_version_generate_message_version + yimodel_version_noui, yimodel_version_ui = get_predict_function( + api_key_conf_name="YIMODEL_API_KEY", max_output_token=600, disable_proxy=False, encode_call=yi_version_generate_message_version + ) + model_info.update({ + "yi-vision": { + "fn_with_ui": yimodel_version_ui, + "fn_without_ui": yimodel_version_noui, + "can_multi_thread": True, + "endpoint": yimodel_endpoint, + "max_token": 4000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + } + }) + model_info.update({ "yi-34b-chat-0205": { "fn_with_ui": yimodel_4k_ui, diff --git a/request_llms/bridge_yi_version.py b/request_llms/bridge_yi_version.py new file mode 100644 index 000000000..eca0f9150 --- /dev/null +++ b/request_llms/bridge_yi_version.py @@ -0,0 +1,88 @@ +from toolbox import encode_image, every_image_file_in_path ,read_one_api_model_name +from .oai_version_std import multiple_picture_types + +timeout_bot_msg = ( + "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." + + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" +) + +def yi_version_generate_message_version( + chatbot, input, model, key, history, max_output_token, system_prompt, temperature +): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + if chatbot != None: + have_recent_file, image_paths = every_image_file_in_path(chatbot) + else: + have_recent_file = False + image_paths = [] + conversation_cnt = len(history) // 2 + messages = [] + input = system_prompt + "\n" + input + + # def make_media_input(inputs, image_paths): + # for image_path in image_paths: + # inputs = ( + # inputs + # + f'

' + # ) + # return inputs + + # if have_recent_file and chatbot != None: + # chatbot.append((make_media_input(input, image_paths), "")) + + if conversation_cnt: + for index in range(0, 2 * conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = [{"type": "text", "text": history[index]}] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = [{"type": "text", "text": history[index + 1]}] + if what_i_have_asked["content"][0]["text"] != "": + if what_i_have_asked["content"][0]["text"] == "": + continue + if what_i_have_asked["content"][0]["text"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]["content"][0]["text"] = what_gpt_answer["content"][0][ + "text" + ] + + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = [] + if have_recent_file: + for image_path in image_paths: + what_i_ask_now["content"].append( + { + "type": "image_url", + "image_url": { + "url": f"data:{multiple_picture_types(image_path)};base64,{encode_image(image_path)}" + }, + } + ) + what_i_ask_now["content"].append({"type": "text", "text": input}) + + messages.append(what_i_ask_now) + # 开始整理headers与message + api_key = f"Bearer {key}" + headers = {"Content-Type": "application/json", "Authorization": api_key} + if model.startswith("one-api-version-"): + model,_ = read_one_api_model_name(model) + model = model.replace("one-api-version-", "") + playload = { + "model": model, + "messages": messages, + "temperature": temperature, + "stream": True, + "max_tokens": max_output_token, + } + try: + print(f" {model} : {conversation_cnt} : {input[:100]} ..........") + except: + print("输入中可能存在乱码。") + return headers, playload From 5d17f0cb71d66521b74a557acf7996b48e604697 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 17:33:01 +0800 Subject: [PATCH 09/12] Fix spelling errors --- config.py | 4 ++-- request_llms/bridge_all.py | 14 +++++++------- .../{bridge_yi_version.py => bridge_yi_vision.py} | 6 +++--- .../{oai_version_std.py => oai_vision_std.py} | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) rename request_llms/{bridge_yi_version.py => bridge_yi_vision.py} (95%) rename request_llms/{oai_version_std.py => oai_vision_std.py} (97%) diff --git a/config.py b/config.py index 14bafb494..e44ab331f 100644 --- a/config.py +++ b/config.py @@ -56,8 +56,8 @@ # 此外,您还可以在接入one-api/vllm/ollama时, # 使用"one-api-*","vllm-*","ollama-*"前缀直接使用非标准方式接入的模型,例如 # AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)"] -# 在接入多模态模型时,可以使用"one-api-version-*"前缀接入,例如 -# AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] +# 在接入多模态模型时,可以使用"one-api-vision-*"前缀接入,例如 +# AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"] # --- --- --- --- diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 30c25940d..7f7ecaebe 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -716,7 +716,7 @@ def decode(self, *args, **kwargs): api_key_conf_name="YIMODEL_API_KEY", max_output_token=4096, disable_proxy=False ) if "yi-vision" in AVAIL_LLM_MODELS: - from .bridge_yi_version import yi_version_generate_message_version + from .bridge_yi_vision import yi_version_generate_message_version yimodel_version_noui, yimodel_version_ui = get_predict_function( api_key_conf_name="YIMODEL_API_KEY", max_output_token=600, disable_proxy=False, 
encode_call=yi_version_generate_message_version ) @@ -1005,20 +1005,20 @@ def decode(self, *args, **kwargs): "token_cnt": get_token_num_gpt35, }, }) -# -=-=-=-=-=-=- one-api-version 对齐支持 -=-=-=-=-=-=- -for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-version-")]: - # 为了更灵活地接入one-api多模型管理界面中的多模态模型,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] +# -=-=-=-=-=-=- one-api-vision 对齐支持 -=-=-=-=-=-=- +for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-vision-")]: + # 为了更灵活地接入one-api多模型管理界面中的多模态模型,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"] # 其中 - # "one-api-version-" 是前缀(必要) + # "one-api-vision-" 是前缀(必要) # "gpt-4o" 是模型名(必要) # "(max_token=32000)" 是配置(非必要) try: _, max_token_tmp = read_one_api_model_name(model) except: - print(f"one-api-version模型 {model} 的 max_token 配置不是整数,请检查配置文件。") + print(f"one-api-vision模型 {model} 的 max_token 配置不是整数,请检查配置文件。") continue try: - from .oai_version_std import generate_message_version + from .oai_vision_std import generate_message_version one_api_version_noui, one_api_version_ui = get_predict_function( api_key_conf_name="API_KEY", max_output_token=4000, disable_proxy=False, encode_call=generate_message_version ) diff --git a/request_llms/bridge_yi_version.py b/request_llms/bridge_yi_vision.py similarity index 95% rename from request_llms/bridge_yi_version.py rename to request_llms/bridge_yi_vision.py index eca0f9150..d0cdef53f 100644 --- a/request_llms/bridge_yi_version.py +++ b/request_llms/bridge_yi_vision.py @@ -1,5 +1,5 @@ from toolbox import encode_image, every_image_file_in_path ,read_one_api_model_name -from .oai_version_std import multiple_picture_types +from .oai_vision_std import multiple_picture_types timeout_bot_msg = ( "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." 
@@ -71,9 +71,9 @@ def yi_version_generate_message_version( # 开始整理headers与message api_key = f"Bearer {key}" headers = {"Content-Type": "application/json", "Authorization": api_key} - if model.startswith("one-api-version-"): + if model.startswith("one-api-vision-"): model,_ = read_one_api_model_name(model) - model = model.replace("one-api-version-", "") + model = model.replace("one-api-vision-", "") playload = { "model": model, "messages": messages, diff --git a/request_llms/oai_version_std.py b/request_llms/oai_vision_std.py similarity index 97% rename from request_llms/oai_version_std.py rename to request_llms/oai_vision_std.py index 9d53149d2..36db27c25 100644 --- a/request_llms/oai_version_std.py +++ b/request_llms/oai_vision_std.py @@ -88,9 +88,9 @@ def generate_message_version( # 开始整理headers与message api_key = f"Bearer {key}" headers = {"Content-Type": "application/json", "Authorization": api_key} - if model.startswith("one-api-version-"): + if model.startswith("one-api-vision-"): model,_ = read_one_api_model_name(model) - model = model.replace("one-api-version-", "") + model = model.replace("one-api-vision-", "") playload = { "model": model, "messages": messages, From a2ed536b818b195a5c1f420868fd91007fb183b0 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 27 Jun 2024 21:02:46 +0800 Subject: [PATCH 10/12] Add support for qwen-2 and some bug fix --- config.py | 1 + request_llms/bridge_all.py | 54 +++++++++++++++++++++++++- request_llms/oai_std_model_template.py | 4 +- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/config.py b/config.py index e44ab331f..eedac2837 100644 --- a/config.py +++ b/config.py @@ -44,6 +44,7 @@ # "qianfan", "deepseekcoder", # "spark", "sparkv2", "sparkv3", "sparkv3.5", # "qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext", "qwen-long", "qwen-local", +# "qwen2-72b-instruct","qwen2-57b-a14b-instruct","qwen2-7b-instruct","qwen1.5-110b-chat", # "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k", # "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125", "gpt-4o-2024-05-13" # "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 7f7ecaebe..ea7612889 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -644,7 +644,17 @@ def decode(self, *args, **kwargs): except: print(trimmed_format_exc()) # -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=- -qwen_models = ["qwen-turbo","qwen-plus","qwen-max","qwen-max-longcontext","qwen-long"] +qwen_models = [ + "qwen-turbo", + "qwen-plus", + "qwen-max", + "qwen-max-longcontext", + "qwen-long", + "qwen2-72b-instruct", + "qwen2-57b-a14b-instruct", + "qwen2-7b-instruct", + "qwen1.5-110b-chat", +] if any(item in qwen_models for item in AVAIL_LLM_MODELS): try: qwen_1500_noui, qwen_1500_ui = get_predict_function( @@ -653,6 +663,12 @@ def decode(self, *args, **kwargs): qwen_2000_noui, qwen_2000_ui = get_predict_function( api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=2000, disable_proxy=False ) + qwen_6144_noui, qwen_6144_ui = get_predict_function( + api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=6144, disable_proxy=False + ) + qwen_8000_noui, qwen_8000_ui = get_predict_function( + api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=8000, disable_proxy=False + ) model_info.update({ "qwen-turbo": { "fn_with_ui": qwen_1500_ui, @@ -699,6 +715,42 @@ def decode(self, *args, **kwargs): "tokenizer": tokenizer_gpt35, "token_cnt": 
get_token_num_gpt35, }, + "qwen2-72b-instruct": { + "fn_with_ui": qwen_6144_ui, + "fn_without_ui": qwen_6144_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 128000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen2-57b-a14b-instruct": { + "fn_with_ui": qwen_6144_ui, + "fn_without_ui": qwen_6144_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 30720, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen2-7b-instruct": { + "fn_with_ui": qwen_6144_ui, + "fn_without_ui": qwen_6144_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 128000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen1.5-110b-chat": { + "fn_with_ui": qwen_8000_ui, + "fn_without_ui": qwen_8000_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 32000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, }) except: print(trimmed_format_exc()) diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index f8a89a805..e3bdbbb97 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -140,7 +140,7 @@ def predict_no_ui_long_connection( inputs, llm_kwargs, history=[], - sys_prompt="", + sys_prompt="Serve me as a writing and programming assistant.", observe_window=None, console_slience=False, ): @@ -256,7 +256,7 @@ def predict( plugin_kwargs, chatbot, history=[], - system_prompt="", + system_prompt="Serve me as a writing and programming assistant.", stream=True, additional_fn=None, ): From 4bf350cea7a50fb197acea66aab4d34f086443c3 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 27 Jun 2024 21:06:34 +0800 Subject: [PATCH 11/12] Some bug fix for some models --- request_llms/oai_std_model_template.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index e3bdbbb97..647ce3c72 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -66,7 +66,9 @@ def decode_chunk(chunk): return respose, finish_reason -def generate_message(chatbot, input, model, key, history, max_output_token, system_prompt, temperature): +def generate_message( + chatbot, input, model, key, history, max_output_token, system_prompt, temperature +): """ 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 """ @@ -76,6 +78,11 @@ def generate_message(chatbot, input, model, key, history, max_output_token, syst conversation_cnt = len(history) // 2 + system_prompt = ( + "Serve me as a writing and programming assistant." 
+ if system_prompt == "" + else system_prompt + ) messages = [{"role": "system", "content": system_prompt}] if conversation_cnt: for index in range(0, 2 * conversation_cnt, 2): @@ -113,12 +120,12 @@ def generate_message(chatbot, input, model, key, history, max_output_token, syst def get_predict_function( - api_key_conf_name, - max_output_token, - disable_proxy = False, - encode_call = generate_message, - decode_call = decode_chunk - ): + api_key_conf_name, + max_output_token, + disable_proxy=False, + encode_call=generate_message, + decode_call=decode_chunk, +): """ 为openai格式的API生成响应函数,其中传入参数: api_key_conf_name: From e2efe864b011107e6ab0b39f6bd33cf9119d3b10 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 27 Jun 2024 21:12:19 +0800 Subject: [PATCH 12/12] Fit some models --- request_llms/oai_std_model_template.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index 647ce3c72..9fa4362b4 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -235,9 +235,6 @@ def predict_no_ui_long_connection( ) if chunk: try: - if finish_reason == "stop": - logging.info(f"[response] {result}") - break result += response_text if not console_slience: print(response_text, end="") @@ -249,7 +246,10 @@ def predict_no_ui_long_connection( if len(observe_window) >= 2: if (time.time() - observe_window[1]) > watch_dog_patience: raise RuntimeError("用户取消了程序。") - except Exception as e: + if finish_reason == "stop": + logging.info(f"[response] {result}") + break + except Exception: chunk = get_full_error(chunk, stream_response) chunk_decoded = chunk.decode() error_msg = chunk_decoded @@ -296,7 +296,7 @@ def predict( if is_the_upload_folder(inputs): chatbot[-1] = ( inputs, - f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。", + "[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。", ) yield from update_ui( chatbot=chatbot, history=history, msg="正常" @@ -386,9 +386,6 @@ def predict( print(chunk_decoded) return - if finish_reason == "stop": - logging.info(f"[response] {gpt_replying_buffer}") - break status_text = f"finish_reason: {finish_reason}" gpt_replying_buffer += response_text # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出 @@ -397,7 +394,11 @@ def predict( yield from update_ui( chatbot=chatbot, history=history, msg=status_text ) # 刷新界面 - except Exception as e: + if finish_reason == "stop": + logging.info(f"[response] {gpt_replying_buffer}") + break + + except Exception: yield from update_ui( chatbot=chatbot, history=history, msg="Json解析不合常规" ) # 刷新界面