From 824ef42c6ceb8724c3cbb2cec913387b3c4a18f7 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Tue, 21 May 2024 12:41:07 +0800 Subject: [PATCH 01/12] Add function call for oai_std, prepare to access the graphical dialogue --- request_llms/oai_std_model_template.py | 20 ++++++++++++++------ shared_utils/cookie_manager.py | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index 648dbe41c..f8a89a805 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -66,7 +66,7 @@ def decode_chunk(chunk): return respose, finish_reason -def generate_message(input, model, key, history, max_output_token, system_prompt, temperature): +def generate_message(chatbot, input, model, key, history, max_output_token, system_prompt, temperature): """ 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 """ @@ -115,7 +115,9 @@ def generate_message(input, model, key, history, max_output_token, system_prompt def get_predict_function( api_key_conf_name, max_output_token, - disable_proxy = False + disable_proxy = False, + encode_call = generate_message, + decode_call = decode_chunk ): """ 为openai格式的API生成响应函数,其中传入参数: @@ -126,6 +128,10 @@ def get_predict_function( ⚠️请不要与模型的最大token数量相混淆。 disable_proxy: 是否使用代理,True为不使用,False为使用。 + encode_call: + 是否使用自定义的encode函数,如果不是,则使用默认的generate_message + decode_call: + 是否使用自定义的decode函数,如果不是,则使用默认的decode_chunk """ APIKEY = get_conf(api_key_conf_name) @@ -156,7 +162,8 @@ def predict_no_ui_long_connection( raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}") if inputs == "": inputs = "你好👋" - headers, playload = generate_message( + headers, playload = encode_call( + chatbot=None, input=inputs, model=llm_kwargs["llm_model"], key=APIKEY, @@ -206,7 +213,7 @@ def predict_no_ui_long_connection( break except requests.exceptions.ConnectionError: chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 - response_text, finish_reason = decode_chunk(chunk) + response_text, finish_reason = decode_call(chunk) # 返回的数据流第一次为空,继续等待 if response_text == "" and finish_reason != "False": continue @@ -289,7 +296,8 @@ def predict( ) # 刷新界面 time.sleep(2) - headers, playload = generate_message( + headers, playload = encode_call( + chatbot=chatbot, input=inputs, model=llm_kwargs["llm_model"], key=APIKEY, @@ -347,7 +355,7 @@ def predict( break except requests.exceptions.ConnectionError: chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 - response_text, finish_reason = decode_chunk(chunk) + response_text, finish_reason = decode_call(chunk) # 返回的数据流第一次为空,继续等待 if response_text == "" and finish_reason != "False": continue diff --git a/shared_utils/cookie_manager.py b/shared_utils/cookie_manager.py index c0994324c..b5df1e241 100644 --- a/shared_utils/cookie_manager.py +++ b/shared_utils/cookie_manager.py @@ -102,7 +102,7 @@ def process_history_cache(history_cache): output_list = [txt, txtx] input_name_list = ["txt(input)", "txtx(input)"] output_name_list = ["txt", "txtx"] - js_callback = """(txt, txtx)=>{console.log(txt); console.log(txtx);}""" + js_callback = "(txt, txtx)=>{console.log(txt); console.log(txtx);}" def function(txt, txtx): return "booo", "goooo" create_button_with_javascript_callback(btn_value, elem_id, variant, js_callback, input_list, output_list, function, input_name_list, output_name_list) From 524775502986a1322d7e9de73197dcfcade8fe76 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Tue, 21 May 2024 13:27:20 +0800 Subject: [PATCH 02/12] Update qwenapi --- request_llms/bridge_all.py | 51 
++++++++++---- request_llms/bridge_qwen.py | 131 +++++++++++++++++++++--------------- 2 files changed, 114 insertions(+), 68 deletions(-) diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 06e695835..6e1527c37 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -72,6 +72,7 @@ def decode(self, *args, **kwargs): ollama_endpoint = "http://localhost:11434/api/chat" yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions" deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions" +qwenapi_endpoint = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions" if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/' azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' @@ -93,6 +94,7 @@ def decode(self, *args, **kwargs): if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollama_endpoint] if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint] if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint] +if qwenapi_endpoint in API_URL_REDIRECT: qwenapi_endpoint = API_URL_REDIRECT[qwenapi_endpoint] # 获取tokenizer tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo") @@ -640,38 +642,61 @@ def decode(self, *args, **kwargs): except: print(trimmed_format_exc()) # -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=- -if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai +qwen_models = ["qwen-turbo","qwen-plus","qwen-max","qwen-max-longcontext","qwen-long"] +if any(item in qwen_models for item in AVAIL_LLM_MODELS): try: - from .bridge_qwen import predict_no_ui_long_connection as qwen_noui - from .bridge_qwen import predict as qwen_ui + qwen_1500_noui, qwen_1500_ui = get_predict_function( + api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=1500, disable_proxy=False + ) + qwen_2000_noui, qwen_2000_ui = get_predict_function( + api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=2000, disable_proxy=False + ) model_info.update({ "qwen-turbo": { - "fn_with_ui": qwen_ui, - "fn_without_ui": qwen_noui, + "fn_with_ui": qwen_1500_ui, + "fn_without_ui": qwen_1500_noui, "can_multi_thread": True, - "endpoint": None, + "endpoint": qwenapi_endpoint, "max_token": 6144, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, "qwen-plus": { - "fn_with_ui": qwen_ui, - "fn_without_ui": qwen_noui, + "fn_with_ui": qwen_2000_ui, + "fn_without_ui": qwen_2000_noui, "can_multi_thread": True, - "endpoint": None, + "endpoint": qwenapi_endpoint, "max_token": 30720, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, "qwen-max": { - "fn_with_ui": qwen_ui, - "fn_without_ui": qwen_noui, + "fn_with_ui": qwen_2000_ui, + "fn_without_ui": qwen_2000_noui, "can_multi_thread": True, - "endpoint": None, + "endpoint": qwenapi_endpoint, + "max_token": 6144, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen-max-longcontext": { + "fn_with_ui": qwen_2000_ui, + "fn_without_ui": qwen_2000_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, "max_token": 28672, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, - } + }, + "qwen-long": { + "fn_with_ui": qwen_2000_ui, + "fn_without_ui": qwen_2000_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 1000000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, }) except: 
print(trimmed_format_exc()) diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py index 2b1eeed27..7ee4524bd 100644 --- a/request_llms/bridge_qwen.py +++ b/request_llms/bridge_qwen.py @@ -1,66 +1,87 @@ -import time -import os -from toolbox import update_ui, get_conf, update_ui_lastest_msg -from toolbox import check_packages, report_exception +import json -model_name = 'Qwen' +timeout_bot_msg = ( + "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." + + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" +) -def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[], sys_prompt:str="", - observe_window:list=[], console_slience:bool=False): +def decode_chunk(chunk): """ - ⭐多线程方法 - 函数的说明请见 request_llms/bridge_all.py + 用于解读"content"和"finish_reason"的内容 """ - watch_dog_patience = 5 - response = "" - - from .com_qwenapi import QwenRequestInstance - sri = QwenRequestInstance() - for response in sri.generate(inputs, llm_kwargs, history, sys_prompt): - if len(observe_window) >= 1: - observe_window[0] = response - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。") - return response - -def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): - """ - ⭐单线程方法 - 函数的说明请见 request_llms/bridge_all.py - """ - chatbot.append((inputs, "")) - yield from update_ui(chatbot=chatbot, history=history) + chunk = chunk.decode() + respose = "" + finish_reason = "False" + try: + chunk = json.loads(chunk[6:]) + except: + finish_reason = "JSON_ERROR" + # 错误处理部分 + if "error" in chunk: + respose = "API_ERROR" + try: + chunk = json.loads(chunk) + finish_reason = chunk["error"]["code"] + except: + finish_reason = "API_ERROR" + return respose, finish_reason - # 尝试导入依赖,如果缺少依赖,则给出安装建议 try: - check_packages(["dashscope"]) + respose = chunk["choices"][0]["delta"]["content"] except: - yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。", - chatbot=chatbot, history=history, delay=0) - return + pass + try: + finish_reason = chunk["choices"][0]["finish_reason"] + except: + pass + return respose, finish_reason + - # 检查DASHSCOPE_API_KEY - if get_conf("DASHSCOPE_API_KEY") == "": - yield from update_ui_lastest_msg(f"请配置 DASHSCOPE_API_KEY。", - chatbot=chatbot, history=history, delay=0) - return +def generate_message(chatbot, input, model, key, history, max_output_token, system_prompt, temperature): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + api_key = f"Bearer {key}" - if additional_fn is not None: - from core_functional import handle_core_functionality - inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) - chatbot[-1] = (inputs, "") - yield from update_ui(chatbot=chatbot, history=history) + headers = {"Content-Type": "application/json", "Authorization": api_key} - # 开始接收回复 - from .com_qwenapi import QwenRequestInstance - sri = QwenRequestInstance() - response = f"[Local Message] 等待{model_name}响应中 ..." - for response in sri.generate(inputs, llm_kwargs, history, system_prompt): - chatbot[-1] = (inputs, response) - yield from update_ui(chatbot=chatbot, history=history) + conversation_cnt = len(history) // 2 - # 总结输出 - if response == f"[Local Message] 等待{model_name}响应中 ...": - response = f"[Local Message] {model_name}响应异常 ..." 
- history.extend([inputs, response]) - yield from update_ui(chatbot=chatbot, history=history) \ No newline at end of file + messages = [{"role": "system", "content": system_prompt}] + if conversation_cnt: + for index in range(0, 2 * conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index + 1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": + continue + if what_gpt_answer["content"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]["content"] = what_gpt_answer["content"] + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = input + messages.append(what_i_ask_now) + if temperature == 2: temperature -= 1e-5 + playload = { + "model": model, + "input": messages, + "parameters":{ + "result_format": "message", + "temperature": temperature, + "incremental_output": True, + "max_tokens": max_output_token, + } + } + try: + print(f" {model} : {conversation_cnt} : {input[:100]} ..........") + except: + print("输入中可能存在乱码。") + return headers, playload \ No newline at end of file From 3b438df55f55756c3a0c0a3c29053a135bf7fd52 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Tue, 21 May 2024 13:27:40 +0800 Subject: [PATCH 03/12] No need now --- request_llms/bridge_qwen.py | 87 -------------------------------- request_llms/com_qwenapi.py | 98 ------------------------------------- 2 files changed, 185 deletions(-) delete mode 100644 request_llms/bridge_qwen.py delete mode 100644 request_llms/com_qwenapi.py diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py deleted file mode 100644 index 7ee4524bd..000000000 --- a/request_llms/bridge_qwen.py +++ /dev/null @@ -1,87 +0,0 @@ -import json - -timeout_bot_msg = ( - "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." 
- + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" -) - -def decode_chunk(chunk): - """ - 用于解读"content"和"finish_reason"的内容 - """ - chunk = chunk.decode() - respose = "" - finish_reason = "False" - try: - chunk = json.loads(chunk[6:]) - except: - finish_reason = "JSON_ERROR" - # 错误处理部分 - if "error" in chunk: - respose = "API_ERROR" - try: - chunk = json.loads(chunk) - finish_reason = chunk["error"]["code"] - except: - finish_reason = "API_ERROR" - return respose, finish_reason - - try: - respose = chunk["choices"][0]["delta"]["content"] - except: - pass - try: - finish_reason = chunk["choices"][0]["finish_reason"] - except: - pass - return respose, finish_reason - - -def generate_message(chatbot, input, model, key, history, max_output_token, system_prompt, temperature): - """ - 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 - """ - api_key = f"Bearer {key}" - - headers = {"Content-Type": "application/json", "Authorization": api_key} - - conversation_cnt = len(history) // 2 - - messages = [{"role": "system", "content": system_prompt}] - if conversation_cnt: - for index in range(0, 2 * conversation_cnt, 2): - what_i_have_asked = {} - what_i_have_asked["role"] = "user" - what_i_have_asked["content"] = history[index] - what_gpt_answer = {} - what_gpt_answer["role"] = "assistant" - what_gpt_answer["content"] = history[index + 1] - if what_i_have_asked["content"] != "": - if what_gpt_answer["content"] == "": - continue - if what_gpt_answer["content"] == timeout_bot_msg: - continue - messages.append(what_i_have_asked) - messages.append(what_gpt_answer) - else: - messages[-1]["content"] = what_gpt_answer["content"] - what_i_ask_now = {} - what_i_ask_now["role"] = "user" - what_i_ask_now["content"] = input - messages.append(what_i_ask_now) - if temperature == 2: temperature -= 1e-5 - playload = { - "model": model, - "input": messages, - "parameters":{ - "result_format": "message", - "temperature": temperature, - "incremental_output": True, - "max_tokens": max_output_token, - } - } - try: - print(f" {model} : {conversation_cnt} : {input[:100]} ..........") - except: - print("输入中可能存在乱码。") - return headers, playload \ No newline at end of file diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py deleted file mode 100644 index 2cde52c1b..000000000 --- a/request_llms/com_qwenapi.py +++ /dev/null @@ -1,98 +0,0 @@ -from http import HTTPStatus -from toolbox import get_conf -import threading -import logging - -timeout_bot_msg = '[Local Message] Request timeout. Network error.' 
- -class QwenRequestInstance(): - def __init__(self): - import dashscope - self.time_to_yield_event = threading.Event() - self.time_to_exit_event = threading.Event() - self.result_buf = "" - - def validate_key(): - DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY") - if DASHSCOPE_API_KEY == '': return False - return True - - if not validate_key(): - raise RuntimeError('请配置 DASHSCOPE_API_KEY') - dashscope.api_key = get_conf("DASHSCOPE_API_KEY") - - - def generate(self, inputs, llm_kwargs, history, system_prompt): - # import _thread as thread - from dashscope import Generation - QWEN_MODEL = { - 'qwen-turbo': Generation.Models.qwen_turbo, - 'qwen-plus': Generation.Models.qwen_plus, - 'qwen-max': Generation.Models.qwen_max, - }[llm_kwargs['llm_model']] - top_p = llm_kwargs.get('top_p', 0.8) - if top_p == 0: top_p += 1e-5 - if top_p == 1: top_p -= 1e-5 - - self.result_buf = "" - responses = Generation.call( - model=QWEN_MODEL, - messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt), - top_p=top_p, - temperature=llm_kwargs.get('temperature', 1.0), - result_format='message', - stream=True, - incremental_output=True - ) - - for response in responses: - if response.status_code == HTTPStatus.OK: - if response.output.choices[0].finish_reason == 'stop': - try: - self.result_buf += response.output.choices[0].message.content - except: - pass - yield self.result_buf - break - elif response.output.choices[0].finish_reason == 'length': - self.result_buf += "[Local Message] 生成长度过长,后续输出被截断" - yield self.result_buf - break - else: - self.result_buf += response.output.choices[0].message.content - yield self.result_buf - else: - self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}" - yield self.result_buf - break - logging.info(f'[raw_input] {inputs}') - logging.info(f'[response] {self.result_buf}') - return self.result_buf - - -def generate_message_payload(inputs, llm_kwargs, history, system_prompt): - conversation_cnt = len(history) // 2 - if system_prompt == '': system_prompt = 'Hello!' 
- messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}] - if conversation_cnt: - for index in range(0, 2*conversation_cnt, 2): - what_i_have_asked = {} - what_i_have_asked["role"] = "user" - what_i_have_asked["content"] = history[index] - what_gpt_answer = {} - what_gpt_answer["role"] = "assistant" - what_gpt_answer["content"] = history[index+1] - if what_i_have_asked["content"] != "": - if what_gpt_answer["content"] == "": - continue - if what_gpt_answer["content"] == timeout_bot_msg: - continue - messages.append(what_i_have_asked) - messages.append(what_gpt_answer) - else: - messages[-1]['content'] = what_gpt_answer['content'] - what_i_ask_now = {} - what_i_ask_now["role"] = "user" - what_i_ask_now["content"] = inputs - messages.append(what_i_ask_now) - return messages From 0d775f2fcafa6c4c4ebfd280afb6bd4168ed3f48 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Tue, 21 May 2024 13:31:45 +0800 Subject: [PATCH 04/12] Update config --- config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index ad206d022..1866b2a36 100644 --- a/config.py +++ b/config.py @@ -43,7 +43,7 @@ # AVAIL_LLM_MODELS = [ # "qianfan", "deepseekcoder", # "spark", "sparkv2", "sparkv3", "sparkv3.5", -# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local", +# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext", "qwen-long", "qwen-local", # "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k", # "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125", "gpt-4o-2024-05-13" # "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", @@ -127,7 +127,7 @@ QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8" -# 接入通义千问在线大模型 https://dashscope.console.aliyun.com/ +# 接入通义千问在线大模型 https://bailian.console.aliyun.com/ DASHSCOPE_API_KEY = "" # 阿里灵积云API_KEY From 661da50c168cdb35898367d19d070e737f026f42 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 15:24:57 +0800 Subject: [PATCH 05/12] Add support for one-api version model --- request_llms/bridge_all.py | 30 +++++++++ request_llms/oai_version_std.py | 105 ++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 request_llms/oai_version_std.py diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 6e1527c37..3068a861a 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -940,6 +940,36 @@ def decode(self, *args, **kwargs): "token_cnt": get_token_num_gpt35, }, }) +# -=-=-=-=-=-=- one-api-version 对齐支持 -=-=-=-=-=-=- +for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-version-")]: + # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] + # 其中 + # "one-api-version-" 是前缀(必要) + # "gpt-4o" 是模型名(必要) + # "(max_token=32000)" 是配置(非必要) + try: + _, max_token_tmp = read_one_api_model_name(model) + except: + print(f"one-api-version模型 {model} 的 max_token 配置不是整数,请检查配置文件。") + continue + try: + from .oai_version_std import generate_message_version + one_api_version_noui, one_api_version_ui = get_predict_function( + api_key_conf_name="API_KEY", max_output_token=4000, disable_proxy=False, encode_call=generate_message_version + ) + model_info.update({ + model: { + "fn_with_ui": one_api_version_ui, + "fn_without_ui": one_api_version_noui, + "can_multi_thread": True, + "endpoint": openai_endpoint, + "max_token": max_token_tmp, + "tokenizer": tokenizer_gpt35, + "token_cnt": 
get_token_num_gpt35, + }, + }) + except: + print(trimmed_format_exc()) # -=-=-=-=-=-=- vllm 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]: # 为了更灵活地接入vllm多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["vllm-/home/hmp/llm/cache/Qwen1___5-32B-Chat(max_token=6666)"] diff --git a/request_llms/oai_version_std.py b/request_llms/oai_version_std.py new file mode 100644 index 000000000..7bbae0c29 --- /dev/null +++ b/request_llms/oai_version_std.py @@ -0,0 +1,105 @@ +from toolbox import update_ui, encode_image, every_image_file_in_path ,read_one_api_model_name +import os + +timeout_bot_msg = ( + "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." + + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" +) + + +def multiple_picture_types(image_paths): + """ + 根据图片类型返回image/jpeg, image/png, image/gif, image/webp,无法判断则返回image/jpeg + """ + for image_path in image_paths: + if image_path.endswith(".jpeg") or image_path.endswith(".jpg"): + return "image/jpeg" + elif image_path.endswith(".png"): + return "image/png" + elif image_path.endswith(".gif"): + return "image/gif" + elif image_path.endswith(".webp"): + return "image/webp" + return "image/jpeg" + + +def generate_message_version( + chatbot, input, model, key, history, max_output_token, system_prompt, temperature +): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + if chatbot != None: + have_recent_file, image_paths = every_image_file_in_path(chatbot) + else: + have_recent_file = False + image_paths = [] + conversation_cnt = len(history) // 2 + messages = [ + {"role": "system", "content": [{"type": "text", "text": system_prompt}]} + ] + + def make_media_input(inputs, image_paths): + for image_path in image_paths: + inputs = ( + inputs + + f'

' + ) + return inputs + + if have_recent_file and chatbot != None: + chatbot.append((make_media_input(input, image_paths), "")) + if conversation_cnt: + for index in range(0, 2 * conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = [{"type": "text", "text": history[index]}] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = [{"type": "text", "text": history[index + 1]}] + if what_i_have_asked["content"][0]["text"] != "": + if what_i_have_asked["content"][0]["text"] == "": + continue + if what_i_have_asked["content"][0]["text"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]["content"][0]["text"] = what_gpt_answer["content"][0][ + "text" + ] + + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = [] + if have_recent_file: + for image_path in image_paths: + what_i_ask_now["content"].append( + { + "type": "image_url", + "image_url": { + "url": f"data:{multiple_picture_types(image_path)};base64,{encode_image(image_path)}" + }, + } + ) + what_i_ask_now["content"].append({"type": "text", "text": input}) + + messages.append(what_i_ask_now) + # 开始整理headers与message + api_key = f"Bearer {key}" + headers = {"Content-Type": "application/json", "Authorization": api_key} + if model.startswith("one-api-version-"): + model,_ = read_one_api_model_name(model) + model = model.replace("one-api-version-", "") + playload = { + "model": model, + "messages": messages, + "temperature": temperature, + "stream": True, + "max_tokens": max_output_token, + } + try: + print(f" {model} : {conversation_cnt} : {input[:100]} ..........") + except: + print("输入中可能存在乱码。") + return headers, playload From 69b64948caf2176aa6e7effae83c562908da8fd8 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 16:37:02 +0800 Subject: [PATCH 06/12] Solve the problem input will show twice... --- request_llms/oai_version_std.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/request_llms/oai_version_std.py b/request_llms/oai_version_std.py index 7bbae0c29..9d53149d2 100644 --- a/request_llms/oai_version_std.py +++ b/request_llms/oai_version_std.py @@ -39,16 +39,16 @@ def generate_message_version( {"role": "system", "content": [{"type": "text", "text": system_prompt}]} ] - def make_media_input(inputs, image_paths): - for image_path in image_paths: - inputs = ( - inputs - + f'

' - ) - return inputs + # def make_media_input(inputs, image_paths): + # for image_path in image_paths: + # inputs = ( + # inputs + # + f'

' + # ) + # return inputs - if have_recent_file and chatbot != None: - chatbot.append((make_media_input(input, image_paths), "")) + # if have_recent_file and chatbot != None: + # chatbot.append((make_media_input(input, image_paths), "")) if conversation_cnt: for index in range(0, 2 * conversation_cnt, 2): what_i_have_asked = {} From 829f07f0a2ed6d2c738a29d74b3e99c94dc8d244 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 17:15:02 +0800 Subject: [PATCH 07/12] Add support for groq --- config.py | 9 +++++++ request_llms/bridge_all.py | 50 +++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 1866b2a36..d278b0f9e 100644 --- a/config.py +++ b/config.py @@ -49,12 +49,15 @@ # "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama", # "deepseek-chat" ,"deepseek-coder", +# "llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192", # "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview", # ] # --- --- --- --- # 此外,您还可以在接入one-api/vllm/ollama时, # 使用"one-api-*","vllm-*","ollama-*"前缀直接使用非标准方式接入的模型,例如 # AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)"] +# 在接入多模态模型时,可以使用"one-api-version-*"前缀接入,例如 +# AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] # --- --- --- --- @@ -229,14 +232,20 @@ # 零一万物(Yi Model) API KEY YIMODEL_API_KEY = "" + # 深度求索(DeepSeek) API KEY,默认请求地址为"https://api.deepseek.com/v1/chat/completions" DEEPSEEK_API_KEY = "" + # Mathpix 拥有执行PDF的OCR功能,但是需要注册账号 MATHPIX_APPID = "" MATHPIX_APPKEY = "" +# Groq API KEY,默认请求地址为"https://api.groq.com/openai/v1/chat/completions" +GROQ_API_KEY = "" + + # DOC2X的PDF解析服务,注册账号并获取API KEY: https://doc2x.noedgeai.com/login DOC2X_API_KEY = "" diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 3068a861a..21b4a526e 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -73,6 +73,7 @@ def decode(self, *args, **kwargs): yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions" deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions" qwenapi_endpoint = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions" +groq_endpoint = "https://api.groq.com/openai/v1/chat/completions" if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/' azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' @@ -95,6 +96,7 @@ def decode(self, *args, **kwargs): if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint] if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint] if qwenapi_endpoint in API_URL_REDIRECT: qwenapi_endpoint = API_URL_REDIRECT[qwenapi_endpoint] +if groq_endpoint in API_URL_REDIRECT: groq_endpoint = API_URL_REDIRECT[groq_endpoint] # 获取tokenizer tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo") @@ -917,6 +919,52 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- groq -=-=-=-=-=-=- +groq_models = ["llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192"] +if any(item in groq_models for item in AVAIL_LLM_MODELS): + try: + groq_8k_noui, groq_8k_ui = get_predict_function( + api_key_conf_name="GROQ_API_KEY", 
max_output_token=8192, disable_proxy=False + ) + groq_32k_noui, groq_32k_ui = get_predict_function( + api_key_conf_name="GROQ_API_KEY", max_output_token=32768, disable_proxy=False + ) + model_info.update({ + "llama3-8b-8192": { + "fn_with_ui": groq_8k_ui, + "fn_without_ui": groq_8k_noui, + "endpoint": groq_endpoint, + "max_token": 8192, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "gemma-7b-it": { + "fn_with_ui": groq_8k_ui, + "fn_without_ui": groq_8k_noui, + "endpoint": groq_endpoint, + "max_token": 8192, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "mixtral-8x7b-32768": { + "fn_with_ui": groq_32k_ui, + "fn_without_ui": groq_32k_noui, + "endpoint": groq_endpoint, + "max_token": 32768, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "llama3-70b-8192": { + "fn_with_ui": groq_8k_ui, + "fn_without_ui": groq_8k_noui, + "endpoint": groq_endpoint, + "max_token": 8192, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + }) + except: + print(trimmed_format_exc()) # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]: # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"] @@ -942,7 +990,7 @@ def decode(self, *args, **kwargs): }) # -=-=-=-=-=-=- one-api-version 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-version-")]: - # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] + # 为了更灵活地接入one-api多模型管理界面中的多模态模型,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] # 其中 # "one-api-version-" 是前缀(必要) # "gpt-4o" 是模型名(必要) From 0dd8647466557dac862da8ad2d6ad31f0705087a Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 17:26:23 +0800 Subject: [PATCH 08/12] Add support for yi-vision --- config.py | 2 +- request_llms/bridge_all.py | 19 ++++++- request_llms/bridge_yi_version.py | 88 +++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 request_llms/bridge_yi_version.py diff --git a/config.py b/config.py index d278b0f9e..14bafb494 100644 --- a/config.py +++ b/config.py @@ -50,7 +50,7 @@ # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama", # "deepseek-chat" ,"deepseek-coder", # "llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192", -# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview", +# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision" # ] # --- --- --- --- # 此外,您还可以在接入one-api/vllm/ollama时, diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 21b4a526e..30c25940d 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -703,7 +703,7 @@ def decode(self, *args, **kwargs): except: print(trimmed_format_exc()) # -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=- -yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"] +yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision"] if any(item in yi_models for item in AVAIL_LLM_MODELS): try: yimodel_4k_noui, yimodel_4k_ui = get_predict_function( @@ -715,6 +715,23 @@ def decode(self, *args, **kwargs): yimodel_200k_noui, yimodel_200k_ui = get_predict_function( 
api_key_conf_name="YIMODEL_API_KEY", max_output_token=4096, disable_proxy=False ) + if "yi-vision" in AVAIL_LLM_MODELS: + from .bridge_yi_version import yi_version_generate_message_version + yimodel_version_noui, yimodel_version_ui = get_predict_function( + api_key_conf_name="YIMODEL_API_KEY", max_output_token=600, disable_proxy=False, encode_call=yi_version_generate_message_version + ) + model_info.update({ + "yi-vision": { + "fn_with_ui": yimodel_version_ui, + "fn_without_ui": yimodel_version_noui, + "can_multi_thread": True, + "endpoint": yimodel_endpoint, + "max_token": 4000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + } + }) + model_info.update({ "yi-34b-chat-0205": { "fn_with_ui": yimodel_4k_ui, diff --git a/request_llms/bridge_yi_version.py b/request_llms/bridge_yi_version.py new file mode 100644 index 000000000..eca0f9150 --- /dev/null +++ b/request_llms/bridge_yi_version.py @@ -0,0 +1,88 @@ +from toolbox import encode_image, every_image_file_in_path ,read_one_api_model_name +from .oai_version_std import multiple_picture_types + +timeout_bot_msg = ( + "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." + + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" +) + +def yi_version_generate_message_version( + chatbot, input, model, key, history, max_output_token, system_prompt, temperature +): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + if chatbot != None: + have_recent_file, image_paths = every_image_file_in_path(chatbot) + else: + have_recent_file = False + image_paths = [] + conversation_cnt = len(history) // 2 + messages = [] + input = system_prompt + "\n" + input + + # def make_media_input(inputs, image_paths): + # for image_path in image_paths: + # inputs = ( + # inputs + # + f'

' + # ) + # return inputs + + # if have_recent_file and chatbot != None: + # chatbot.append((make_media_input(input, image_paths), "")) + + if conversation_cnt: + for index in range(0, 2 * conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = [{"type": "text", "text": history[index]}] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = [{"type": "text", "text": history[index + 1]}] + if what_i_have_asked["content"][0]["text"] != "": + if what_i_have_asked["content"][0]["text"] == "": + continue + if what_i_have_asked["content"][0]["text"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]["content"][0]["text"] = what_gpt_answer["content"][0][ + "text" + ] + + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = [] + if have_recent_file: + for image_path in image_paths: + what_i_ask_now["content"].append( + { + "type": "image_url", + "image_url": { + "url": f"data:{multiple_picture_types(image_path)};base64,{encode_image(image_path)}" + }, + } + ) + what_i_ask_now["content"].append({"type": "text", "text": input}) + + messages.append(what_i_ask_now) + # 开始整理headers与message + api_key = f"Bearer {key}" + headers = {"Content-Type": "application/json", "Authorization": api_key} + if model.startswith("one-api-version-"): + model,_ = read_one_api_model_name(model) + model = model.replace("one-api-version-", "") + playload = { + "model": model, + "messages": messages, + "temperature": temperature, + "stream": True, + "max_tokens": max_output_token, + } + try: + print(f" {model} : {conversation_cnt} : {input[:100]} ..........") + except: + print("输入中可能存在乱码。") + return headers, playload From 5d17f0cb71d66521b74a557acf7996b48e604697 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 30 May 2024 17:33:01 +0800 Subject: [PATCH 09/12] Fix spelling errors --- config.py | 4 ++-- request_llms/bridge_all.py | 14 +++++++------- .../{bridge_yi_version.py => bridge_yi_vision.py} | 6 +++--- .../{oai_version_std.py => oai_vision_std.py} | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) rename request_llms/{bridge_yi_version.py => bridge_yi_vision.py} (95%) rename request_llms/{oai_version_std.py => oai_vision_std.py} (97%) diff --git a/config.py b/config.py index 14bafb494..e44ab331f 100644 --- a/config.py +++ b/config.py @@ -56,8 +56,8 @@ # 此外,您还可以在接入one-api/vllm/ollama时, # 使用"one-api-*","vllm-*","ollama-*"前缀直接使用非标准方式接入的模型,例如 # AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)"] -# 在接入多模态模型时,可以使用"one-api-version-*"前缀接入,例如 -# AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] +# 在接入多模态模型时,可以使用"one-api-vision-*"前缀接入,例如 +# AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"] # --- --- --- --- diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 30c25940d..7f7ecaebe 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -716,7 +716,7 @@ def decode(self, *args, **kwargs): api_key_conf_name="YIMODEL_API_KEY", max_output_token=4096, disable_proxy=False ) if "yi-vision" in AVAIL_LLM_MODELS: - from .bridge_yi_version import yi_version_generate_message_version + from .bridge_yi_vision import yi_version_generate_message_version yimodel_version_noui, yimodel_version_ui = get_predict_function( api_key_conf_name="YIMODEL_API_KEY", max_output_token=600, disable_proxy=False, 
encode_call=yi_version_generate_message_version ) @@ -1005,20 +1005,20 @@ def decode(self, *args, **kwargs): "token_cnt": get_token_num_gpt35, }, }) -# -=-=-=-=-=-=- one-api-version 对齐支持 -=-=-=-=-=-=- -for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-version-")]: - # 为了更灵活地接入one-api多模型管理界面中的多模态模型,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-version-gpt-4o(max_token=32000)"] +# -=-=-=-=-=-=- one-api-vision 对齐支持 -=-=-=-=-=-=- +for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-vision-")]: + # 为了更灵活地接入one-api多模型管理界面中的多模态模型,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"] # 其中 - # "one-api-version-" 是前缀(必要) + # "one-api-vision-" 是前缀(必要) # "gpt-4o" 是模型名(必要) # "(max_token=32000)" 是配置(非必要) try: _, max_token_tmp = read_one_api_model_name(model) except: - print(f"one-api-version模型 {model} 的 max_token 配置不是整数,请检查配置文件。") + print(f"one-api-vision模型 {model} 的 max_token 配置不是整数,请检查配置文件。") continue try: - from .oai_version_std import generate_message_version + from .oai_vision_std import generate_message_version one_api_version_noui, one_api_version_ui = get_predict_function( api_key_conf_name="API_KEY", max_output_token=4000, disable_proxy=False, encode_call=generate_message_version ) diff --git a/request_llms/bridge_yi_version.py b/request_llms/bridge_yi_vision.py similarity index 95% rename from request_llms/bridge_yi_version.py rename to request_llms/bridge_yi_vision.py index eca0f9150..d0cdef53f 100644 --- a/request_llms/bridge_yi_version.py +++ b/request_llms/bridge_yi_vision.py @@ -1,5 +1,5 @@ from toolbox import encode_image, every_image_file_in_path ,read_one_api_model_name -from .oai_version_std import multiple_picture_types +from .oai_vision_std import multiple_picture_types timeout_bot_msg = ( "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." 
@@ -71,9 +71,9 @@ def yi_version_generate_message_version( # 开始整理headers与message api_key = f"Bearer {key}" headers = {"Content-Type": "application/json", "Authorization": api_key} - if model.startswith("one-api-version-"): + if model.startswith("one-api-vision-"): model,_ = read_one_api_model_name(model) - model = model.replace("one-api-version-", "") + model = model.replace("one-api-vision-", "") playload = { "model": model, "messages": messages, diff --git a/request_llms/oai_version_std.py b/request_llms/oai_vision_std.py similarity index 97% rename from request_llms/oai_version_std.py rename to request_llms/oai_vision_std.py index 9d53149d2..36db27c25 100644 --- a/request_llms/oai_version_std.py +++ b/request_llms/oai_vision_std.py @@ -88,9 +88,9 @@ def generate_message_version( # 开始整理headers与message api_key = f"Bearer {key}" headers = {"Content-Type": "application/json", "Authorization": api_key} - if model.startswith("one-api-version-"): + if model.startswith("one-api-vision-"): model,_ = read_one_api_model_name(model) - model = model.replace("one-api-version-", "") + model = model.replace("one-api-vision-", "") playload = { "model": model, "messages": messages, From a2ed536b818b195a5c1f420868fd91007fb183b0 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 27 Jun 2024 21:02:46 +0800 Subject: [PATCH 10/12] Add support for qwen-2 and some bug fix --- config.py | 1 + request_llms/bridge_all.py | 54 +++++++++++++++++++++++++- request_llms/oai_std_model_template.py | 4 +- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/config.py b/config.py index e44ab331f..eedac2837 100644 --- a/config.py +++ b/config.py @@ -44,6 +44,7 @@ # "qianfan", "deepseekcoder", # "spark", "sparkv2", "sparkv3", "sparkv3.5", # "qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext", "qwen-long", "qwen-local", +# "qwen2-72b-instruct","qwen2-57b-a14b-instruct","qwen2-7b-instruct","qwen1.5-110b-chat", # "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k", # "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125", "gpt-4o-2024-05-13" # "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 7f7ecaebe..ea7612889 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -644,7 +644,17 @@ def decode(self, *args, **kwargs): except: print(trimmed_format_exc()) # -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=- -qwen_models = ["qwen-turbo","qwen-plus","qwen-max","qwen-max-longcontext","qwen-long"] +qwen_models = [ + "qwen-turbo", + "qwen-plus", + "qwen-max", + "qwen-max-longcontext", + "qwen-long", + "qwen2-72b-instruct", + "qwen2-57b-a14b-instruct", + "qwen2-7b-instruct", + "qwen1.5-110b-chat", +] if any(item in qwen_models for item in AVAIL_LLM_MODELS): try: qwen_1500_noui, qwen_1500_ui = get_predict_function( @@ -653,6 +663,12 @@ def decode(self, *args, **kwargs): qwen_2000_noui, qwen_2000_ui = get_predict_function( api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=2000, disable_proxy=False ) + qwen_6144_noui, qwen_6144_ui = get_predict_function( + api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=6144, disable_proxy=False + ) + qwen_8000_noui, qwen_8000_ui = get_predict_function( + api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=8000, disable_proxy=False + ) model_info.update({ "qwen-turbo": { "fn_with_ui": qwen_1500_ui, @@ -699,6 +715,42 @@ def decode(self, *args, **kwargs): "tokenizer": tokenizer_gpt35, "token_cnt": 
get_token_num_gpt35, }, + "qwen2-72b-instruct": { + "fn_with_ui": qwen_6144_ui, + "fn_without_ui": qwen_6144_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 128000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen2-57b-a14b-instruct": { + "fn_with_ui": qwen_6144_ui, + "fn_without_ui": qwen_6144_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 30720, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen2-7b-instruct": { + "fn_with_ui": qwen_6144_ui, + "fn_without_ui": qwen_6144_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 128000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen1.5-110b-chat": { + "fn_with_ui": qwen_8000_ui, + "fn_without_ui": qwen_8000_noui, + "can_multi_thread": True, + "endpoint": qwenapi_endpoint, + "max_token": 32000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, }) except: print(trimmed_format_exc()) diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index f8a89a805..e3bdbbb97 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -140,7 +140,7 @@ def predict_no_ui_long_connection( inputs, llm_kwargs, history=[], - sys_prompt="", + sys_prompt="Serve me as a writing and programming assistant.", observe_window=None, console_slience=False, ): @@ -256,7 +256,7 @@ def predict( plugin_kwargs, chatbot, history=[], - system_prompt="", + system_prompt="Serve me as a writing and programming assistant.", stream=True, additional_fn=None, ): From 4bf350cea7a50fb197acea66aab4d34f086443c3 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 27 Jun 2024 21:06:34 +0800 Subject: [PATCH 11/12] Some bug fix for some models --- request_llms/oai_std_model_template.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index e3bdbbb97..647ce3c72 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -66,7 +66,9 @@ def decode_chunk(chunk): return respose, finish_reason -def generate_message(chatbot, input, model, key, history, max_output_token, system_prompt, temperature): +def generate_message( + chatbot, input, model, key, history, max_output_token, system_prompt, temperature +): """ 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 """ @@ -76,6 +78,11 @@ def generate_message(chatbot, input, model, key, history, max_output_token, syst conversation_cnt = len(history) // 2 + system_prompt = ( + "Serve me as a writing and programming assistant." 
+ if system_prompt == "" + else system_prompt + ) messages = [{"role": "system", "content": system_prompt}] if conversation_cnt: for index in range(0, 2 * conversation_cnt, 2): @@ -113,12 +120,12 @@ def generate_message(chatbot, input, model, key, history, max_output_token, syst def get_predict_function( - api_key_conf_name, - max_output_token, - disable_proxy = False, - encode_call = generate_message, - decode_call = decode_chunk - ): + api_key_conf_name, + max_output_token, + disable_proxy=False, + encode_call=generate_message, + decode_call=decode_chunk, +): """ 为openai格式的API生成响应函数,其中传入参数: api_key_conf_name: From e2efe864b011107e6ab0b39f6bd33cf9119d3b10 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 27 Jun 2024 21:12:19 +0800 Subject: [PATCH 12/12] Fit some models --- request_llms/oai_std_model_template.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index 647ce3c72..9fa4362b4 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -235,9 +235,6 @@ def predict_no_ui_long_connection( ) if chunk: try: - if finish_reason == "stop": - logging.info(f"[response] {result}") - break result += response_text if not console_slience: print(response_text, end="") @@ -249,7 +246,10 @@ def predict_no_ui_long_connection( if len(observe_window) >= 2: if (time.time() - observe_window[1]) > watch_dog_patience: raise RuntimeError("用户取消了程序。") - except Exception as e: + if finish_reason == "stop": + logging.info(f"[response] {result}") + break + except Exception: chunk = get_full_error(chunk, stream_response) chunk_decoded = chunk.decode() error_msg = chunk_decoded @@ -296,7 +296,7 @@ def predict( if is_the_upload_folder(inputs): chatbot[-1] = ( inputs, - f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。", + "[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。", ) yield from update_ui( chatbot=chatbot, history=history, msg="正常" @@ -386,9 +386,6 @@ def predict( print(chunk_decoded) return - if finish_reason == "stop": - logging.info(f"[response] {gpt_replying_buffer}") - break status_text = f"finish_reason: {finish_reason}" gpt_replying_buffer += response_text # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出 @@ -397,7 +394,11 @@ def predict( yield from update_ui( chatbot=chatbot, history=history, msg=status_text ) # 刷新界面 - except Exception as e: + if finish_reason == "stop": + logging.info(f"[response] {gpt_replying_buffer}") + break + + except Exception: yield from update_ui( chatbot=chatbot, history=history, msg="Json解析不合常规" ) # 刷新界面