Add support for the qwen series, groq and yi_vision; add an interface for multimodal LLMs in one-api #1814

Open
wants to merge 12 commits into master
16 changes: 13 additions & 3 deletions config.py
@@ -43,18 +43,22 @@
# AVAIL_LLM_MODELS = [
# "qianfan", "deepseekcoder",
# "spark", "sparkv2", "sparkv3", "sparkv3.5",
# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local",
# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext", "qwen-long", "qwen-local",
# "qwen2-72b-instruct","qwen2-57b-a14b-instruct","qwen2-7b-instruct","qwen1.5-110b-chat",
# "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k",
# "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125", "gpt-4o-2024-05-13"
# "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2",
# "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama",
# "deepseek-chat" ,"deepseek-coder",
# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview",
# "llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192",
# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision"
# ]
# --- --- --- ---
# In addition, when connecting via one-api/vllm/ollama, you can use the
# "one-api-*", "vllm-*", "ollama-*" prefixes to access models integrated in non-standard ways, e.g.
# AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)"]
# When connecting multimodal models, you can use the "one-api-vision-*" prefix, e.g.
# AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"]
# --- --- --- ---
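As a quick illustration of the options documented above (a sketch only, not part of this diff; which models you enable depends on which of the API keys further down are filled in), a working AVAIL_LLM_MODELS might be:

# Example configuration (illustrative):
AVAIL_LLM_MODELS = [
    "qwen-plus", "qwen-long",                    # requires DASHSCOPE_API_KEY
    "llama3-70b-8192", "mixtral-8x7b-32768",     # requires GROQ_API_KEY
    "yi-vision",                                 # requires YIMODEL_API_KEY
    "one-api-vision-gpt-4o(max_token=32000)",    # requires the API_KEY of your one-api deployment
]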


@@ -127,7 +131,7 @@
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"


# Connect to the Tongyi Qianwen (Qwen) online models https://dashscope.console.aliyun.com/
# Connect to the Tongyi Qianwen (Qwen) online models https://bailian.console.aliyun.com/
DASHSCOPE_API_KEY = "" # Alibaba DashScope API_KEY


@@ -229,14 +233,20 @@
# 01.AI (Yi Model) API KEY
YIMODEL_API_KEY = ""


# DeepSeek API KEY; the default request URL is "https://api.deepseek.com/v1/chat/completions"
DEEPSEEK_API_KEY = ""


# Mathpix provides OCR for PDFs, but an account must be registered
MATHPIX_APPID = ""
MATHPIX_APPKEY = ""


# Groq API KEY; the default request URL is "https://api.groq.com/openai/v1/chat/completions"
GROQ_API_KEY = ""


# DOC2X PDF parsing service; register an account and obtain an API KEY: https://doc2x.noedgeai.com/login
DOC2X_API_KEY = ""

200 changes: 186 additions & 14 deletions request_llms/bridge_all.py
@@ -72,6 +72,8 @@ def decode(self, *args, **kwargs):
ollama_endpoint = "http://localhost:11434/api/chat"
yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
qwenapi_endpoint = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions"
groq_endpoint = "https://api.groq.com/openai/v1/chat/completions"

if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -93,6 +95,8 @@ def decode(self, *args, **kwargs):
if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollama_endpoint]
if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
if qwenapi_endpoint in API_URL_REDIRECT: qwenapi_endpoint = API_URL_REDIRECT[qwenapi_endpoint]
if groq_endpoint in API_URL_REDIRECT: groq_endpoint = API_URL_REDIRECT[groq_endpoint]

# Get the tokenizers
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
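For completeness, a sketch of how the new endpoints cooperate with the redirect mechanism above: API_URL_REDIRECT is assumed to be the usual dict in config.py mapping an official URL to a replacement, for example routing Groq traffic through a reverse proxy (the proxy host below is hypothetical):

# Hypothetical config.py entry, for illustration only:
API_URL_REDIRECT = {
    "https://api.groq.com/openai/v1/chat/completions":
        "https://my-proxy.example.com/openai/v1/chat/completions",
}
# With this in place, the lookup above rewrites groq_endpoint before any request is made.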
@@ -640,43 +644,118 @@ def decode(self, *args, **kwargs):
except:
print(trimmed_format_exc())
# -=-=-=-=-=-=- Tongyi Qianwen (Qwen) online models -=-=-=-=-=-=-
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
qwen_models = [
"qwen-turbo",
"qwen-plus",
"qwen-max",
"qwen-max-longcontext",
"qwen-long",
"qwen2-72b-instruct",
"qwen2-57b-a14b-instruct",
"qwen2-7b-instruct",
"qwen1.5-110b-chat",
]
if any(item in qwen_models for item in AVAIL_LLM_MODELS):
try:
from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
from .bridge_qwen import predict as qwen_ui
qwen_1500_noui, qwen_1500_ui = get_predict_function(
api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=1500, disable_proxy=False
)
qwen_2000_noui, qwen_2000_ui = get_predict_function(
api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=2000, disable_proxy=False
)
qwen_6144_noui, qwen_6144_ui = get_predict_function(
api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=6144, disable_proxy=False
)
qwen_8000_noui, qwen_8000_ui = get_predict_function(
api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=8000, disable_proxy=False
)
model_info.update({
"qwen-turbo": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"fn_with_ui": qwen_1500_ui,
"fn_without_ui": qwen_1500_noui,
"can_multi_thread": True,
"endpoint": None,
"endpoint": qwenapi_endpoint,
"max_token": 6144,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-plus": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"fn_with_ui": qwen_2000_ui,
"fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
"endpoint": None,
"endpoint": qwenapi_endpoint,
"max_token": 30720,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-max": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"fn_with_ui": qwen_2000_ui,
"fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
"endpoint": None,
"endpoint": qwenapi_endpoint,
"max_token": 6144,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-max-longcontext": {
"fn_with_ui": qwen_2000_ui,
"fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 28672,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
},
"qwen-long": {
"fn_with_ui": qwen_2000_ui,
"fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 1000000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen2-72b-instruct": {
"fn_with_ui": qwen_6144_ui,
"fn_without_ui": qwen_6144_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 128000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen2-57b-a14b-instruct": {
"fn_with_ui": qwen_6144_ui,
"fn_without_ui": qwen_6144_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 30720,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen2-7b-instruct": {
"fn_with_ui": qwen_6144_ui,
"fn_without_ui": qwen_6144_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 128000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen1.5-110b-chat": {
"fn_with_ui": qwen_8000_ui,
"fn_without_ui": qwen_8000_noui,
"can_multi_thread": True,
"endpoint": qwenapi_endpoint,
"max_token": 32000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
except:
print(trimmed_format_exc())
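The entries registered above are plain dicts keyed by model name; a minimal sketch of how they are presumably consumed by the rest of bridge_all.py (illustrative, assuming the established model_info conventions, not verbatim from this PR):

# Sketch of downstream usage:
entry = model_info["qwen-plus"]
fn_without_ui = entry["fn_without_ui"]   # qwen_2000_noui built above (2000 output tokens max)
context_limit = entry["max_token"]       # 30720, used by the upper layers to clip the input
# The upper layers then call fn_without_ui / fn_with_ui with the prompt and history.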
# -=-=-=-=-=-=- 01.AI (Yi) models -=-=-=-=-=-=-
yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision"]
if any(item in yi_models for item in AVAIL_LLM_MODELS):
try:
yimodel_4k_noui, yimodel_4k_ui = get_predict_function(
@@ -688,6 +767,23 @@ def decode(self, *args, **kwargs):
yimodel_200k_noui, yimodel_200k_ui = get_predict_function(
api_key_conf_name="YIMODEL_API_KEY", max_output_token=4096, disable_proxy=False
)
if "yi-vision" in AVAIL_LLM_MODELS:
from .bridge_yi_vision import yi_version_generate_message_version
yimodel_version_noui, yimodel_version_ui = get_predict_function(
api_key_conf_name="YIMODEL_API_KEY", max_output_token=600, disable_proxy=False, encode_call=yi_version_generate_message_version
)
model_info.update({
"yi-vision": {
"fn_with_ui": yimodel_version_ui,
"fn_without_ui": yimodel_version_noui,
"can_multi_thread": True,
"endpoint": yimodel_endpoint,
"max_token": 4000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
})

model_info.update({
"yi-34b-chat-0205": {
"fn_with_ui": yimodel_4k_ui,
@@ -892,6 +988,52 @@ def decode(self, *args, **kwargs):
})
except:
print(trimmed_format_exc())
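The encode_call hooks introduced for the vision models (yi_version_generate_message_version here, generate_message_version for one-api-vision further down) are presumably responsible for packing images into OpenAI-style multimodal messages. A hedged sketch of such a payload, assuming base64-embedded images (the actual encoders live in bridge_yi_vision.py / oai_vision_std.py, which are not shown in this diff excerpt):

# Assumed message shape, for illustration only:
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this figure."},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64," + img_b64}},
    ],
}]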
# -=-=-=-=-=-=- groq -=-=-=-=-=-=-
groq_models = ["llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192"]
if any(item in groq_models for item in AVAIL_LLM_MODELS):
try:
groq_8k_noui, groq_8k_ui = get_predict_function(
api_key_conf_name="GROQ_API_KEY", max_output_token=8192, disable_proxy=False
)
groq_32k_noui, groq_32k_ui = get_predict_function(
api_key_conf_name="GROQ_API_KEY", max_output_token=32768, disable_proxy=False
)
model_info.update({
"llama3-8b-8192": {
"fn_with_ui": groq_8k_ui,
"fn_without_ui": groq_8k_noui,
"endpoint": groq_endpoint,
"max_token": 8192,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"gemma-7b-it": {
"fn_with_ui": groq_8k_ui,
"fn_without_ui": groq_8k_noui,
"endpoint": groq_endpoint,
"max_token": 8192,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"mixtral-8x7b-32768": {
"fn_with_ui": groq_32k_ui,
"fn_without_ui": groq_32k_noui,
"endpoint": groq_endpoint,
"max_token": 32768,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"llama3-70b-8192": {
"fn_with_ui": groq_8k_ui,
"fn_without_ui": groq_8k_noui,
"endpoint": groq_endpoint,
"max_token": 8192,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
except:
print(trimmed_format_exc())
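Since the endpoint registered for these models is Groq's OpenAI-compatible chat/completions API, the wire format can be illustrated with a minimal standalone request (a sketch independent of this project's own request layer; requests is assumed to be installed, and GROQ_API_KEY is the key configured in config.py):

# Standalone illustration of the OpenAI-compatible Groq endpoint:
import requests

resp = requests.post(
    "https://api.groq.com/openai/v1/chat/completions",
    headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
    json={
        "model": "llama3-70b-8192",
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(resp.json()["choices"][0]["message"]["content"])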
# -=-=-=-=-=-=- one-api alignment support -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
# This interface is designed for more flexible access to the one-api multi-model management panel, e.g.: AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
@@ -915,6 +1057,36 @@ def decode(self, *args, **kwargs):
"token_cnt": get_token_num_gpt35,
},
})
# -=-=-=-=-=-=- one-api-vision alignment support -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-vision-")]:
# This interface is designed for more flexible access to multimodal models in the one-api multi-model management panel, e.g.: AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"]
# where
#   "one-api-vision-" is the prefix (required)
#   "gpt-4o" is the model name (required)
#   "(max_token=32000)" is the configuration (optional)
try:
_, max_token_tmp = read_one_api_model_name(model)
except:
print(f"one-api-vision模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
continue
try:
from .oai_vision_std import generate_message_version
one_api_version_noui, one_api_version_ui = get_predict_function(
api_key_conf_name="API_KEY", max_output_token=4000, disable_proxy=False, encode_call=generate_message_version
)
model_info.update({
model: {
"fn_with_ui": one_api_version_ui,
"fn_without_ui": one_api_version_noui,
"can_multi_thread": True,
"endpoint": openai_endpoint,
"max_token": max_token_tmp,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
except:
print(trimmed_format_exc())
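read_one_api_model_name is expected to split the configured string into a bare name plus the optional max_token override; the helper itself lives elsewhere in the repo, so the following is only a hypothetical equivalent showing the intended decomposition:

# Hypothetical stand-in for read_one_api_model_name, for illustration only:
import re

def parse_one_api_entry(entry: str, default_max_token: int = 4096):
    # "one-api-vision-gpt-4o(max_token=32000)" -> ("one-api-vision-gpt-4o", 32000)
    m = re.match(r"^(?P<name>.+?)(?:\(max_token=(?P<max>\d+)\))?$", entry)
    max_token = int(m.group("max")) if m.group("max") else default_max_token
    return m.group("name"), max_token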
# -=-=-=-=-=-=- vllm alignment support -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]:
# This interface is designed for more flexible access to the vllm multi-model management panel, e.g.: AVAIL_LLM_MODELS = ["vllm-/home/hmp/llm/cache/Qwen1___5-32B-Chat(max_token=6666)"]
66 changes: 0 additions & 66 deletions request_llms/bridge_qwen.py

This file was deleted.
