From c94d5054a2103958e2cf27a6e45719798a87d848 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Tue, 25 Jun 2024 08:53:28 +0000
Subject: [PATCH 01/10] move fn

---
 main.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/main.py b/main.py
index d80d3f260..81acc4d78 100644
--- a/main.py
+++ b/main.py
@@ -24,6 +24,20 @@ def enable_log(PATH_LOGGING):
     logging.getLogger("httpx").setLevel(logging.WARNING)
     print(f"所有对话记录将自动保存在本地目录{log_dir}, 请注意自我隐私保护哦!")
 
+def encode_plugin_info(k, plugin)->str:
+    import copy
+    from themes.theme import to_cookie_str
+    plugin_ = copy.copy(plugin)
+    plugin_.pop("Function", None)
+    plugin_.pop("Class", None)
+    plugin_.pop("Button", None)
+    plugin_["Info"] = plugin.get("Info", k)
+    if plugin.get("AdvancedArgs", False):
+        plugin_["Label"] = f"插件[{k}]的高级参数说明:" + plugin.get("ArgsReminder", f"没有提供高级参数功能说明")
+    else:
+        plugin_["Label"] = f"插件[{k}]不需要高级参数。"
+    return to_cookie_str(plugin_)
+
 def main():
     import gradio as gr
     if gr.__version__ not in ['3.32.9', '3.32.10']:
@@ -106,7 +120,7 @@ def main():
                 with gr.Row():
                     audio_mic = gr.Audio(source="microphone", type="numpy", elem_id="elem_audio", streaming=True, show_label=False).style(container=False)
                 with gr.Row():
-                    status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
+                    status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。支持将文件直接粘贴到输入区。", elem_id="state-panel")
 
             with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
                 with gr.Row():
@@ -217,20 +231,6 @@ def fn_area_visibility_2(a):
             file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}")
             file_upload_2.upload(on_file_uploaded, [file_upload_2, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}")
             # 函数插件-固定按钮区
-            def encode_plugin_info(k, plugin)->str:
-                import copy
-                from themes.theme import to_cookie_str
-                plugin_ = copy.copy(plugin)
-                plugin_.pop("Function", None)
-                plugin_.pop("Class", None)
-                plugin_.pop("Button", None)
-                plugin_["Info"] = plugin.get("Info", k)
-                if plugin.get("AdvancedArgs", False):
-                    plugin_["Label"] = f"插件[{k}]的高级参数说明:" + plugin.get("ArgsReminder", f"没有提供高级参数功能说明")
-                else:
-                    plugin_["Label"] = f"插件[{k}]不需要高级参数。"
-                return to_cookie_str(plugin_)
-
             for k in plugins:
                 register_advanced_plugin_init_arr += f"""register_plugin_init("{k}","{encode_plugin_info(k, plugins[k])}");"""
                 if plugins[k].get("Class", None):

From 26e7677dc38d8be89e5a370b7305f8b1b2ede85d Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Wed, 26 Jun 2024 15:18:11 +0000
Subject: [PATCH 02/10] fix new api for taichu

---
 request_llms/com_taichu.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/request_llms/com_taichu.py b/request_llms/com_taichu.py
index 17250ddb5..f8eb39819 100644
--- a/request_llms/com_taichu.py
+++ b/request_llms/com_taichu.py
@@ -43,7 +43,8 @@ def generate_chat(self, inputs:str, llm_kwargs:dict, history:list, system_prompt
         if response.status_code == 200:
             response.encoding = 'utf-8'
             for line in response.iter_lines(decode_unicode=True):
-                delta = json.loads(line)['choices'][0]['text']
+                try: delta = json.loads(line)['data']['content']
+                except: delta = json.loads(line)['choices'][0]['text']
                 results += delta
                 yield delta, results
         else:
From b7b4e201cbbe4148041cb0405b50d79eff6e5842 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Thu, 27 Jun 2024 06:49:10 +0000
Subject: [PATCH 03/10] fix latex auto correct

---
 crazy_functions/latex_fns/latex_pickle_io.py |  8 +++++--
 tests/test_latex_auto_correct.py             | 22 ++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_latex_auto_correct.py

diff --git a/crazy_functions/latex_fns/latex_pickle_io.py b/crazy_functions/latex_fns/latex_pickle_io.py
index 451d735b8..7b93ea87a 100644
--- a/crazy_functions/latex_fns/latex_pickle_io.py
+++ b/crazy_functions/latex_fns/latex_pickle_io.py
@@ -4,12 +4,14 @@ class SafeUnpickler(pickle.Unpickler):
     def get_safe_classes(self):
-        from .latex_actions import LatexPaperFileGroup, LatexPaperSplit
+        from crazy_functions.latex_fns.latex_actions import LatexPaperFileGroup, LatexPaperSplit
+        from crazy_functions.latex_fns.latex_toolbox import LinkedListNode
         # 定义允许的安全类
         safe_classes = {
             # 在这里添加其他安全的类
             'LatexPaperFileGroup': LatexPaperFileGroup,
-            'LatexPaperSplit' : LatexPaperSplit,
+            'LatexPaperSplit': LatexPaperSplit,
+            'LinkedListNode': LinkedListNode,
         }
         return safe_classes
 
@@ -20,6 +22,8 @@ def find_class(self, module, name):
         for class_name in self.safe_classes.keys():
             if (class_name in f'{module}.{name}'):
                 match_class_name = class_name
+        if module == 'numpy' or module.startswith('numpy.'):
+            return super().find_class(module, name)
         if match_class_name is not None:
             return self.safe_classes[match_class_name]
         # 如果尝试加载未授权的类,则抛出异常
diff --git a/tests/test_latex_auto_correct.py b/tests/test_latex_auto_correct.py
new file mode 100644
index 000000000..c51e7414e
--- /dev/null
+++ b/tests/test_latex_auto_correct.py
@@ -0,0 +1,22 @@
+"""
+对项目中的Latex插件进行测试。运行方法:直接运行 python tests/test_latex_auto_correct.py
+"""
+
+
+import os, sys, importlib
+
+
+def validate_path():
+    dir_name = os.path.dirname(__file__)
+    root_dir_assume = os.path.abspath(dir_name + "/..")
+    os.chdir(root_dir_assume)
+    sys.path.append(root_dir_assume)
+
+
+validate_path()  # 切换到项目根路径
+
+if __name__ == "__main__":
+    plugin_test = importlib.import_module('test_utils').plugin_test
+
+
+    plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2203.01927")

From cbc6ec16ac5bf91c8692c958ef717d46d89b720c Mon Sep 17 00:00:00 2001
From: Menghuan1918
Date: Fri, 28 Jun 2024 11:05:13 +0800
Subject: [PATCH 04/10] Change default to Mixed option

---
 crazy_functions/Internet_GPT.py      | 4 ++--
 crazy_functions/Internet_GPT_Wrap.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/crazy_functions/Internet_GPT.py b/crazy_functions/Internet_GPT.py
index 840990fd2..ce78a3141 100644
--- a/crazy_functions/Internet_GPT.py
+++ b/crazy_functions/Internet_GPT.py
@@ -21,8 +21,8 @@ def searxng_request(query, proxies, categories='general', searxng_url=None, engi
     else:
         url = searxng_url
 
-    if engines is None:
-        engines = 'bing'
+    if engines == "Mixed":
+        engines = None
 
     if categories == 'general':
         params = {
diff --git a/crazy_functions/Internet_GPT_Wrap.py b/crazy_functions/Internet_GPT_Wrap.py
index 8d3aa43fc..8ee1629c1 100644
--- a/crazy_functions/Internet_GPT_Wrap.py
+++ b/crazy_functions/Internet_GPT_Wrap.py
@@ -26,7 +26,7 @@ def define_arg_selection_menu(self):
             "categories":
                 ArgProperty(title="搜索分类", options=["网页", "学术论文"], default_value="网页", description="无", type="dropdown").model_dump_json(),
             "engine":
-                ArgProperty(title="选择搜索引擎", options=["bing", "google", "duckduckgo"], default_value="bing", description="无", type="dropdown").model_dump_json(),
"bing", "google", "duckduckgo"], default_value="Mixed", description="无", type="dropdown").model_dump_json(), "searxng_url": ArgProperty(title="Searxng服务地址", description="输入Searxng的地址", default_value=get_conf("SEARXNG_URL"), type="string").model_dump_json(), # 主输入,自动从输入框同步 From 897d788284cb700ebd2234b8c6e49c1dcd9be4de Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Fri, 28 Jun 2024 11:13:09 +0800 Subject: [PATCH 05/10] Add option optimizer --- crazy_functions/Internet_GPT_Wrap.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crazy_functions/Internet_GPT_Wrap.py b/crazy_functions/Internet_GPT_Wrap.py index 8ee1629c1..dbc13af9f 100644 --- a/crazy_functions/Internet_GPT_Wrap.py +++ b/crazy_functions/Internet_GPT_Wrap.py @@ -22,11 +22,13 @@ def define_arg_selection_menu(self): """ gui_definition = { "main_input": - ArgProperty(title="输入问题", description="待通过互联网检索的问题", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步 + ArgProperty(title="输入问题", description="待通过互联网检索的问题,会自动读取输入框内容", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步 "categories": ArgProperty(title="搜索分类", options=["网页", "学术论文"], default_value="网页", description="无", type="dropdown").model_dump_json(), "engine": ArgProperty(title="选择搜索引擎", options=["Mixed", "bing", "google", "duckduckgo"], default_value="Mixed", description="无", type="dropdown").model_dump_json(), + "optimizer": + ArgProperty(title="搜索优化", options=["关闭", "开启", "开启(增强)"], default_value="关闭", description="是否使用搜索增强。注意这可能会消耗较多token", type="dropdown").model_dump_json(), "searxng_url": ArgProperty(title="Searxng服务地址", description="输入Searxng的地址", default_value=get_conf("SEARXNG_URL"), type="string").model_dump_json(), # 主输入,自动从输入框同步 @@ -39,6 +41,7 @@ def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, use """ if plugin_kwargs["categories"] == "网页": plugin_kwargs["categories"] = "general" if plugin_kwargs["categories"] == "学术论文": plugin_kwargs["categories"] = "science" - + optimizer_options=["关闭", "开启", "开启(增强)"] + plugin_kwargs["optimizer"] = optimizer_options.index(plugin_kwargs["optimizer"]) yield from 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request) From bdbd7776e54c1d9351f85b9573c235d337ccae40 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Fri, 28 Jun 2024 12:54:06 +0800 Subject: [PATCH 06/10] Add search optimizer prompts --- crazy_functions/Internet_GPT.py | 5 ++ crazy_functions/prompts/Search.py | 87 +++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 crazy_functions/prompts/Search.py diff --git a/crazy_functions/Internet_GPT.py b/crazy_functions/Internet_GPT.py index ce78a3141..b6e28a41c 100644 --- a/crazy_functions/Internet_GPT.py +++ b/crazy_functions/Internet_GPT.py @@ -7,6 +7,11 @@ import random from functools import lru_cache from check_proxy import check_proxy +from .prompts.Search import Search_optimizer, Search_academic_optimizer + +def optimizer(query, proxies, categories='general', searxng_url=None, engines=None): + # ------------- < 第1步:尝试进行搜索优化 > ------------- + pass @lru_cache def get_auth_ip(): diff --git a/crazy_functions/prompts/Search.py b/crazy_functions/prompts/Search.py new file mode 100644 index 000000000..5e45745f2 --- /dev/null +++ b/crazy_functions/prompts/Search.py @@ -0,0 +1,87 @@ +Search_optimizer="""作为一个网页搜索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高网页检索的精度。生成的问题要求指向对象清晰明确,并与“原问题语言相同”。例如: +历史记录: +" +Q: 对话背景。 +A: 当前对话是关于 Nginx 的介绍和在Ubuntu上的使用等。 +" +原问题: 怎么下载 +检索词: ["Nginx 
下载","Ubuntu Nginx","Ubuntu安装Nginx"] +---------------- +历史记录: +" +Q: 对话背景。 +A: 当前对话是关于 Nginx 的介绍和使用等。 +Q: 报错 "no connection" +A: 报错"no connection"可能是因为…… +" +原问题: 怎么解决 +检索词: ["Nginx报错"no connection" 解决","Nginx'no connection'报错 原因","Nginx提示'no connection'"] +---------------- +历史记录: +" + +" +原问题: 你知道 Python 么? +检索词: ["Python","Python 使用教程。","Python 特点和优势"] +---------------- +历史记录: +" +Q: 列出Java的三种特点? +A: 1. Java 是一种编译型语言。 + 2. Java 是一种面向对象的编程语言。 + 3. Java 是一种跨平台的编程语言。 +" +原问题: 介绍下第2点。 +检索词: ["Java 面向对象特点","Java 面向对象编程优势。","Java 面向对象编程"] +---------------- +现在有历史记录: +" +{his} +" +有其原问题: {query} +直接给出最多{num}个检索词,必须以json形式给出,不得有多余字符: +""" + +Search_academic_optimizer="""作为一个学术论文搜索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高学术论文检索的精度。生成的问题要求指向对象清晰明确,并与“原问题语言相同”。例如: +历史记录: +" +Q: 对话背景。 +A: 当前对话是关于深度学习的介绍和在图像识别中的应用等。 +" +原问题: 怎么下载相关论文 +检索词: ["深度学习 图像识别 论文下载","图像识别 深度学习 研究论文","深度学习 图像识别 论文资源","Deep Learning Image Recognition Paper Download","Image Recognition Deep Learning Research Paper"] +---------------- +历史记录: +" +Q: 对话背景。 +A: 当前对话是关于深度学习的介绍和应用等。 +Q: 报错 "模型不收敛" +A: 报错"模型不收敛"可能是因为…… +" +原问题: 怎么解决 +检索词: ["深度学习 模型不收敛 解决方案 论文","深度学习 模型不收敛 原因 研究","深度学习 模型不收敛 论文","Deep Learning Model Convergence Issue Solution Paper","Deep Learning Model Convergence Problem Research"] +---------------- +历史记录: +" + +" +原问题: 你知道 GAN 么? +检索词: ["生成对抗网络 论文","GAN 使用教程 论文","GAN 特点和优势 研究","Generative Adversarial Network Paper","GAN Usage Tutorial Paper"] +---------------- +历史记录: +" +Q: 列出机器学习的三种应用? +A: 1. 机器学习在图像识别中的应用。 + 2. 机器学习在自然语言处理中的应用。 + 3. 机器学习在推荐系统中的应用。 +" +原问题: 介绍下第2点。 +检索词: ["机器学习 自然语言处理 应用 论文","机器学习 自然语言处理 研究","机器学习 NLP 应用 论文","Machine Learning Natural Language Processing Application Paper","Machine Learning NLP Research"] +---------------- +现在有历史记录: +" +{his} +" +有其原问题: {query} +直接给出最多{num}个检索词,必须以json形式给出,不得有多余字符: +""" \ No newline at end of file From c2d0213a8d7af590629e5f67d4685734faaac21b Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Fri, 28 Jun 2024 13:30:35 +0800 Subject: [PATCH 07/10] Enhanced Processing --- crazy_functions/Internet_GPT.py | 61 ++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/crazy_functions/Internet_GPT.py b/crazy_functions/Internet_GPT.py index b6e28a41c..1fffc54da 100644 --- a/crazy_functions/Internet_GPT.py +++ b/crazy_functions/Internet_GPT.py @@ -100,7 +100,7 @@ def scrape_text(url, proxies) -> str: @CatchException def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request): - + optimizer_history = history[:-8] history = [] # 清空历史,以免输入溢出 chatbot.append((f"请结合互联网信息回答以下问题:{txt}", "检索中...")) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 @@ -111,6 +111,7 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s categories = plugin_kwargs.get('categories', 'general') searxng_url = plugin_kwargs.get('searxng_url', None) engines = plugin_kwargs.get('engine', None) + optimizer = plugin_kwargs.get('optimizer', 0) urls = searxng_request(txt, proxies, categories, searxng_url, engines=engines) history = [] if len(urls) == 0: @@ -120,7 +121,7 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s return # ------------- < 第2步:依次访问网页 > ------------- max_search_result = 5 # 最多收纳多少个网页的结果 - chatbot.append([f"联网检索中 ...", None]) + chatbot.append(["联网检索中 ...", None]) for index, url in enumerate(urls[:max_search_result]): res = scrape_text(url['link'], proxies) prefix = f"第{index}份搜索结果 [源自{url['source'][0]}搜索] ({url['title'][:25]}):" @@ -130,18 +131,46 @@ def 连接网络回答问题(txt, llm_kwargs, 
@@ -130,18 +131,46 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
     # ------------- < 第3步:ChatGPT综合 > -------------
-    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
-    i_say, history = input_clipping(    # 裁剪输入,从最长的条目开始裁剪,防止爆token
-        inputs=i_say,
-        history=history,
-        max_token_limit=min(model_info[llm_kwargs['llm_model']]['max_token']*3//4, 8192)
-    )
-    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-        inputs=i_say, inputs_show_user=i_say,
-        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
-    )
-    chatbot[-1] = (i_say, gpt_say)
-    history.append(i_say);history.append(gpt_say)
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
+    if optimizer == 0:
+        i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
+        i_say, history = input_clipping(    # 裁剪输入,从最长的条目开始裁剪,防止爆token
+            inputs=i_say,
+            history=history,
+            max_token_limit=min(model_info[llm_kwargs['llm_model']]['max_token']*3//4, 8192)
+        )
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=i_say, inputs_show_user=i_say,
+            llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+            sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
+        )
+        chatbot[-1] = (i_say, gpt_say)
+        history.append(i_say);history.append(gpt_say)
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
+    #* 或者使用搜索优化器,这样可以保证后续问答能读取到有效的历史记录
+    else:
+        i_say = f"从以上搜索结果中抽取与问题:{txt} 相关的信息,"
+        i_say, history = input_clipping(    # 裁剪输入,从最长的条目开始裁剪,防止爆token
+            inputs=i_say,
+            history=history,
+            max_token_limit=min(model_info[llm_kwargs['llm_model']]['max_token']*3//4, 8192)
+        )
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=i_say, inputs_show_user=i_say,
+            llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+            sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的三个搜索结果进行总结"
+        )
+        chatbot[-1] = (i_say, gpt_say)
+        history = []
+        history.append(i_say);history.append(gpt_say)
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
+        # ------------- < 第4步:根据综合回答问题 > -------------
+        i_say = f"请根据以上搜索结果回答问题:{txt}"
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=i_say, inputs_show_user=i_say,
+            llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+            sys_prompt="请根据给定的若干条搜索结果回答问题"
+        )
+        chatbot[-1] = (i_say, gpt_say)
+        history.append(i_say);history.append(gpt_say)
+        yield from update_ui(chatbot=chatbot, history=history)
\ No newline at end of file

From 25df539d7565e7e44e3c24b3fb998a60d9e89e56 Mon Sep 17 00:00:00 2001
From: Menghuan1918
Date: Fri, 28 Jun 2024 15:02:27 +0800
Subject: [PATCH 08/10] Finish search_optimizer part

---
 crazy_functions/Internet_GPT.py            | 105 +++++++++++++++++-
 .../prompts/{Search.py => Internet_GPT.py} |   0
 2 files changed, 100 insertions(+), 5 deletions(-)
 rename crazy_functions/prompts/{Search.py => Internet_GPT.py} (100%)

diff --git a/crazy_functions/Internet_GPT.py b/crazy_functions/Internet_GPT.py
index 1fffc54da..6f7b76921 100644
--- a/crazy_functions/Internet_GPT.py
+++ b/crazy_functions/Internet_GPT.py
@@ -3,15 +3,104 @@
 import requests
 from bs4 import BeautifulSoup
 from request_llms.bridge_all import model_info
-import urllib.request
 import random
 from functools import lru_cache
 from check_proxy import check_proxy
-from .prompts.Search import Search_optimizer, Search_academic_optimizer
+from request_llms.bridge_all import predict_no_ui_long_connection
+from .prompts.Internet_GPT import Search_optimizer, Search_academic_optimizer
+import time
+import re
+import json
+from itertools import zip_longest
+
+def search_optimizer(
+    query,
+    proxies,
+    history,
+    llm_kwargs,
+    optimizer=1,
+    categories="general",
+    searxng_url=None,
+    engines=None,
+):
+    # ------------- < 第1步:尝试进行搜索优化 > -------------
+    # * 增强优化,会尝试结合历史记录进行搜索优化
+    if optimizer == 2:
+        his = ""
+        if len(history) == 0:
+            pass
+        else:
+            for temp in history[:-1]:
+                his += f"Q: {temp[0]}\n"
+                his += f"A: {temp[1]}\n"
+        if categories == "general":
+            sys_prompt = Search_optimizer.format(query=query, history=his, num=4)
+        elif categories == "science":
+            sys_prompt = Search_academic_optimizer.format(query=query, history=his, num=4)
+    else:
+        his = ""
+        if categories == "general":
+            sys_prompt = Search_optimizer.format(query=query, history=his, num=3)
+        elif categories == "science":
+            sys_prompt = Search_academic_optimizer.format(query=query, history=his, num=3)
+    mutable = ["", time.time(), ""]
+    llm_kwargs["temperature"] = 0.8
+    try:
+        querys_json = predict_no_ui_long_connection(
+            inputs=query,
+            llm_kwargs=llm_kwargs,
+            history=[],
+            sys_prompt=sys_prompt,
+            observe_window=mutable,
+        )
+    except Exception:
+        querys_json = json.dumps([query])
+    #* 尝试解码优化后的搜索结果
+    querys_json = re.sub(r"```json|```", "", querys_json)
+    try:
+        querys = json.loads(querys_json)
+    except Exception:
+        #* 如果解码失败,降低温度再试一次
+        try:
+            llm_kwargs["temperature"] = 0.4
+            querys_json = predict_no_ui_long_connection(
+                inputs=query,
+                llm_kwargs=llm_kwargs,
+                history=[],
+                sys_prompt=sys_prompt,
+                observe_window=mutable,
+            )
+            querys_json = re.sub(r"```json|```", "", querys_json)
+            querys = json.loads(querys_json)
+        except Exception:
+            #* 如果再次失败,直接返回原始问题
+            querys = [query]
+    links = []
+    success = 0
+    Exceptions = ""
+    for q in querys:
+        try:
+            link = searxng_request(q, proxies, categories, searxng_url, engines=engines)
+            if len(link) > 0:
+                links.append(link[:-5])
+                success += 1
+        except Exception as e:
+            Exceptions = e
+            pass
+    if success == 0:
+        raise ValueError(f"在线搜索失败!\n{Exceptions}")
+    # * 清洗搜索结果,依次放入每组第一,第二个搜索结果,并清洗重复的搜索结果
+    seen_links = set()
+    result = []
+    for tuple in zip_longest(*links, fillvalue=None):
+        for item in tuple:
+            if item is not None:
+                link = item["link"]
+                if link not in seen_links:
+                    seen_links.add(link)
+                    result.append(item)
+    return result
 
 @lru_cache
 def get_auth_ip():
@@ -112,15 +201,21 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     categories = plugin_kwargs.get('categories', 'general')
     searxng_url = plugin_kwargs.get('searxng_url', None)
     engines = plugin_kwargs.get('engine', None)
     optimizer = plugin_kwargs.get('optimizer', 0)
-    urls = searxng_request(txt, proxies, categories, searxng_url, engines=engines)
+    if optimizer == 0:
+        urls = searxng_request(txt, proxies, categories, searxng_url, engines=engines)
+    else:
+        urls = search_optimizer(txt, proxies, optimizer_history, llm_kwargs, optimizer, categories, searxng_url, engines)
     history = []
     if len(urls) == 0:
         chatbot.append((f"结论:{txt}", "[Local Message] 受到限制,无法从searxng获取信息!请尝试更换搜索引擎。"))
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
+
     # ------------- < 第2步:依次访问网页 > -------------
     max_search_result = 5  # 最多收纳多少个网页的结果
+    if optimizer == 2:
+        max_search_result = 8
     chatbot.append(["联网检索中 ...", None])
     for index, url in enumerate(urls[:max_search_result]):
         res = scrape_text(url['link'], proxies)
         prefix = f"第{index}份搜索结果 [源自{url['source'][0]}搜索] ({url['title'][:25]}):"
diff --git a/crazy_functions/prompts/Search.py b/crazy_functions/prompts/Internet_GPT.py
similarity index 100%
rename from crazy_functions/prompts/Search.py
rename to crazy_functions/prompts/Internet_GPT.py

From e81d5960381b3b3e028522f493fb78d8cd758462 Mon Sep 17 00:00:00 2001
From: Menghuan1918
Date: Fri, 28 Jun 2024 15:10:44 +0800
Subject: [PATCH 09/10] prompts bug fix

---
 crazy_functions/Internet_GPT.py         | 4 ++--
 crazy_functions/prompts/Internet_GPT.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/crazy_functions/Internet_GPT.py b/crazy_functions/Internet_GPT.py
index 6f7b76921..b92180cd1 100644
--- a/crazy_functions/Internet_GPT.py
+++ b/crazy_functions/Internet_GPT.py
@@ -26,7 +26,7 @@ def search_optimizer(
     # ------------- < 第1步:尝试进行搜索优化 > -------------
     # * 增强优化,会尝试结合历史记录进行搜索优化
     if optimizer == 2:
-        his = ""
+        his = " "
         if len(history) == 0:
             pass
         else:
@@ -38,7 +38,7 @@ def search_optimizer(
         elif categories == "science":
             sys_prompt = Search_academic_optimizer.format(query=query, history=his, num=4)
     else:
-        his = ""
+        his = " "
         if categories == "general":
             sys_prompt = Search_optimizer.format(query=query, history=his, num=3)
         elif categories == "science":
diff --git a/crazy_functions/prompts/Internet_GPT.py b/crazy_functions/prompts/Internet_GPT.py
index 5e45745f2..a623808d0 100644
--- a/crazy_functions/prompts/Internet_GPT.py
+++ b/crazy_functions/prompts/Internet_GPT.py
@@ -36,7 +36,7 @@
 ----------------
 现在有历史记录:
 "
-{his}
+{history}
 "
 有其原问题: {query}
 直接给出最多{num}个检索词,必须以json形式给出,不得有多余字符:
@@ -80,7 +80,7 @@
 ----------------
 现在有历史记录:
 "
-{his}
+{history}
 "
 有其原问题: {query}
 直接给出最多{num}个检索词,必须以json形式给出,不得有多余字符:

From 1cc5806117dd0579afe922fb0c617735b6d66b6e Mon Sep 17 00:00:00 2001
From: Menghuan1918
Date: Fri, 28 Jun 2024 15:34:13 +0800
Subject: [PATCH 10/10] Bug fix

---
 crazy_functions/Internet_GPT.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/crazy_functions/Internet_GPT.py b/crazy_functions/Internet_GPT.py
index b92180cd1..b43cdd48a 100644
--- a/crazy_functions/Internet_GPT.py
+++ b/crazy_functions/Internet_GPT.py
@@ -30,9 +30,11 @@ def search_optimizer(
         if len(history) == 0:
             pass
         else:
-            for temp in history[:-1]:
-                his += f"Q: {temp[0]}\n"
-                his += f"A: {temp[1]}\n"
+            for i, h in enumerate(history):
+                if i % 2 == 0:
+                    his += f"Q: {h}\n"
+                else:
+                    his += f"A: {h}\n"
         if categories == "general":
             sys_prompt = Search_optimizer.format(query=query, history=his, num=4)
         elif categories == "science":
@@ -55,7 +57,7 @@ def search_optimizer(
             observe_window=mutable,
         )
     except Exception:
-        querys_json = json.dumps([query])
+        querys_json = "1234"
     #* 尝试解码优化后的搜索结果
     querys_json = re.sub(r"```json|```", "", querys_json)
     try:
@@ -226,7 +228,7 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
     # ------------- < 第3步:ChatGPT综合 > -------------
-    if optimizer == 0:
+    if (optimizer == 0 or optimizer == 1):
         i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
         i_say, history = input_clipping(    # 裁剪输入,从最长的条目开始裁剪,防止爆token
             inputs=i_say,
@@ -243,7 +245,7 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
     #* 或者使用搜索优化器,这样可以保证后续问答能读取到有效的历史记录
     else:
-        i_say = f"从以上搜索结果中抽取与问题:{txt} 相关的信息,"
+        i_say = f"从以上搜索结果中抽取与问题:{txt} 相关的信息:"
         i_say, history = input_clipping(    # 裁剪输入,从最长的条目开始裁剪,防止爆token
             inputs=i_say,
             history=history,
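Note on the result-merging step introduced in [PATCH 08/10]: the snippet below is a minimal, standalone sketch (not part of the patch series) of how search_optimizer interleaves the per-query result lists with itertools.zip_longest and drops duplicate URLs. The merge_results helper and the sample URLs are hypothetical names added only for illustration.

from itertools import zip_longest

def merge_results(links):
    # links: one list of result dicts per optimized query, each dict carrying at least a "link" key
    seen_links = set()
    merged = []
    # Round-robin merge: take every query's 1st result, then every query's 2nd result, ...
    # skipping any URL that has already been collected.
    for group in zip_longest(*links, fillvalue=None):
        for item in group:
            if item is not None and item["link"] not in seen_links:
                seen_links.add(item["link"])
                merged.append(item)
    return merged

if __name__ == "__main__":
    # Hypothetical sample data for two optimized queries.
    query_a = [{"link": "https://example.com/a1"}, {"link": "https://example.com/shared"}]
    query_b = [{"link": "https://example.com/shared"}, {"link": "https://example.com/b2"}]
    print(merge_results([query_a, query_b]))
    # -> a1, shared, b2 (the duplicate "shared" entry is kept only once)

The round-robin order is what keeps the merged list balanced across the rewritten queries: each query contributes its top hits before any single query dominates the pool that is later truncated by max_search_result.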