Skip to content

Commit

Permalink
searxng based information gathering
Browse files Browse the repository at this point in the history
  • Loading branch information
binary-husky committed Jun 16, 2024
1 parent 0b5385e commit 12aebf9
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 1 deletion.
122 changes: 122 additions & 0 deletions crazy_functions/Internet_GPT.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llms.bridge_all import model_info
import urllib.request
from functools import lru_cache


@lru_cache
def get_auth_ip():
try:
external_ip = urllib.request.urlopen('https://v4.ident.me/').read().decode('utf8')
return external_ip
except:
return '114.114.114.114'

def searxng_request(query, proxies):
url = 'https://cloud-1.agent-matrix.com/' # 请替换为实际的API URL
params = {
'q': query, # 搜索查询
'format': 'json', # 输出格式为JSON
'language': 'zh', # 搜索语言
}
headers = {
'Accept-Language': 'zh-CN,zh;q=0.9',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'X-Forwarded-For': get_auth_ip(),
'X-Real-IP': get_auth_ip()
}
results = []
response = requests.post(url, params=params, headers=headers, proxies=proxies)
if response.status_code == 200:
json_result = response.json()
for result in json_result['results']:
item = {
"title": result["title"],
"content": result["content"],
"link": result["url"],
}
results.append(item)
return results
else:
raise ValueError("搜索失败,状态码: " + str(response.status_code) + '\t' + response.content.decode('utf-8'))

def scrape_text(url, proxies) -> str:
"""Scrape text from a webpage
Args:
url (str): The URL to scrape text from
Returns:
str: The scraped text
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
'Content-Type': 'text/plain',
}
try:
response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
except:
return "无法连接到该网页"
soup = BeautifulSoup(response.text, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
return text

@CatchException
def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
user_request 当前用户的请求信息(IP地址等)
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
"[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新

# ------------- < 第1步:爬取搜索引擎的结果 > -------------
from toolbox import get_conf
proxies = get_conf('proxies')
urls = searxng_request(txt, proxies)
history = []
if len(urls) == 0:
chatbot.append((f"结论:{txt}",
"[Local Message] 受到google限制,无法从google获取信息!"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
return
# ------------- < 第2步:依次访问网页 > -------------
max_search_result = 5 # 最多收纳多少个网页的结果
for index, url in enumerate(urls[:max_search_result]):
res = scrape_text(url['link'], proxies)
history.extend([f"第{index}份搜索结果:", res])
chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新

# ------------- < 第3步:ChatGPT综合 > -------------
i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
i_say, history = input_clipping( # 裁剪输入,从最长的条目开始裁剪,防止爆token
inputs=i_say,
history=history,
max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新

4 changes: 3 additions & 1 deletion tests/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ def validate_path():
if __name__ == "__main__":
from tests.test_utils import plugin_test

plugin_test(plugin='crazy_functions.Internet_GPT->连接网络回答问题', main_input="谁是应急食品?")

# plugin_test(plugin='crazy_functions.函数动态生成->函数动态生成', main_input='交换图像的蓝色通道和红色通道', advanced_arg={"file_path_arg": "./build/ants.jpg"})

# plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2307.07522")

plugin_test(plugin='crazy_functions.PDF_Translate->批量翻译PDF文档', main_input='build/pdf/t1.pdf')
# plugin_test(plugin='crazy_functions.PDF_Translate->批量翻译PDF文档', main_input='build/pdf/t1.pdf')

# plugin_test(
# plugin="crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF",
Expand Down

0 comments on commit 12aebf9

Please sign in to comment.