diff --git a/huixiangdou/frontend/lark.py b/huixiangdou/frontend/lark.py
index d4f855ac..090bacb9 100644
--- a/huixiangdou/frontend/lark.py
+++ b/huixiangdou/frontend/lark.py
@@ -53,7 +53,7 @@ def post(self, data):
                                      headers=self.headers,
                                      data=post_data,
                                      verify=False,
-                                     timeout=5)
+                                     timeout=60)
         except requests.exceptions.HTTPError as exc:
             code = exc.response.status_code
             reason = exc.response.reason
@@ -96,5 +96,5 @@ def post(self, data):
             requests.post(self.webhook,
                           headers=self.headers,
                           data=json.dumps(error_data),
-                          timeout=5)
+                          timeout=60)
         return result
diff --git a/huixiangdou/main.py b/huixiangdou/main.py
index eac50d3d..bbfbf663 100755
--- a/huixiangdou/main.py
+++ b/huixiangdou/main.py
@@ -45,7 +45,7 @@ def check_env(args):
             f'{CONFIG_NAME} not found, download a template from {CONFIG_URL}.')
         try:
-            response = requests.get(CONFIG_URL, timeout=5)
+            response = requests.get(CONFIG_URL, timeout=60)
             response.raise_for_status()
             with open(CONFIG_NAME, 'wb') as f:
                 f.write(response.content)
diff --git a/huixiangdou/service/feature_store.py b/huixiangdou/service/feature_store.py
index 7140955a..24e8e7e7 100644
--- a/huixiangdou/service/feature_store.py
+++ b/huixiangdou/service/feature_store.py
@@ -215,11 +215,22 @@ def load_feature(self, feature_reject: str = 'db_reject'):
         """Load extracted feature."""
         # https://api.python.langchain.com/en/latest/vectorstores/langchain.vectorstores.faiss.FAISS.html#langchain.vectorstores.faiss.FAISS
+
+        resp_dir = os.path.join(work_dir, feature_response)
+        reject_dir = os.path.join(work_dir, feature_reject)
+
+        if not os.path.exists(resp_dir) or not os.path.exists(reject_dir):
+            logger.error(
+                'Please check README.md first and run `python3 -m huixiangdou.service.feature_store`'  # noqa E501
+            )
+            raise Exception(
+                f'{resp_dir} or {reject_dir} do not exist, please initialize with feature_store.'  # noqa E501
+            )
+
         self.vector_store_reject = Vectorstore.load_local(
-            os.path.join(work_dir, feature_response),
-            embeddings=self.embeddings)
+            reject_dir, embeddings=self.embeddings)
         self.vector_store_db = Vectorstore.load_local(
-            os.path.join(work_dir, feature_reject), embeddings=self.embeddings)
+            resp_dir, embeddings=self.embeddings)

     def get_doc_by_id(self, _id, vector_store):
         """Get doc by search id."""
diff --git a/huixiangdou/service/llm_client.py b/huixiangdou/service/llm_client.py
index 2a454245..d8e077d6 100644
--- a/huixiangdou/service/llm_client.py
+++ b/huixiangdou/service/llm_client.py
@@ -116,7 +116,7 @@ def generate_response(self, prompt, history=[], remote=False):
             resp = requests.post(url,
                                  headers=header,
                                  data=json.dumps(data),
-                                 timeout=5)
+                                 timeout=300)
             if resp.status_code != 200:
                 raise Exception(str((resp.status_code, resp.reason)))
             return resp.json()['text']
diff --git a/huixiangdou/service/llm_server_hybrid.py b/huixiangdou/service/llm_server_hybrid.py
index bdc47f1b..fd034912 100644
--- a/huixiangdou/service/llm_server_hybrid.py
+++ b/huixiangdou/service/llm_server_hybrid.py
@@ -152,10 +152,14 @@ def generate_response(self, prompt, history=[], remote=False):
         else:
             prompt = prompt[0:self.local_max_length]

+        """# Caution: For the results of this software to be reliable and verifiable, # noqa E501
+        it's essential to ensure reproducibility. Thus `GenerationMode.GREEDY_SEARCH` # noqa E501
+        must be enabled."""
         output_text, _ = self.model.chat(self.tokenizer,
                                          prompt,
                                          history,
-                                         top_k=1)
+                                         top_k=1,
+                                         do_sample=False)
         print((prompt, output_text))
         time_finish = time.time()
diff --git a/huixiangdou/service/web_search.py b/huixiangdou/service/web_search.py
index 8e813e18..39ecc2a9 100644
--- a/huixiangdou/service/web_search.py
+++ b/huixiangdou/service/web_search.py
@@ -95,7 +95,7 @@ def google(self, query: str, max_article):
                                     url,
                                     headers=headers,
                                     data=payload,
-                                    timeout=3)  # noqa E501
+                                    timeout=60)  # noqa E501
         jsonobj = json.loads(response.text)

         # pick URL links with a partial-order preference
@@ -137,7 +137,7 @@ def google(self, query: str, max_article):
         while life < self.retry:
             try:
                 logger.info(f'extract: {target_link}')
-                response = requests.get(target_link, timeout=5)
+                response = requests.get(target_link, timeout=60)

                 if len(response.text) < 1:
                     break
@@ -233,7 +233,7 @@ def fetch_web_content(target_link: str):
     Extracts the main content and title from the HTML of the page.
     Returns the title and content as a single string.
     """
-    response = requests.get(target_link, timeout=5)
+    response = requests.get(target_link, timeout=60)

     doc = Document(response.text)
     content_html = doc.summary()
diff --git a/huixiangdou/service/worker.py b/huixiangdou/service/worker.py
index e1577faf..3ccd491c 100644
--- a/huixiangdou/service/worker.py
+++ b/huixiangdou/service/worker.py
@@ -264,7 +264,6 @@ def generate(self, query, history, groupname):
                     default=0):
                 reborn_code = ErrorCode.BAD_ANSWER

-        reborn_code = ErrorCode.BAD_ANSWER
         if self.config['worker']['enable_sg_search']:
             if reborn_code == ErrorCode.BAD_ANSWER or reborn_code == ErrorCode.NO_SEARCH_RESULT:  # noqa E501
                 # reborn
@@ -292,7 +291,7 @@ def generate(self, query, history, groupname):
                 default=0):
             return ErrorCode.BAD_ANSWER, response

-        if response is not None and len(response) >= 500:
+        if response is not None and len(response) >= 800:
             # reply too long, summarize it
             response = self.llm.generate_response(
                 prompt=self.SUMMARIZE_TEMPLATE.format(response))
diff --git a/tests/test_internlm2.py b/tests/test_internlm2.py
new file mode 100644
index 00000000..2be761bb
--- /dev/null
+++ b/tests/test_internlm2.py
@@ -0,0 +1,19 @@
+import pdb
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_path = '/internlm/ampere_7b_v1_7_0'
+
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_path,
+                                             trust_remote_code=True,
+                                             device_map='auto',
+                                             torch_dtype='auto').eval()
+
+# Unlike some LLMs, you cannot write AutoModelForCausalLM.from_pretrained(.. fp16=True) here; it makes Internlm2Config.__init__() raise an error
+
+queries = ['how to install mmdeploy ?']
+for query in queries:
+    pdb.set_trace()
+    output_text, _ = model.chat(tokenizer, query, top_k=1, do_sample=False)
+    print(query, output_text)
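A note on the `do_sample=False` additions in llm_server_hybrid.py and the new test: in HuggingFace transformers it is the sampling flag, not `top_k`, that selects the generation mode, so if the model's generation config enables sampling, `top_k=1` alone does not guarantee greedy search. Below is a minimal determinism check, a sketch only, reusing the placeholder model path and the InternLM2 remote-code `model.chat()` API from tests/test_internlm2.py:

    # Sketch only: model_path is the placeholder from tests/test_internlm2.py,
    # and model.chat() is InternLM2's remote-code chat API.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_path = '/internlm/ampere_7b_v1_7_0'
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_path,
                                                 trust_remote_code=True,
                                                 device_map='auto',
                                                 torch_dtype='auto').eval()

    query = 'how to install mmdeploy ?'
    # With do_sample=False decoding is greedy search, so repeated calls on the
    # same input should return identical text.
    first, _ = model.chat(tokenizer, query, top_k=1, do_sample=False)
    second, _ = model.chat(tokenizer, query, top_k=1, do_sample=False)
    assert first == second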
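The timeout changes all follow one shape: 60 s for webhook, config-download, and web-search requests, and 300 s for the local LLM RPC, which can legitimately take minutes on long prompts. A hypothetical helper (not part of this patch) sketching that `requests` pattern together with the retry loop web_search.py already uses:

    import requests

    def get_with_retry(url: str, timeout: int = 60, retry: int = 3) -> str:
        """Hypothetical helper: a generous timeout plus the small retry loop
        that web_search.py uses (`while life < self.retry`)."""
        life = 0
        while life < retry:
            try:
                response = requests.get(url, timeout=timeout)
                if len(response.text) > 0:
                    return response.text
            except requests.exceptions.RequestException:
                pass  # covers timeouts and connection errors; retry
            life += 1
        return ''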