From 74896cdd010a603c8c54bff729ae62977ec8f41c Mon Sep 17 00:00:00 2001 From: FangYin Cheng Date: Wed, 29 Nov 2023 11:31:10 +0800 Subject: [PATCH] fix(core): Fix api error when upload files (#866) --- pilot/conversation.py | 333 ------------------------- pilot/language/lang_content_mapping.py | 82 ------ pilot/language/translation_handler.py | 8 - pilot/log/__init__.py | 0 pilot/log/json_handler.py | 17 -- pilot/logs.py | 295 ---------------------- pilot/openapi/api_v1/api_v1.py | 5 +- pilot/openapi/api_view_model.py | 2 +- setup.py | 57 +++-- 9 files changed, 39 insertions(+), 760 deletions(-) delete mode 100644 pilot/conversation.py delete mode 100644 pilot/language/lang_content_mapping.py delete mode 100644 pilot/language/translation_handler.py delete mode 100644 pilot/log/__init__.py delete mode 100644 pilot/log/json_handler.py delete mode 100644 pilot/logs.py diff --git a/pilot/conversation.py b/pilot/conversation.py deleted file mode 100644 index 814943a49..000000000 --- a/pilot/conversation.py +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- - -import dataclasses -from enum import auto, Enum -from typing import List, Any -from pilot.language.translation_handler import get_lang_text - -from pilot.configs.config import Config - -CFG = Config() - -DB_SETTINGS = { - "user": CFG.LOCAL_DB_USER, - "password": CFG.LOCAL_DB_PASSWORD, - "host": CFG.LOCAL_DB_HOST, - "port": CFG.LOCAL_DB_PORT, -} - -ROLE_USER = "USER" -ROLE_ASSISTANT = "Assistant" - - -class SeparatorStyle(Enum): - SINGLE = auto() - TWO = auto() - THREE = auto() - FOUR = auto() - - -@dataclasses.dataclass -class OldConversation: - """This class keeps all conversation history.""" - - system: str - roles: List[str] - messages: List[List[str]] - offset: int - sep_style: SeparatorStyle = SeparatorStyle.SINGLE - sep: str = "###" - sep2: str = None - - # Used for gradio server - skip_next: bool = False - conv_id: Any = None - last_user_input: Any = None - - def get_prompt(self): - if self.sep_style == SeparatorStyle.SINGLE: - ret = self.system + self.sep - for role, message in self.messages: - if message: - ret += role + ": " + message + self.sep - else: - ret += role + ":" - return ret - - elif self.sep_style == SeparatorStyle.TWO: - seps = [self.sep, self.sep2] - ret = self.system + seps[0] - for i, (role, message) in enumerate(self.messages): - if message: - ret += role + ":" + message + seps[i % 2] - else: - ret += role + ":" - return ret - else: - raise ValueError(f"Invalid style: {self.sep_style}") - - def append_message(self, role, message): - self.messages.append([role, message]) - - def to_gradio_chatbot(self): - ret = [] - for i, (role, msg) in enumerate(self.messages[self.offset :]): - if i % 2 == 0: - ret.append([msg, None]) - else: - ret[-1][-1] = msg - - return ret - - def copy(self): - return OldConversation( - system=self.system, - roles=self.roles, - messages=[[x, y] for x, y in self.messages], - offset=self.offset, - sep_style=self.sep_style, - sep=self.sep, - sep2=self.sep2, - conv_id=self.conv_id, - ) - - def dict(self): - return { - "system": self.system, - "roles": self.roles, - "messages": self.messages, - "offset": self.offset, - "sep": self.sep, - "sep2": self.sep2, - "conv_id": self.conv_id, - } - - -conv_default = OldConversation( - system=None, - roles=("human", "ai"), - messages=[], - offset=0, - sep_style=SeparatorStyle.SINGLE, - sep="###", -) - -# -# conv_one_shot = Conversation( -# system="A chat between a curious user and an artificial intelligence assistant, who very familiar with database related knowledge. " -# "The assistant gives helpful, detailed, professional and polite answers to the user's questions. ", -# roles=("USER", "Assistant"), -# messages=( -# ( -# "USER", -# "What are the key differences between mysql and postgres?", -# ), -# ( -# "Assistant", -# "MySQL and PostgreSQL are both popular open-source relational database management systems (RDBMS) " -# "that have many similarities but also some differences. Here are some key differences: \n" -# "1. Data Types: PostgreSQL has a more extensive set of data types, " -# "including support for array, hstore, JSON, and XML, whereas MySQL has a more limited set.\n" -# "2. ACID compliance: Both MySQL and PostgreSQL support ACID compliance (Atomicity, Consistency, Isolation, Durability), " -# "but PostgreSQL is generally considered to be more strict in enforcing it.\n" -# "3. Replication: MySQL has a built-in replication feature, which allows you to replicate data across multiple servers," -# "whereas PostgreSQL has a similar feature, but it is not as mature as MySQL's.\n" -# "4. Performance: MySQL is generally considered to be faster and more efficient in handling large datasets, " -# "whereas PostgreSQL is known for its robustness and reliability.\n" -# "5. Licensing: MySQL is licensed under the GPL (General Public License), which means that it is free and open-source software, " -# "whereas PostgreSQL is licensed under the PostgreSQL License, which is also free and open-source but with different terms.\n" -# "Ultimately, the choice between MySQL and PostgreSQL depends on the specific needs and requirements of your application. " -# "Both are excellent database management systems, and choosing the right one " -# "for your project requires careful consideration of your application's requirements, performance needs, and scalability.", -# ), -# ), -# offset=2, -# sep_style=SeparatorStyle.SINGLE, -# sep="###", -# ) - - -conv_one_shot = OldConversation( - system="You are a DB-GPT. Please provide me with user input and all table information known in the database, so I can accurately query tables are involved in the user input. If there are multiple tables involved, I will separate them by comma. Here is an example:", - roles=("USER", "ASSISTANT"), - messages=( - ( - "USER", - "please query there are how many orders?" - "Querying the table involved in the user input?" - "database schema:" - "database name:db_test, database type:MYSQL, table infos:table name:carts,table description:购物车表;table name:categories,table description:商品分类表;table name:chat_groups,table description:群组表;table name:chat_users,table description:聊天用户表;table name:friends,table description:好友表;table name:messages,table description:消息表;table name:orders,table description:订单表;table name:products,table description:商品表;table name:table_test,table description:;table name:users,table description:用户表," - "You should only respond in JSON format as described below and ensure the response can be parsed by Python json.loads" - """Response Format: - { - "table": ["orders", "products"] - } - """, - ), - ( - "Assistant", - """ - { - "table": ["orders", "products"] - } - """, - ), - ), - offset=2, - sep_style=SeparatorStyle.SINGLE, - sep="###", -) - -conv_vicuna_v1 = OldConversation( - system="A chat between a curious user and an artificial intelligence assistant. who very familiar with database related knowledge. " - "The assistant gives helpful, detailed, professional and polite answers to the user's questions. ", - roles=("USER", "ASSISTANT"), - messages=(), - offset=0, - sep_style=SeparatorStyle.TWO, - sep=" ", - sep2="", -) - -auto_dbgpt_one_shot = OldConversation( - system="You are DB-GPT, an AI designed to answer questions about HackerNews by query `hackerbews` database in MySQL. " - "Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.", - roles=("USER", "ASSISTANT"), - messages=( - ( - "USER", - """ Answer how many users does app_users have by query ob database - Constraints: - 1. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember. - 2. No user assistance - 3. Exclusively use the commands listed in double quotes e.g. "command name" - - - Schema: - Database gpt-user Schema information as follows: users(city,create_time,email,last_login_time,phone,user_name); - - - Commands: - 1. analyze_code: Analyze Code, args: "code": "" - 2. execute_python_file: Execute Python File, args: "filename": "" - 3. append_to_file: Append to file, args: "filename": "", "text": "" - 4. delete_file: Delete file, args: "filename": "" - 5. list_files: List Files in Directory, args: "directory": "" - 6. read_file: Read file, args: "filename": "" - 7. write_to_file: Write to file, args: "filename": "", "text": "" - 8. db_sql_executor: "Execute SQL in Database.", args: "sql": "" - - You should only respond in JSON format as described below and ensure the response can be parsed by Python json.loads - Response Format: - { - "thoughts": { - "text": "thought", - "reasoning": "reasoning", - "plan": "- short bulleted\n- list that conveys\n- long-term plan", - "criticism": "constructive self-criticism", - "speak": "thoughts summary to say to user" - }, - "command": { - "name": "command name", - "args": { - "arg name": "value" - } - } - } - """, - ), - ( - "ASSISTANT", - """ - { - "thoughts": { - "text": "To answer how many users by query database we need to write SQL query to get the count of the distinct users from the database. We can use db_sql_executor command to execute the SQL query in database.", - "reasoning": "We can use the sql_executor command to execute the SQL query for getting count of distinct users from the users database. We can select the count of the distinct users from the users table.", - "plan": "- Write SQL query to get count of distinct users from users database\n- Use db_sql_executor to execute the SQL query in OB database\n- Parse the SQL result to get the count\n- Respond with the count as the answer", - "criticism": "None", - "speak": "To get the number of users in users, I will execute an SQL query in OB database using the db_sql_executor command and respond with the count." - }, - "command": { - "name": "db_sql_executor", - "args": { - "sql": "SELECT COUNT(DISTINCT(user_name)) FROM users ;" - } - } - } - """, - ), - ), - offset=0, - sep_style=SeparatorStyle.SINGLE, - sep="###", -) - -auto_dbgpt_without_shot = OldConversation( - system="You are DB-GPT, an AI designed to answer questions about users by query `users` database in MySQL. " - "Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.", - roles=("USER", "ASSISTANT"), - messages=(), - offset=0, - sep_style=SeparatorStyle.SINGLE, - sep=" ", - sep2="", -) - -conv_qa_prompt_template = """ 基于以下已知的信息, 专业、简要的回答用户的问题, - 如果无法从提供的内容中获取答案, 请说: "知识库中提供的内容不足以回答此问题" 禁止胡乱编造。 - 已知内容: - {context} - 问题: - {question} - -""" - -# conv_qa_prompt_template = """ Please provide the known information so that I can professionally and briefly answer the user's question. If the answer cannot be obtained from the provided content, -# please say: "The information provided in the knowledge base is insufficient to answer this question." Fabrication is prohibited.。 -# known information: -# {context} -# question: -# {question} -# """ -default_conversation = conv_default - - -chat_mode_title = { - "sql_generate_diagnostics": get_lang_text("sql_generate_diagnostics"), - "chat_use_plugin": get_lang_text("chat_use_plugin"), - "knowledge_qa": get_lang_text("knowledge_qa"), -} - -conversation_sql_mode = { - "auto_execute_ai_response": get_lang_text("sql_generate_mode_direct"), - "dont_execute_ai_response": get_lang_text("sql_generate_mode_none"), -} - -conversation_types = { - "native": get_lang_text("knowledge_qa_type_llm_native_dialogue"), - "default_knownledge": get_lang_text( - "knowledge_qa_type_default_knowledge_base_dialogue" - ), - "custome": get_lang_text("knowledge_qa_type_add_knowledge_base_dialogue"), - "url": get_lang_text("knowledge_qa_type_url_knowledge_dialogue"), -} - -conv_templates = { - "conv_one_shot": conv_one_shot, - "vicuna_v1": conv_vicuna_v1, - "auto_dbgpt_one_shot": auto_dbgpt_one_shot, -} - -conv_db_summary_templates = """ -Based on the following known database information?, answer which tables are involved in the user input. -Known database information:{db_profile_summary} -Input:{db_input} -You should only respond in JSON format as described below and ensure the response can be parsed by Python json.loads -The response format must be JSON, and the key of JSON must be "table". - -""" - -if __name__ == "__main__": - message = gen_sqlgen_conversation("dbgpt") - print(message) diff --git a/pilot/language/lang_content_mapping.py b/pilot/language/lang_content_mapping.py deleted file mode 100644 index 54bd63e35..000000000 --- a/pilot/language/lang_content_mapping.py +++ /dev/null @@ -1,82 +0,0 @@ -## 短期内在该文件中配置,长期考虑将会存储在默认的数据库中存储,并可以支持多种语言的配置 - -lang_dicts = { - "zh": { - "unique_id": "中文内容", - "db_gpt_introduction": "[DB-GPT](https://github.com/csunny/DB-GPT) 是一个开源的以数据库为基础的GPT实验项目,使用本地化的GPT大模型与您的数据和环境进行交互,无数据泄露风险,100% 私密,100% 安全。", - "learn_more_markdown": "该服务是仅供非商业用途的研究预览。受 Vicuna-13B 模型 [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) 的约束", - "model_control_param": "模型参数", - "sql_generate_mode_direct": "直接执行结果", - "sql_generate_mode_none": "DB问答", - "max_input_token_size": "最大输出Token数", - "please_choose_database": "请选择数据", - "sql_generate_diagnostics": "SQL生成与诊断", - "knowledge_qa_type_llm_native_dialogue": "LLM原生对话", - "knowledge_qa_type_default_knowledge_base_dialogue": "默认知识库对话", - "knowledge_qa_type_add_knowledge_base_dialogue": "新增知识库对话", - "knowledge_qa_type_url_knowledge_dialogue": "URL网页知识对话", - "create_knowledge_base": "新建知识库", - "sql_schema_info": "数据库{}的Schema信息如下: {}\n", - "current_dialogue_mode": "当前对话模式", - "database_smart_assistant": "数据库智能助手", - "sql_vs_setting": "自动执行模式下, DB-GPT可以具备执行SQL、从网络读取知识自动化存储学习的能力", - "knowledge_qa": "知识问答", - "chat_use_plugin": "插件模式", - "dialogue_use_plugin": "对话使用插件", - "select_plugin": "选择插件", - "configure_knowledge_base": "配置知识库", - "new_klg_name": "新知识库名称", - "url_input_label": "输入网页地址", - "add_as_new_klg": "添加为新知识库", - "add_file_to_klg": "向知识库中添加文件", - "upload_file": "上传文件", - "add_file": "添加文件", - "upload_and_load_to_klg": "上传并加载到知识库", - "upload_folder": "上传文件夹", - "add_folder": "添加文件夹", - "send": "发送", - "regenerate": "重新生成", - "clear_box": "清理", - }, - "en": { - "unique_id": "English Content", - "db_gpt_introduction": "[DB-GPT](https://github.com/csunny/DB-GPT) is an experimental open-source project that uses localized GPT large models to interact with your data and environment. With this solution, you can be assured that there is no risk of data leakage, and your data is 100% private and secure.", - "learn_more_markdown": "The service is a research preview intended for non-commercial use only. subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of Vicuna-13B", - "model_control_param": "Model Parameters", - "sql_generate_mode_direct": "Execute directly", - "sql_generate_mode_none": "DB chat", - "max_input_token_size": "Maximum output token size", - "please_choose_database": "Please choose database", - "sql_generate_diagnostics": "SQL Generation & Diagnostics", - "knowledge_qa_type_llm_native_dialogue": "LLM native dialogue", - "knowledge_qa_type_default_knowledge_base_dialogue": "Default documents", - "knowledge_qa_type_add_knowledge_base_dialogue": "New documents", - "knowledge_qa_type_url_knowledge_dialogue": "Chat with url", - "dialogue_use_plugin": "Dialogue Extension", - "create_knowledge_base": "Create Knowledge Base", - "sql_schema_info": "the schema information of database {}: {}\n", - "current_dialogue_mode": "Current dialogue mode", - "database_smart_assistant": "Database smart assistant", - "sql_vs_setting": "In the automatic execution mode, DB-GPT can have the ability to execute SQL, read data from the network, automatically store and learn", - "chat_use_plugin": "Plugin Mode", - "select_plugin": "Select Plugin", - "knowledge_qa": "Documents Chat", - "configure_knowledge_base": "Configure Documents", - "url_input_label": "Please input url", - "new_klg_name": "New document name", - "add_as_new_klg": "Add as new documents", - "add_file_to_klg": "Add file to documents", - "upload_file": "Upload file", - "add_file": "Add file", - "upload_and_load_to_klg": "Upload and load to documents", - "upload_folder": "Upload folder", - "add_folder": "Add folder", - "send": "Send", - "regenerate": "Regenerate", - "clear_box": "Clear", - }, -} - - -def get_lang_content(key, language="zh"): - return lang_dicts.get(language, {}).get(key, "") diff --git a/pilot/language/translation_handler.py b/pilot/language/translation_handler.py deleted file mode 100644 index 0a46d09ab..000000000 --- a/pilot/language/translation_handler.py +++ /dev/null @@ -1,8 +0,0 @@ -from pilot.configs.config import Config -from pilot.language.lang_content_mapping import get_lang_content - -CFG = Config() - - -def get_lang_text(key): - return get_lang_content(key, CFG.LANGUAGE) diff --git a/pilot/log/__init__.py b/pilot/log/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/pilot/log/json_handler.py b/pilot/log/json_handler.py deleted file mode 100644 index e45ee9f13..000000000 --- a/pilot/log/json_handler.py +++ /dev/null @@ -1,17 +0,0 @@ -import json -import logging - - -class JsonFileHandler(logging.FileHandler): - def __init__(self, filename, mode="a", encoding=None, delay=False): - super().__init__(filename, mode, encoding, delay) - - def emit(self, record): - json_data = json.loads(self.format(record)) - with open(self.baseFilename, "w", encoding="utf-8") as f: - json.dump(json_data, f, ensure_ascii=False, indent=4) - - -class JsonFormatter(logging.Formatter): - def format(self, record): - return record.msg diff --git a/pilot/logs.py b/pilot/logs.py deleted file mode 100644 index 32528bdaa..000000000 --- a/pilot/logs.py +++ /dev/null @@ -1,295 +0,0 @@ -import logging -import os -import random -import re -import time -from logging import LogRecord -from typing import Any - -from colorama import Fore, Style - -from pilot.singleton import Singleton - - -class Logger(metaclass=Singleton): - """ - Logger that handle titles in different colors. - Outputs logs in console, activity.log, and errors.log - For console handler: simulates typing - """ - - def __init__(self): - # create log directory if it doesn't exist - this_files_dir_path = os.path.dirname(__file__) - log_dir = os.path.join(this_files_dir_path, "../logs") - if not os.path.exists(log_dir): - os.makedirs(log_dir) - - log_file = "activity.log" - error_file = "error.log" - - console_formatter = DbGptFormatter("%(title_color)s %(message)s") - - # Create a handler for console which simulate typing - self.typing_console_handler = TypingConsoleHandler() - self.typing_console_handler.setLevel(logging.INFO) - self.typing_console_handler.setFormatter(console_formatter) - - # Create a handler for console without typing simulation - self.console_handler = ConsoleHandler() - self.console_handler.setLevel(logging.DEBUG) - self.console_handler.setFormatter(console_formatter) - - # Info handler in activity.log - self.file_handler = logging.FileHandler( - os.path.join(log_dir, log_file), "a", "utf-8" - ) - self.file_handler.setLevel(logging.DEBUG) - info_formatter = DbGptFormatter( - "%(asctime)s %(levelname)s %(title)s %(message_no_color)s" - ) - self.file_handler.setFormatter(info_formatter) - - # Error handler error.log - error_handler = logging.FileHandler( - os.path.join(log_dir, error_file), "a", "utf-8" - ) - error_handler.setLevel(logging.ERROR) - error_formatter = DbGptFormatter( - "%(asctime)s %(levelname)s %(module)s:%(funcName)s:%(lineno)d %(title)s" - " %(message_no_color)s" - ) - error_handler.setFormatter(error_formatter) - - self.typing_logger = logging.getLogger("TYPER") - self.typing_logger.addHandler(self.typing_console_handler) - self.typing_logger.addHandler(self.file_handler) - self.typing_logger.addHandler(error_handler) - self.typing_logger.setLevel(logging.DEBUG) - - self.logger = logging.getLogger("LOGGER") - self.logger.addHandler(self.console_handler) - self.logger.addHandler(self.file_handler) - self.logger.addHandler(error_handler) - self.logger.setLevel(logging.DEBUG) - - self.json_logger = logging.getLogger("JSON_LOGGER") - self.json_logger.addHandler(self.file_handler) - self.json_logger.addHandler(error_handler) - self.json_logger.setLevel(logging.DEBUG) - - self.speak_mode = False - self.chat_plugins = [] - - def typewriter_log( - self, title="", title_color="", content="", speak_text=False, level=logging.INFO - ): - from pilot.speech.say import say_text - - if speak_text and self.speak_mode: - say_text(f"{title}. {content}") - - for plugin in self.chat_plugins: - plugin.report(f"{title}. {content}") - - if content: - if isinstance(content, list): - content = " ".join(content) - else: - content = "" - - self.typing_logger.log( - level, content, extra={"title": title, "color": title_color} - ) - - def debug( - self, - message, - title="", - title_color="", - ): - self._log(title, title_color, message, logging.DEBUG) - - def info( - self, - message, - title="", - title_color="", - ): - self._log(title, title_color, message, logging.INFO) - - def warn( - self, - message, - title="", - title_color="", - ): - self._log(title, title_color, message, logging.WARN) - - def error(self, title, message=""): - self._log(title, Fore.RED, message, logging.ERROR) - - def _log( - self, - title: str = "", - title_color: str = "", - message: str = "", - level=logging.INFO, - ): - if message: - if isinstance(message, list): - message = " ".join(message) - self.logger.log( - level, message, extra={"title": str(title), "color": str(title_color)} - ) - - def set_level(self, level): - self.logger.setLevel(level) - self.typing_logger.setLevel(level) - - def double_check(self, additionalText=None): - if not additionalText: - additionalText = ( - "Please ensure you've setup and configured everything" - " correctly. Read https://github.com/Torantulino/Auto-GPT#readme to " - "double check. You can also create a github issue or join the discord" - " and ask there!" - ) - - self.typewriter_log("DOUBLE CHECK CONFIGURATION", Fore.YELLOW, additionalText) - - def log_json(self, data: Any, file_name: str) -> None: - from pilot.log.json_handler import JsonFileHandler, JsonFormatter - - # Define log directory - this_files_dir_path = os.path.dirname(__file__) - log_dir = os.path.join(this_files_dir_path, "../logs") - - # Create a handler for JSON files - json_file_path = os.path.join(log_dir, file_name) - json_data_handler = JsonFileHandler(json_file_path) - json_data_handler.setFormatter(JsonFormatter()) - - # Log the JSON data using the custom file handler - self.json_logger.addHandler(json_data_handler) - self.json_logger.debug(data) - self.json_logger.removeHandler(json_data_handler) - - def get_log_directory(self): - this_files_dir_path = os.path.dirname(__file__) - log_dir = os.path.join(this_files_dir_path, "../logs") - return os.path.abspath(log_dir) - - -""" -Output stream to console using simulated typing -""" - - -class TypingConsoleHandler(logging.StreamHandler): - def emit(self, record): - min_typing_speed = 0.05 - max_typing_speed = 0.01 - - msg = self.format(record) - try: - words = msg.split() - for i, word in enumerate(words): - print(word, end="", flush=True) - if i < len(words) - 1: - print(" ", end="", flush=True) - typing_speed = random.uniform(min_typing_speed, max_typing_speed) - time.sleep(typing_speed) - # type faster after each word - min_typing_speed = min_typing_speed * 0.95 - max_typing_speed = max_typing_speed * 0.95 - print() - except Exception: - self.handleError(record) - - -class ConsoleHandler(logging.StreamHandler): - def emit(self, record) -> None: - msg = self.format(record) - try: - print(msg) - except Exception: - self.handleError(record) - - -class DbGptFormatter(logging.Formatter): - """ - Allows to handle custom placeholders 'title_color' and 'message_no_color'. - To use this formatter, make sure to pass 'color', 'title' as log extras. - """ - - def format(self, record: LogRecord) -> str: - if hasattr(record, "color"): - record.title_color = ( - getattr(record, "color") - + getattr(record, "title", "") - + " " - + Style.RESET_ALL - ) - else: - record.title_color = getattr(record, "title", "") - - # Add this line to set 'title' to an empty string if it doesn't exist - record.title = getattr(record, "title", "") - - if hasattr(record, "msg"): - record.message_no_color = remove_color_codes(getattr(record, "msg")) - else: - record.message_no_color = "" - return super().format(record) - - -def remove_color_codes(s: str) -> str: - ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") - return ansi_escape.sub("", s) - - -# Remove current logger -# logger: Logger = Logger() - - -def print_assistant_thoughts( - ai_name: object, - assistant_reply_json_valid: object, - speak_mode: bool = False, -) -> None: - from pilot.speech.say import say_text - - assistant_thoughts_reasoning = None - assistant_thoughts_plan = None - assistant_thoughts_speak = None - assistant_thoughts_criticism = None - - assistant_thoughts = assistant_reply_json_valid.get("thoughts", {}) - assistant_thoughts_text = assistant_thoughts.get("text") - if assistant_thoughts: - assistant_thoughts_reasoning = assistant_thoughts.get("reasoning") - assistant_thoughts_plan = assistant_thoughts.get("plan") - assistant_thoughts_criticism = assistant_thoughts.get("criticism") - assistant_thoughts_speak = assistant_thoughts.get("speak") - logger.typewriter_log( - f"{ai_name.upper()} THOUGHTS:", Fore.YELLOW, f"{assistant_thoughts_text}" - ) - logger.typewriter_log("REASONING:", Fore.YELLOW, f"{assistant_thoughts_reasoning}") - if assistant_thoughts_plan: - logger.typewriter_log("PLAN:", Fore.YELLOW, "") - # If it's a list, join it into a string - if isinstance(assistant_thoughts_plan, list): - assistant_thoughts_plan = "\n".join(assistant_thoughts_plan) - elif isinstance(assistant_thoughts_plan, dict): - assistant_thoughts_plan = str(assistant_thoughts_plan) - - # Split the input_string using the newline character and dashes - lines = assistant_thoughts_plan.split("\n") - for line in lines: - line = line.lstrip("- ") - logger.typewriter_log("- ", Fore.GREEN, line.strip()) - logger.typewriter_log("CRITICISM:", Fore.YELLOW, f"{assistant_thoughts_criticism}") - # Speak the assistant's thoughts - if speak_mode and assistant_thoughts_speak: - say_text(assistant_thoughts_speak) diff --git a/pilot/openapi/api_v1/api_v1.py b/pilot/openapi/api_v1/api_v1.py index 262ff6f6c..0f567481c 100644 --- a/pilot/openapi/api_v1/api_v1.py +++ b/pilot/openapi/api_v1/api_v1.py @@ -75,7 +75,10 @@ def __get_conv_user_message(conversations: dict): def __new_conversation(chat_mode, user_name: str, sys_code: str) -> ConversationVo: unique_id = uuid.uuid1() return ConversationVo( - conv_uid=str(unique_id), chat_mode=chat_mode, sys_code=sys_code + conv_uid=str(unique_id), + chat_mode=chat_mode, + user_name=user_name, + sys_code=sys_code, ) diff --git a/pilot/openapi/api_view_model.py b/pilot/openapi/api_view_model.py index 5212076e8..46c4edb3a 100644 --- a/pilot/openapi/api_view_model.py +++ b/pilot/openapi/api_view_model.py @@ -46,7 +46,7 @@ class ConversationVo(BaseModel): """ user """ - user_name: str = "" + user_name: str = None """ the scene of chat """ diff --git a/setup.py b/setup.py index d1386c79e..548879ae8 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ BUILD_FROM_SOURCE_URL_FAST_CHAT = os.getenv( "BUILD_FROM_SOURCE_URL_FAST_CHAT", "git+https://github.com/lm-sys/FastChat.git" ) +BUILD_VERSION_OPENAI = os.getenv("BUILD_VERSION_OPENAI") def parse_requirements(file_name: str) -> List[str]: @@ -391,6 +392,10 @@ def core_requires(): # for cache, TODO pympler has not been updated for a long time and needs to find a new toolkit. "pympler", "aiofiles", + # for cache + "msgpack", + # for agent + "GitPython", ] if BUILD_FROM_SOURCE: setup_spec.extras["framework"].append( @@ -413,6 +418,7 @@ def knowledge_requires(): "python-docx", "pypdf", "python-multipart", + "sentence-transformers", ] @@ -479,26 +485,27 @@ def quantization_requires(): # For chatglm2-6b-int4 pkgs += ["cpm_kernels"] - # Since transformers 4.35.0, the GPT-Q/AWQ model can be loaded using AutoModelForCausalLM. - # autoawq requirements: - # 1. Compute Capability 7.5 (sm75). Turing and later architectures are supported. - # 2. CUDA Toolkit 11.8 and later. - autoawq_url = _build_wheels( - "autoawq", - "0.1.7", - base_url_func=lambda v, x, y: f"https://github.com/casper-hansen/AutoAWQ/releases/download/v{v}", - supported_cuda_versions=["11.8"], - ) - if autoawq_url: - print(f"Install autoawq from {autoawq_url}") - pkgs.append(f"autoawq @ {autoawq_url}") - else: - pkgs.append("autoawq") + if os_type != OSType.DARWIN: + # Since transformers 4.35.0, the GPT-Q/AWQ model can be loaded using AutoModelForCausalLM. + # autoawq requirements: + # 1. Compute Capability 7.5 (sm75). Turing and later architectures are supported. + # 2. CUDA Toolkit 11.8 and later. + autoawq_url = _build_wheels( + "autoawq", + "0.1.7", + base_url_func=lambda v, x, y: f"https://github.com/casper-hansen/AutoAWQ/releases/download/v{v}", + supported_cuda_versions=["11.8"], + ) + if autoawq_url: + print(f"Install autoawq from {autoawq_url}") + pkgs.append(f"autoawq @ {autoawq_url}") + else: + pkgs.append("autoawq") - auto_gptq_pkg = _build_autoawq_requires() - if auto_gptq_pkg: - pkgs.append(auto_gptq_pkg) - pkgs.append("optimum") + auto_gptq_pkg = _build_autoawq_requires() + if auto_gptq_pkg: + pkgs.append(auto_gptq_pkg) + pkgs.append("optimum") setup_spec.extras["quantization"] = pkgs @@ -526,7 +533,13 @@ def openai_requires(): """ pip install "db-gpt[openai]" """ - setup_spec.extras["openai"] = ["openai", "tiktoken"] + setup_spec.extras["openai"] = ["tiktoken"] + if BUILD_VERSION_OPENAI: + # Read openai sdk version from env + setup_spec.extras["openai"].append(f"openai=={BUILD_VERSION_OPENAI}") + else: + setup_spec.extras["openai"].append("openai") + setup_spec.extras["openai"] += setup_spec.extras["framework"] setup_spec.extras["openai"] += setup_spec.extras["knowledge"] @@ -549,7 +562,7 @@ def cache_requires(): """ pip install "db-gpt[cache]" """ - setup_spec.extras["cache"] = ["rocksdict", "msgpack"] + setup_spec.extras["cache"] = ["rocksdict"] def default_requires(): @@ -560,12 +573,10 @@ def default_requires(): # "tokenizers==0.13.3", "tokenizers>=0.14", "accelerate>=0.20.3", - "sentence-transformers", "protobuf==3.20.3", "zhipuai", "dashscope", "chardet", - "GitPython", ] setup_spec.extras["default"] += setup_spec.extras["framework"] setup_spec.extras["default"] += setup_spec.extras["knowledge"]