diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/DemoGPT.iml b/.idea/DemoGPT.iml
new file mode 100644
index 0000000..8a05c6e
--- /dev/null
+++ b/.idea/DemoGPT.iml
@@ -0,0 +1,12 @@
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..65b1d5d
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,21 @@
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..dc9ea49
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..24e72a4
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
diff --git a/README.md b/README.md
index 0347a8d..9928b54 100644
--- a/README.md
+++ b/README.md
@@ -202,6 +202,16 @@ If you have cloned the repository and wish to run the source code version, you c
 streamlit run demogpt/app.py
 ```
 
+### Use Local LLM
+
+First, edit the configuration in `demogpt/model_config.py` so that the desired model is correctly configured.
+Then start the LLM server by running the following command:
+
+```sh
+cd demogpt/server
+python llm_api.py
+```
+
 ## To-Do 📝
 - [x] Implement new DemoGPT pipeline including plan generation, task creation, code snippet generation, and final code assembly.
 - [x] Add feature to allow users to select models.
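For orientation, the configuration schema introduced in `demogpt/model_config.py` (added below) maps each selectable model key to the endpoint that serves it, and `demogpt/model.py` is reworked to take that mapping as a single `llm_config` dict. The following sketch shows how such an entry feeds the new constructor; the `my-local-model` key, its paths, and the surrounding usage are illustrative assumptions, not part of this patch.

```python
# Illustrative sketch only: assumes this patch is applied and the llm_api.py
# server is already running at the configured api_base_url.
from model_config import llm_model_dict
from model import DemoGPT

# A hypothetical extra entry, mirroring the schema of the entries shipped in model_config.py.
llm_model_dict["my-local-model"] = {
    "model_name": "my-local-model",                  # name registered with the fastchat model worker
    "local_model_path": "/path/to/your/checkpoint",  # weights loaded by the model worker
    "api_base_url": "http://localhost:8888/v1",      # OpenAI-compatible endpoint exposed by llm_api.py
    "api_key": "EMPTY",                              # fastchat accepts any key when none are configured
}

# The Streamlit app passes the selected entry straight into the reworked constructor.
agent = DemoGPT(llm_config=llm_model_dict["my-local-model"])
```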
diff --git a/demogpt/app.py b/demogpt/app.py
index 0a23359..3dbc22e 100644
--- a/demogpt/app.py
+++ b/demogpt/app.py
@@ -13,6 +13,7 @@
 from model import DemoGPT
 from utils import runStreamlit
+from model_config import llm_model_dict
 
 # logging.basicConfig(level = logging.DEBUG,format='%(levelname)s-%(message)s')
 
@@ -52,32 +53,38 @@ def initCode():
 initCode()
 
-# Text input
-
-openai_api_key = st.sidebar.text_input(
-    "OpenAI API Key",
-    placeholder="sk-...",
-    value=os.getenv("OPENAI_API_KEY", ""),
-    type="password",
-)
-openai_api_base = st.sidebar.text_input(
-    "Open AI base URL",
-    placeholder="https://api.openai.com/v1",
+llm = st.sidebar.selectbox(
+    "LLM models",
+    llm_model_dict.keys(),
 )
 
-models = (
-    "gpt-3.5-turbo-0613",
-    "gpt-3.5-turbo-0301",
-    "gpt-3.5-turbo",
-    "gpt-3.5-turbo-16k",
-    "gpt-3.5-turbo-16k-0613",
-    "gpt-4",
-    "gpt-4-0314",
-    "gpt-4-0613",
-)
+# Text input
 
-model_name = st.sidebar.selectbox("Model", models)
+# openai_api_key = st.sidebar.text_input(
+#     "OpenAI API Key",
+#     placeholder="sk-...",
+#     value=os.getenv("OPENAI_API_KEY", ""),
+#     type="password",
+# )
+#
+# openai_api_base = st.sidebar.text_input(
+#     "Open AI base URL",
+#     placeholder="https://api.openai.com/v1",
+# )
+
+# models = (
+#     "gpt-3.5-turbo-0613",
+#     "gpt-3.5-turbo-0301",
+#     "gpt-3.5-turbo",
+#     "gpt-3.5-turbo-16k",
+#     "gpt-3.5-turbo-16k-0613",
+#     "gpt-4",
+#     "gpt-4-0314",
+#     "gpt-4-0613",
+# )
+#
+# model_name = st.sidebar.selectbox("Model", models)
 
 empty_idea = st.empty()
 demo_idea = empty_idea.text_area(
@@ -119,13 +126,14 @@ def kill():
 if submitted:
     st.session_state.messages = []
-    if not openai_api_key:
-        st.warning("Please enter your OpenAI API Key!", icon="⚠️")
+    if not llm:
+        st.warning("Please choose an LLM model!", icon="⚠️")
     else:
         bar = progressBar(0)
         st.session_state.container = st.container()
-        agent = DemoGPT(openai_api_key=openai_api_key, openai_api_base=openai_api_base)
-        agent.setModel(model_name)
+        agent = DemoGPT(llm_config=llm_model_dict[llm])
+        # agent = DemoGPT(openai_api_key=openai_api_key, openai_api_base=openai_api_base)
+        # agent.setModel(model_name)
         kill()
         code_empty = st.empty()
         st.session_state.container = st.container()
diff --git a/demogpt/model.py b/demogpt/model.py
index 9281eac..45a7b52 100644
--- a/demogpt/model.py
+++ b/demogpt/model.py
@@ -12,18 +12,13 @@ class DemoGPT:
     def __init__(
         self,
-        openai_api_key=os.getenv("OPENAI_API_KEY", ""),
-        model_name="gpt-3.5-turbo-0613",
+        llm_config,
         max_steps=10,
-        openai_api_base="",
     ):
-        assert len(
-            openai_api_key.strip()
-        ), "Either give openai_api_key as an argument or put it in the environment variable"
-        self.model_name = model_name
-        self.openai_api_key = openai_api_key
+        self.model_name = llm_config["model_name"]
+        self.openai_api_base = llm_config["api_base_url"]
+        self.openai_api_key = llm_config["api_key"]
         self.max_steps = max_steps  # max iteration for refining the model purpose
-        self.openai_api_base = openai_api_base
         Chains.setLlm(
             self.model_name, self.openai_api_key, openai_api_base=self.openai_api_base
         )
diff --git a/demogpt/model_config.py b/demogpt/model_config.py
new file mode 100644
index 0000000..80d81b4
--- /dev/null
+++ b/demogpt/model_config.py
@@ -0,0 +1,62 @@
+import os
+import logging
+import torch
+
+# Log format
+LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logging.basicConfig(format=LOG_FORMAT)
+
+llm_model_dict = {
+    "chatglm-6b": {
+        "model_name": "chatglm-6b",
+        "local_model_path": "/opt/ChatGLM-6B/chatglm-6b/",
+        "api_base_url": "http://localhost:8888/v1",  # the "api_base_url" exposed by the fastchat service
+        "api_key": "EMPTY"
+    },
+
+    "chatglm2-6b": {
+        "model_name": "chatglm2-6b",
+        "local_model_path": "/opt/ChatGLM2-6B/chatglm2-6b/",
+        "api_base_url": "http://localhost:8888/v1",  # the "api_base_url" exposed by the fastchat service
+        "api_key": "EMPTY"
+    },
+
+    "vicuna-13b-hf": {
+        "model_name": "vicuna-13b-hf",
+        "local_model_path": "vicuna-13b-hf",
+        "api_base_url": "http://localhost:8888/v1",  # the "api_base_url" exposed by the fastchat service
+        "api_key": "EMPTY"
+    },
+
+    "gpt-3.5-turbo": {
+        "model_name": "gpt-3.5-turbo",
+        "local_model_path": "gpt-3.5-turbo",
+        "api_base_url": "https://api.openai.com/v1",
+        "api_key": os.environ.get("OPENAI_API_KEY")
+    },
+
+    "baichuan-7b": {
+        "model_name": "baichuan-7b",
+        "local_model_path": "/opt/baichuan-7B",
+        "api_base_url": "http://localhost:8888/v1",  # the "api_base_url" exposed by the fastchat service
+        "api_key": "EMPTY"
+    },
+
+    "Baichuan-13b-Chat": {
+        "model_name": "baichuan-13b",
+        "local_model_path": "baichuan-inc/Baichuan-13b-Chat",
+        "api_base_url": "http://localhost:8888/v1",  # the "api_base_url" exposed by the fastchat service
+        "api_key": "EMPTY"
+    },
+}
+
+# Default LLM name
+LLM_MODEL = "chatglm-6b"
+
+# Device the LLM runs on
+LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+
+# Log storage path
+LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")
+if not os.path.exists(LOG_PATH):
+    os.mkdir(LOG_PATH)
diff --git a/demogpt/server/__init__.py b/demogpt/server/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/demogpt/server/llm_api.py b/demogpt/server/llm_api.py
new file mode 100644
index 0000000..f18a24c
--- /dev/null
+++ b/demogpt/server/llm_api.py
@@ -0,0 +1,235 @@
+from multiprocessing import Process, Queue
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+from demogpt.model_config import llm_model_dict, LLM_MODEL, LLM_DEVICE, LOG_PATH, logger
+
+
+host_ip = "0.0.0.0"
+controller_port = 20001
+model_worker_port = 20002
+openai_api_port = 8888
+base_url = "http://127.0.0.1:{}"
+queue = Queue()
+
+
+def set_httpx_timeout(timeout=60.0):
+    import httpx
+    httpx._config.DEFAULT_TIMEOUT_CONFIG.connect = timeout
+    httpx._config.DEFAULT_TIMEOUT_CONFIG.read = timeout
+    httpx._config.DEFAULT_TIMEOUT_CONFIG.write = timeout
+
+
+def create_controller_app(
+    dispatch_method="shortest_queue",
+):
+    import fastchat.constants
+    fastchat.constants.LOGDIR = LOG_PATH
+    from fastchat.serve.controller import app, Controller
+
+    controller = Controller(dispatch_method)
+    sys.modules["fastchat.serve.controller"].controller = controller
+
+    return app
+
+
+def create_model_worker_app(
+    model_path=llm_model_dict[LLM_MODEL].get("local_model_path"),
+    model_names=[LLM_MODEL],
+    device=LLM_DEVICE,
+    load_8bit=False,
+    gptq_ckpt=None,
+    gptq_wbits=16,
+    gptq_groupsize=-1,
+    gptq_act_order=None,
+    gpus="0,1",
+    num_gpus=2,
+    max_gpu_memory="15GiB",
+    cpu_offloading=None,
+    worker_address=base_url.format(model_worker_port),
+    controller_address=base_url.format(controller_port),
+    limit_worker_concurrency=5,
+    stream_interval=2,
+    no_register=False,
+):
+    import fastchat.constants
+    fastchat.constants.LOGDIR = LOG_PATH
+    from fastchat.serve.model_worker import app, GptqConfig, ModelWorker, worker_id
+    from fastchat.serve import model_worker
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    args = parser.parse_args()
+    args.model_path = model_path
+    args.model_names = model_names
+    args.device = device
+    args.load_8bit = load_8bit
+    args.gptq_ckpt = gptq_ckpt
+    args.gptq_wbits = gptq_wbits
+    args.gptq_groupsize = gptq_groupsize
+    args.gptq_act_order = gptq_act_order
+    args.gpus = gpus
+    args.num_gpus = num_gpus
+    args.max_gpu_memory = max_gpu_memory
+    args.cpu_offloading = cpu_offloading
+    args.worker_address = worker_address
+    args.controller_address = controller_address
+    args.limit_worker_concurrency = limit_worker_concurrency
+    args.stream_interval = stream_interval
+    args.no_register = no_register
+
+    if args.gpus:
+        if len(args.gpus.split(",")) < args.num_gpus:
+            raise ValueError(
+                f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
+            )
+        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
+
+    if gpus and num_gpus is None:
+        num_gpus = len(gpus.split(','))
+        args.num_gpus = num_gpus
+
+    gptq_config = GptqConfig(
+        ckpt=gptq_ckpt or model_path,
+        wbits=args.gptq_wbits,
+        groupsize=args.gptq_groupsize,
+        act_order=args.gptq_act_order,
+    )
+    # torch.multiprocessing.set_start_method('spawn')
+    worker = ModelWorker(
+        controller_addr=args.controller_address,
+        worker_addr=args.worker_address,
+        worker_id=worker_id,
+        model_path=args.model_path,
+        model_names=args.model_names,
+        limit_worker_concurrency=args.limit_worker_concurrency,
+        no_register=args.no_register,
+        device=args.device,
+        num_gpus=args.num_gpus,
+        max_gpu_memory=args.max_gpu_memory,
+        load_8bit=args.load_8bit,
+        cpu_offloading=args.cpu_offloading,
+        gptq_config=gptq_config,
+        stream_interval=args.stream_interval,
+    )
+
+    sys.modules["fastchat.serve.model_worker"].worker = worker
+    sys.modules["fastchat.serve.model_worker"].args = args
+    sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config
+
+    return app
+
+
+def create_openai_api_app(
+    host=host_ip,
+    port=openai_api_port,
+    controller_address=base_url.format(controller_port),
+    api_keys=[],
+):
+    import fastchat.constants
+    fastchat.constants.LOGDIR = LOG_PATH
+    from fastchat.serve.openai_api_server import app, CORSMiddleware, app_settings
+
+    app.add_middleware(
+        CORSMiddleware,
+        allow_credentials=True,
+        allow_origins=["*"],
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    app_settings.controller_address = controller_address
+    app_settings.api_keys = api_keys
+
+    return app
+
+
+def run_controller(q):
+    import uvicorn
+    app = create_controller_app()
+
+    @app.on_event("startup")
+    async def on_startup():
+        set_httpx_timeout()
+        q.put(1)
+
+    uvicorn.run(app, host=host_ip, port=controller_port)
+
+
+def run_model_worker(q, *args, **kwargs):
+    import uvicorn
+    app = create_model_worker_app(*args, **kwargs)
+
+    @app.on_event("startup")
+    async def on_startup():
+        set_httpx_timeout()
+        while True:
+            no = q.get()
+            if no != 1:
+                q.put(no)
+            else:
+                break
+        q.put(2)
+
+    uvicorn.run(app, host=host_ip, port=model_worker_port)
+
+
+def run_openai_api(q):
+    import uvicorn
+    app = create_openai_api_app()
+
+    @app.on_event("startup")
+    async def on_startup():
+        set_httpx_timeout()
+        while True:
+            no = q.get()
+            if no != 2:
+                q.put(no)
+            else:
+                break
+        q.put(3)
+
+    uvicorn.run(app, host=host_ip, port=openai_api_port)
+
+
+if __name__ == "__main__":
+    logger.info(llm_model_dict[LLM_MODEL])
+    model_path = llm_model_dict[LLM_MODEL]["local_model_path"]
+
+    logger.info(f"To inspect the llm_api logs, see {LOG_PATH}")
+
+    if not model_path:
+        logger.error("local_model_path must not be empty")
+    else:
+        controller_process = Process(
+            target=run_controller,
+            name=f"controller({os.getpid()})",
+            args=(queue,),
+            daemon=True,
+        )
+        controller_process.start()
+
+        # CUDA cannot be used in a forked subprocess, so the model worker
+        # runs in the main process instead of a separate Process.
+        # model_worker_process = Process(
+        #     target=run_model_worker,
+        #     name=f"model_worker({os.getpid()})",
+        #     args=(queue,),
+        #     # kwargs={"load_8bit": True},
+        #     daemon=True,
+        # )
+        # model_worker_process.start()
+
+        openai_api_process = Process(
+            target=run_openai_api,
+            name=f"openai_api({os.getpid()})",
+            args=(queue,),
+            daemon=True,
+        )
+        openai_api_process.start()
+
+        run_model_worker(queue)
+
+        controller_process.join()
+        # model_worker_process.join()
+        openai_api_process.join()
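Once `llm_api.py` has brought up the fastchat controller, model worker, and OpenAI-compatible gateway, the endpoint can be smoke-tested independently of DemoGPT. This is a minimal sketch assuming the defaults above (`openai_api_port = 8888`, `LLM_MODEL = "chatglm-6b"`, `api_key = "EMPTY"`); it is not part of the patch.

```python
# Minimal smoke test against the OpenAI-compatible server started by llm_api.py.
import requests

resp = requests.post(
    "http://localhost:8888/v1/chat/completions",
    headers={"Authorization": "Bearer EMPTY"},  # fastchat accepts any key when api_keys is empty
    json={
        "model": "chatglm-6b",  # must match the model name registered by the worker
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```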