Skip to content

Commit 8db11c7

Browse files
committed
优化代码结构
1 parent 29d3456 commit 8db11c7

File tree

2 files changed

+76
-72
lines changed

2 files changed

+76
-72
lines changed

gpt_server/serving/main.py

Lines changed: 7 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
import yaml
22
import os
33
import sys
4-
from multiprocessing import Process
54
import signal
6-
import json
75
import ray
8-
import torch
96

107
os.environ["OPENBLAS_NUM_THREADS"] = (
118
"1" # 解决线程不足时,OpenBLAS blas_thread_init报错
@@ -21,9 +18,8 @@
2118
sys.path.append(root_dir)
2219
os.environ["LOGDIR"] = os.path.join(root_dir, "logs")
2320
from gpt_server.utils import (
24-
start_server,
2521
start_api_server,
26-
run_cmd,
22+
start_model_worker,
2723
stop_server,
2824
delete_log,
2925
)
@@ -38,74 +34,13 @@ def signal_handler(signum, frame):
3834

3935

4036
signal.signal(signal.SIGINT, signal_handler)
37+
4138
config_path = os.path.join(root_dir, "gpt_server/script/config.yaml")
4239
with open(config_path, "r") as f:
4340
config = yaml.safe_load(f)
4441
# print(config)
45-
# ----------------------------启动 Controller 和 Openai API 服务----------------------------------------------------
46-
start_api_server(config)
47-
# ----------------------------启动 Controller 和 Openai API 服务----------------------------------------------------
48-
process = []
49-
for model_config_ in config["models"]:
50-
for model_name, model_config in model_config_.items():
51-
# 启用的模型
52-
if model_config["enable"]:
53-
# pprint(model_config)
54-
print()
55-
# 模型地址
56-
model_name_or_path = model_config["model_name_or_path"]
57-
# 模型类型
58-
model_type = model_config["model_type"]
59-
lora = model_config.get("lora", None)
60-
61-
# model type 校验
62-
# py_path = f"{root_dir}/gpt_server/model_worker/{model_type}.py"
63-
py_path = f"-m gpt_server.model_worker.{model_type}"
64-
65-
model_names = model_name
66-
if model_config["alias"]:
67-
model_names = model_name + "," + model_config["alias"]
68-
if lora: # 如果使用lora,将lora的name添加到 model_names 中
69-
lora_names = list(lora.keys())
70-
model_names += "," + ",".join(lora_names)
71-
72-
# 获取 worker 数目 并获取每个 worker 的资源
73-
workers = model_config["workers"]
74-
75-
# process = []
76-
for worker in workers:
77-
gpus = worker["gpus"]
78-
# 将gpus int ---> str
79-
gpus = [str(i) for i in gpus]
80-
gpus_str = ",".join(gpus)
81-
num_gpus = len(gpus)
82-
run_mode = "python "
83-
CUDA_VISIBLE_DEVICES = ""
84-
if (
85-
torch.cuda.is_available()
86-
and model_config["device"].lower() == "gpu"
87-
):
88-
CUDA_VISIBLE_DEVICES = f"CUDA_VISIBLE_DEVICES={gpus_str} "
89-
elif model_config["device"].lower() == "cpu":
90-
CUDA_VISIBLE_DEVICES = ""
91-
else:
92-
raise Exception("目前仅支持 CPU/GPU设备!")
93-
backend = model_config["work_mode"]
94-
95-
cmd = (
96-
CUDA_VISIBLE_DEVICES
97-
+ run_mode
98-
+ py_path
99-
+ f" --num_gpus {num_gpus}"
100-
+ f" --model_name_or_path {model_name_or_path}"
101-
+ f" --model_names {model_names}"
102-
+ f" --backend {backend}"
103-
)
104-
if lora:
105-
cmd += f" --lora '{json.dumps(lora)}'"
106-
107-
p = Process(target=run_cmd, args=(cmd,))
108-
p.start()
109-
process.append(p)
110-
for p in process:
111-
p.join()
42+
if __name__ == "__main__":
43+
# ----------------------------启动 Controller 和 Openai API 服务----------------------------------------
44+
start_api_server(config=config)
45+
# ----------------------------启动 Model Worker 服务----------------------------------------------------
46+
start_model_worker(config=config)

gpt_server/utils.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import socket
22
from typing import List, Optional
33
import os
4+
import json
45
from multiprocessing import Process
56
import subprocess
67
from loguru import logger
8+
import torch
79

810
logger.add("logs/gpt_server.log", rotation="100 MB", level="INFO")
911

@@ -52,6 +54,73 @@ def start_api_server(config: dict):
5254
)
5355

5456

57+
def _build_worker_cmds(config: dict) -> List[str]:
    """Build one command line per worker of every enabled model.

    No process is started here, so an invalid configuration entry raises
    before any worker has been launched.

    Args:
        config: Parsed configuration. ``config["models"]`` is a list of
            ``{model_name: model_config}`` mappings. Each ``model_config``
            is read for ``enable``, ``model_name_or_path``, ``model_type``,
            ``workers`` (a list of ``{"gpus": [...]}``), ``device``,
            ``work_mode`` and, optionally, ``alias`` and ``lora``.

    Returns:
        The command strings, in configuration order.

    Raises:
        Exception: If ``device`` is neither ``cpu`` nor ``gpu``, or if it is
            ``gpu`` while ``torch.cuda.is_available()`` is false.
        KeyError: If a required key is missing from an enabled model.
    """
    import shlex  # local import: only the command builder needs it

    cmds = []
    for model_group in config["models"]:
        for model_name, model_config in model_group.items():
            # Only enabled models get workers.
            if not model_config["enable"]:
                continue
            # Kept from the original: emits one blank line per enabled model.
            print()

            model_name_or_path = model_config["model_name_or_path"]
            model_type = model_config["model_type"]
            lora = model_config.get("lora", None)

            # Every name the worker is registered under: the model name,
            # then the optional alias, then the LoRA adapter names.
            names = [model_name]
            alias = model_config.get("alias")  # alias may be absent or empty
            if alias:
                names.append(alias)
            if lora:
                names.extend(lora.keys())
            model_names = ",".join(names)

            for worker in model_config["workers"]:
                # GPU ids may be ints in the config; the command needs strings.
                gpus = [str(gpu_id) for gpu_id in worker["gpus"]]

                device = model_config["device"].lower()
                if device == "gpu":
                    if not torch.cuda.is_available():
                        # Report the real cause instead of the generic
                        # "unsupported device" message.
                        raise Exception(
                            "device 配置为 GPU,但当前环境 CUDA 不可用!"
                        )
                    env_prefix = f"CUDA_VISIBLE_DEVICES={','.join(gpus)} "
                elif device == "cpu":
                    env_prefix = ""
                else:
                    raise Exception("目前仅支持 CPU/GPU设备!")

                # NOTE(review): the env-var prefix and the quoted --lora
                # argument imply run_cmd hands this string to a shell --
                # confirm against run_cmd. Values are shell-quoted so paths
                # with spaces or quotes survive; plain values are unchanged.
                cmd = (
                    env_prefix
                    + "python "
                    + f"-m gpt_server.model_worker.{model_type}"
                    + f" --num_gpus {len(gpus)}"
                    + f" --model_name_or_path {shlex.quote(str(model_name_or_path))}"
                    + f" --model_names {shlex.quote(model_names)}"
                    + f" --backend {shlex.quote(str(model_config['work_mode']))}"
                )
                if lora:
                    cmd += f" --lora {shlex.quote(json.dumps(lora))}"
                cmds.append(cmd)
    return cmds


def start_model_worker(config: dict):
    """Start one model-worker process per configured worker and wait for them.

    For every enabled model in ``config["models"]``, each entry of its
    ``workers`` list becomes one ``multiprocessing.Process`` that calls
    ``run_cmd`` (defined elsewhere in this module) with the worker's command
    line. All commands are built and validated first, so a bad entry raises
    before any process is started. The call then blocks until every started
    process has exited.

    Args:
        config: Parsed configuration; see ``_build_worker_cmds`` for the
            keys that are read.

    Raises:
        Exception: Propagated from ``_build_worker_cmds`` for an unsupported
            device or a GPU request without CUDA.
    """
    cmds = _build_worker_cmds(config)

    processes = []
    for cmd in cmds:
        p = Process(target=run_cmd, args=(cmd,))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
122+
123+
55124
def start_server(
56125
host: str = "0.0.0.0",
57126
port: int = 8081,

0 commit comments

Comments
 (0)