Skip to content

Commit 654ef94

Browse files
committed
update config.yaml
1 parent 30fabfe commit 654ef94

File tree

3 files changed

+24
-4
lines changed

3 files changed

+24
-4
lines changed

gpt_server/model_worker/base/model_worker_base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ def run(cls):
176176
"--model_names", type=lambda s: s.split(","), default="model_names"
177177
)
178178
parser.add_argument("--lora", type=str, default=None)
179+
parser.add_argument("--host", type=str, default="localhost")
180+
parser.add_argument(
181+
"--controller_address", type=str, default="http://localhost:21001"
182+
)
179183

180184
args = parser.parse_args()
181185
os.environ["num_gpus"] = str(args.num_gpus)
@@ -189,8 +193,9 @@ def run(cls):
189193
os.environ["backend"] = "lmdeploy-turbomind"
190194
if args.lora:
191195
os.environ["lora"] = args.lora
196+
host = args.host
197+
controller_address = args.controller_address
192198

193-
host = "localhost"
194199
port = get_free_tcp_port()
195200
worker_addr = f"http://{host}:{port}"
196201

@@ -203,6 +208,7 @@ async def startup():
203208
model_path=args.model_name_or_path,
204209
model_names=args.model_names,
205210
conv_template="chatglm3", # TODO 默认是chatglm3用于统一处理
211+
controller_addr=controller_address,
206212
)
207213

208214
uvicorn.run(app, host=host, port=port)

gpt_server/script/config.yaml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,15 @@ serve_args:
88

99
# controller
1010
controller_args:
11-
host: localhost
11+
host: 0.0.0.0
1212
port: 21001
1313
dispatch_method: shortest_queue # lottery shortest_queue
1414

1515
# model worker
16+
model_worker_args:
17+
host: 0.0.0.0
18+
controller_address: http://localhost:21001
19+
1620
models:
1721
- glm-4v: #自定义的模型名称
1822
alias: null # 别名 例如 gpt4,gpt3
@@ -52,7 +56,7 @@ models:
5256

5357
- qwen: #自定义的模型名称
5458
alias: gpt-4,gpt-3.5-turbo,gpt-3.5-turbo-16k # 别名 例如 gpt4,gpt3
55-
enable: true # false true
59+
enable: false # false true
5660
model_name_or_path: /home/dev/model/qwen/Qwen1___5-14B-Chat/
5761
model_type: qwen # qwen chatglm3 yi internlm
5862
work_mode: vllm # vllm hf lmdeploy-turbomind lmdeploy-pytorch
@@ -66,7 +70,7 @@ models:
6670
# - gpus:
6771
# - 3
6872
- qwen-72b: #自定义的模型名称
69-
alias: null # 别名 例如 gpt4,gpt3
73+
alias: qwen # 别名 例如 gpt4,gpt3
7074
enable: true # false true
7175
model_name_or_path: /home/dev/model/qwen/Qwen2-72B-Instruct-AWQ/
7276
model_type: qwen # qwen chatglm3 yi internlm

gpt_server/utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ def start_api_server(config: dict):
5656

5757
def start_model_worker(config: dict):
5858
process = []
59+
try:
60+
host = config["model_worker_args"]["host"]
61+
controller_address = config["model_worker_args"]["controller_address"]
62+
except KeyError as e:
63+
error_msg = f"请参照 https://github.com/shell-nlp/gpt_server/blob/main/gpt_server/script/config.yaml 设置正确的 model_worker_args"
64+
logger.error(error_msg)
65+
raise KeyError(error_msg)
66+
5967
for model_config_ in config["models"]:
6068
for model_name, model_config in model_config_.items():
6169
# 启用的模型
@@ -110,6 +118,8 @@ def start_model_worker(config: dict):
110118
+ f" --model_name_or_path {model_name_or_path}"
111119
+ f" --model_names {model_names}"
112120
+ f" --backend {backend}"
121+
+ f" --host {host}"
122+
+ f" --controller_address {controller_address}"
113123
)
114124
if lora:
115125
cmd += f" --lora '{json.dumps(lora)}'"

0 commit comments

Comments (0)