Skip to content

Commit f67149b

Browse files
committed
修复对线程数为6的限制
1 parent 0d04621 commit f67149b

File tree

4 files changed

+27
-7
lines changed

4 files changed

+27
-7
lines changed

gpt_server/model_worker/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def get_worker(
147147
controller_addr: str = "http://localhost:21001",
148148
worker_id: str = str(uuid.uuid4())[:8],
149149
model_names: List[str] = [""],
150-
limit_worker_concurrency: int = 6,
150+
limit_worker_concurrency: int = 100,
151151
conv_template: str = None, # type: ignore
152152
):
153153
worker = cls(

gpt_server/model_worker/qwen.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,18 +63,18 @@ async def generate_stream_gate(self, params):
6363
add_generation_prompt=True,
6464
chat_template=self.other_config["chat_template"],
6565
)
66+
logger.info(text)
6667
input_ids = self.tokenizer([text], return_tensors="pt").input_ids
6768
elif model_type == "qwen2":
6869
logger.info("正在使用qwen-2.0 !")
6970
text = self.tokenizer.apply_chat_template(
7071
conversation=messages, tokenize=False, add_generation_prompt=True
7172
)
73+
logger.info(text)
7274
input_ids = self.tokenizer([text], return_tensors="pt").input_ids
73-
prompt = self.tokenizer.decode(input_ids.tolist()[0])
74-
logger.info(prompt)
7575
# ---------------添加额外的参数------------------------
7676
params["messages"] = messages
77-
params["prompt"] = prompt
77+
params["prompt"] = text
7878
params["stop"].extend(self.stop)
7979
params["stop_words_ids"] = self.stop_words_ids
8080
params["input_ids"] = input_ids

gpt_server/script/config.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ models:
2828
workers:
2929
- gpus:
3030
- 1
31+
# - gpus:
32+
# - 3
3133

3234
- llama3: #自定义的模型名称
3335
alias: null # 别名 例如 gpt4,gpt3
@@ -106,6 +108,16 @@ models:
106108
workers:
107109
- gpus:
108110
- 2
111+
- bge-base-zh:
112+
alias: null # 别名
113+
enable: true # false true
114+
model_name_or_path: /home/dev/model/Xorbits/bge-base-zh-v1___5/
115+
model_type: embedding
116+
work_mode: hf
117+
device: gpu # gpu / cpu
118+
workers:
119+
- gpus:
120+
- 2
109121

110122
- acge_text_embedding:
111123
alias: null # 别名

tests/test_throughput.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,18 @@ def send_request(results, i, prefill_times):
7272
if __name__ == "__main__":
7373
parser = argparse.ArgumentParser()
7474
parser.add_argument("--server-address", type=str, default="http://localhost:8082")
75+
7576
parser.add_argument("--model-name", type=str, default="qwen")
7677
parser.add_argument("--max-new-tokens", type=int, default=2048)
77-
parser.add_argument("--n-thread", type=int, default=8)
78+
parser.add_argument("--n-thread", type=int, default=20)
7879
parser.add_argument("--test-dispatch", action="store_true")
7980
args = parser.parse_args()
80-
81-
main(args)
81+
threads = []
82+
for i in range(1):
83+
t = threading.Thread(target=main, args=(args,))
84+
t.start()
85+
threads.append(t)
86+
time.sleep(1)
87+
for t in threads:
88+
t.join()
89+
# main(args)

0 commit comments

Comments (0)