update base_model deploy example (#2803)
Jintao-Huang authored Dec 30, 2024
1 parent 132adc7 commit 0af291d
Showing 7 changed files with 77 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -139,6 +139,7 @@ my_model/
 /data
 result/
 images
+/custom/
 
 # Pytorch
 *.pth
41 changes: 41 additions & 0 deletions examples/deploy/client/llm/base/openai_client.py
@@ -0,0 +1,41 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os

from openai import OpenAI

os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # restrict the demo server spawned below to GPU 0


def infer(client, model: str, messages):
    query = messages[0]['content']
    print(f'query: {query}')
    resp = client.completions.create(model=model, prompt=query, max_tokens=64, temperature=0)
    response = resp.choices[0].text
    print(f'response: {response}')
    # Alternatively, use the chat endpoint (the two call styles are equivalent here):
    resp = client.chat.completions.create(model=model, messages=messages, max_tokens=64, temperature=0)
    response = resp.choices[0].message.content
    print(f'response: {response}')
    return response


def run_client(host: str = '127.0.0.1', port: int = 8000):
    client = OpenAI(
        api_key='EMPTY',
        base_url=f'http://{host}:{port}/v1',
    )
    model = client.models.list().data[0].id
    print(f'model: {model}')

    messages = [{'role': 'user', 'content': '浙江 -> 杭州\n安徽 -> 合肥\n四川 ->'}]
    infer(client, model, messages)


if __name__ == '__main__':
    from swift.llm import run_deploy, DeployArguments
    # NOTE: run_deploy spawns a demo server; in a real deployment, comment out
    # this context manager and point run_client at your already-running server.
    with run_deploy(
            DeployArguments(
                model='Qwen/Qwen2.5-1.5B', verbose=False, log_interval=-1, infer_backend='pt',
                use_chat_template=False)) as port:
        run_client(port=port)
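The client above blocks until the full completion returns. Since the endpoint is OpenAI-compatible, the same prompt can also be streamed token by token; a minimal sketch, assuming a server already running on the example's default host and port (streaming support depends on the deployed backend):

# Hedged streaming variant of infer() above; host/port are assumptions
# matching the example's defaults, not part of this commit.
from openai import OpenAI

client = OpenAI(api_key='EMPTY', base_url='http://127.0.0.1:8000/v1')
model = client.models.list().data[0].id
stream = client.completions.create(
    model=model,
    prompt='浙江 -> 杭州\n安徽 -> 合肥\n四川 ->',
    max_tokens=64,
    temperature=0,
    stream=True)  # request incremental chunks instead of one blocking response
for chunk in stream:
    print(chunk.choices[0].text, end='', flush=True)
print()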
33 changes: 33 additions & 0 deletions examples/deploy/client/llm/base/swift_client.py
@@ -0,0 +1,33 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import List

os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # restrict the demo server spawned below to GPU 0


def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']):
    request_config = RequestConfig(max_tokens=64, temperature=0)

    resp_list = engine.infer(infer_requests, request_config)

    query0 = infer_requests[0].messages[0]['content']
    print(f'query0: {query0}')
    print(f'response0: {resp_list[0].choices[0].message.content}')


def run_client(host: str = '127.0.0.1', port: int = 8000):
    engine = InferClient(host=host, port=port)
    print(f'models: {engine.models}')

    infer_requests = [InferRequest(messages=[{'role': 'user', 'content': '浙江 -> 杭州\n安徽 -> 合肥\n四川 ->'}])]
    infer_batch(engine, infer_requests)


if __name__ == '__main__':
    from swift.llm import InferEngine, InferRequest, InferClient, RequestConfig, run_deploy, DeployArguments
    # NOTE: run_deploy spawns a demo server; in a real deployment, comment out
    # this context manager and point run_client at your already-running server.
    with run_deploy(
            DeployArguments(
                model='Qwen/Qwen2.5-1.5B', verbose=False, log_interval=-1, infer_backend='pt',
                use_chat_template=False)) as port:
        run_client(port=port)
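Because engine.infer takes a list of requests, the pattern above extends directly to batches, with responses returned in request order (the file itself indexes resp_list[0] against infer_requests[0]). A minimal sketch using only the names imported in this file; the extra prompts are illustrative:

# Hedged batching sketch; assumes the demo server from this example is running.
from swift.llm import InferClient, InferRequest, RequestConfig

def infer_many(host: str = '127.0.0.1', port: int = 8000):
    engine = InferClient(host=host, port=port)
    request_config = RequestConfig(max_tokens=64, temperature=0)
    infer_requests = [
        InferRequest(messages=[{'role': 'user', 'content': '浙江 -> 杭州\n广东 ->'}]),
        InferRequest(messages=[{'role': 'user', 'content': '1 + 1 = 2\n2 + 2 ='}]),
    ]
    # One call sends the whole batch; resp_list is aligned with infer_requests.
    resp_list = engine.infer(infer_requests, request_config)
    for req, resp in zip(infer_requests, resp_list):
        print(f'query: {req.messages[0]["content"]!r}')
        print(f'response: {resp.choices[0].message.content!r}')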
@@ -7,7 +7,7 @@


 def infer(client, model: str, messages):
-    resp = client.chat.completions.create(model=model, messages=messages, temperature=0)
+    resp = client.chat.completions.create(model=model, messages=messages, max_tokens=512, temperature=0)
     query = messages[0]['content']
     response = resp.choices[0].message.content
     print(f'query: {query}')
@@ -52,7 +52,6 @@ def run_client(host: str = '127.0.0.1', port: int = 8000):
     from swift.llm import (InferEngine, InferRequest, InferClient, RequestConfig, load_dataset, run_deploy,
                            DeployArguments)
     from swift.plugin import InferStats
-    # TODO: The current 'pt' deployment does not support automatic batch.
     # NOTE: In a real deployment scenario, please comment out the context of run_deploy.
     with run_deploy(
             DeployArguments(model='Qwen/Qwen2.5-1.5B-Instruct', verbose=False, log_interval=-1,
2 changes: 1 addition & 1 deletion examples/deploy/client/mllm/openai_client.py
@@ -8,7 +8,7 @@


 def infer(client, model: str, messages):
-    resp = client.chat.completions.create(model=model, messages=messages, temperature=0)
+    resp = client.chat.completions.create(model=model, messages=messages, max_tokens=512, temperature=0)
     query = messages[0]['content']
     response = resp.choices[0].message.content
     print(f'query: {query}')
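For the multimodal client, only the message content changes: it mixes image and text parts. A hedged sketch of one such message in the standard OpenAI vision format; the image URL is a placeholder, and the exact content schema the server accepts is an assumption, not taken from this diff:

# Hypothetical multimodal request body; URL and schema are illustrative only.
messages = [{
    'role': 'user',
    'content': [
        {'type': 'image_url', 'image_url': {'url': 'https://example.com/demo.png'}},
        {'type': 'text', 'text': 'Describe the image.'},
    ],
}]
# Then call infer(client, model, messages) exactly as in the diff above.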
1 change: 0 additions & 1 deletion examples/deploy/client/mllm/swift_client.py
@@ -117,7 +117,6 @@ def run_client(host: str = '127.0.0.1', port: int = 8000):
     from swift.llm import (InferEngine, InferRequest, InferClient, RequestConfig, load_dataset, run_deploy,
                            DeployArguments)
     from swift.plugin import InferStats
-    # TODO: The current 'pt' deployment does not support automatic batch.
     # NOTE: In a real deployment scenario, please comment out the context of run_deploy.
     with run_deploy(
             DeployArguments(model='Qwen/Qwen2-VL-2B-Instruct', verbose=False, log_interval=-1,
