add usage in chat response
hrfng committed Mar 29, 2024
1 parent 409de33 commit 81a82c5
Showing 4 changed files with 86 additions and 7 deletions.
17 changes: 13 additions & 4 deletions python/pybackend_libs/src/pybackend_libs/dataelem/model/llm/llm.py
@@ -1,3 +1,4 @@
+# flake8: noqa
 import time
 from typing import Dict, List, Literal, Optional, Union
 
@@ -9,6 +10,7 @@
                           AutoTokenizer, LlamaTokenizer)
 from transformers.generation.utils import GenerationConfig
 
+
 def torch_gc(devices):
     if torch.cuda.is_available():
         for device_id in devices:
@@ -64,13 +66,13 @@ def _load(self,
                 trust_remote_code=True)
         else:
             self.tokenizer = LlamaTokenizer.from_pretrained(pretrain_path,
-                                                            add_eos_token=False,
-                                                            add_bos_token=False,
+                                                            add_eos_token=False,
+                                                            add_bos_token=False,
                                                             eos_token='<eod>',
                                                             use_fast=False,
                                                             trust_remote_code=True)
-        self.tokenizer.add_tokens(['<sep>', '<pad>', '<mask>', '<predict>',
-                                   '<FIM_SUFFIX>', '<FIM_PREFIX>',
+        self.tokenizer.add_tokens(['<sep>', '<pad>', '<mask>', '<predict>',
+                                   '<FIM_SUFFIX>', '<FIM_PREFIX>',
                                    '<FIM_MIDDLE>','<commit_before>',
                                    '<commit_msg>','<commit_after>',
                                    '<jupyter_start>','<jupyter_text>',
@@ -160,12 +162,19 @@ class ChatCompletionResponseStreamChoice(BaseModel):
     finish_reason: Optional[Literal['stop', 'length']]
 
 
+class UsageInfo(BaseModel):
+    prompt_tokens: int = 0
+    total_tokens: int = 0
+    completion_tokens: Optional[int] = 0
+
+
 class ChatCompletionResponse(BaseModel):
     model: str
     object: Literal['chat.completion', 'chat.completion.chunk']
     choices: List[Union[ChatCompletionResponseChoice,
                         ChatCompletionResponseStreamChoice]]
     created: Optional[int] = Field(default_factory=lambda: int(time.time()))
+    usage: UsageInfo = UsageInfo()
 
 
 class CompletionRequest(BaseModel):
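Net effect of the llm.py change: every chat completion response now carries an OpenAI-style usage block. A minimal sketch of how a backend could fill it in (the tokenizer-based counting and the names `tokenizer`, `prompt_text`, `generated_text`, and `choice` are illustrative assumptions, not code from this commit):

# Sketch only: attach token accounting to a response. Assumes an HF-style
# tokenizer exposing encode(), and an already-built ChatCompletionResponseChoice.
prompt_tokens = len(tokenizer.encode(prompt_text))
completion_tokens = len(tokenizer.encode(generated_text))
response = ChatCompletionResponse(
    model='my-model',            # placeholder model name
    object='chat.completion',
    choices=[choice],
    usage=UsageInfo(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    ),
)

Defaulting `usage` to `UsageInfo()` keeps the change backward compatible: callers that ignore the field see no difference, while OpenAI-compatible clients can start reading token counts.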
13 changes: 12 additions & 1 deletion scripts/run_test.sh
@@ -90,7 +90,18 @@ function post_install_r0064() {
     # pip3 install lib/bisheng_pybackend_libs-0.0.3-py3-none-any.whl -i ${REPO}
 }
 
-post_install_r0064
+
+function run_container_v0065() {
+    LOCAL_MODEL_REPO="/public/bisheng/model_repository/"
+    MAPING_MODEL_REPO="/opt/bisheng-rt/models/model_repository"
+    MOUNT="-v $LOCAL_MODEL_REPO:$MAPING_MODEL_REPO -v $HOME:$HOME -v /public:/public"
+    IMAGE="cr.dataelem.com/dataelement/bisheng-rt:0.0.6.5"
+    docker run --gpus=all --net=host -itd --shm-size=10G \
+        --name bisheng_rt_v0065 ${MOUNT} $IMAGE bash
+}
+
+run_container_v0065
+# post_install_r0064
 # run_container_v0065_test
 # run_container_v006_cpu_test
 # run_container_v006_test
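run_container_v0065 launches the 0.0.6.5 runtime detached, with the local model repository mounted into the container. A quick way to verify it came up (standard Docker commands, not part of this script):

docker ps --filter name=bisheng_rt_v0065   # confirm the container is running
docker exec -it bisheng_rt_v0065 bash      # open a shell inside it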
4 changes: 2 additions & 2 deletions src/tests/regression/config/models.json
@@ -8,7 +8,7 @@
             "pymodel_type": "llm.vLLMQwen14bChat",
             "pymodel_params": "{\"temperature\": 0.0, \"stop\": [\"<|im_end|>\", \"<|im_start|>\", \"<|endoftext|>\"], \"dtype\": \"bfloat16\", \"max_model_len\": 8192}",
             "gpu_memory": "42",
-            "instance_groups": "device=gpu;gpus=0,1|2,3",
+            "instance_groups": "device=gpu;gpus=0,1",
             "reload": "1",
             "verbose": "0"
         }
@@ -74,7 +74,7 @@
         "type": "dataelem.pymodel.huggingface_model",
         "pymodel_type": "embedding.JINAEmbedding",
         "gpu_memory": "3",
-        "instance_groups": "device=gpu;gpus=1",
+        "instance_groups": "device=gpu;gpus=0|1|2",
         "reload": "1"
     }
 }
59 changes: 59 additions & 0 deletions src/tests/regression/test_old_protocol.py
@@ -0,0 +1,59 @@
import argparse

import requests


def infer(url, model_name):
infer_urls = [
f'http://{url}/v2.1/models/{model_name}/generate_stream',
f'http://{url}/v2.1/models/{model_name}/generate',
f'http://{url}/v2.1/models/{model_name}/infer',
f'http://{url}/v1/chat/completions'
]

    prompt = '以`今晚夜色真美`写一个短文,包含悬疑元素'  # i.e. "Write a short piece around 'the night is beautiful tonight', with an element of suspense"
payload = {
'model': model_name,
'messages': [
            {'role': 'system', 'content': '你是来自数据项素的智能助手'},  # i.e. "You are an intelligent assistant from DataElem"
{'role': 'user', 'content': prompt}
],
'temperature': 0.85,
'top_p': 0.8,
'stream': False
}
headers = {'Content-type': 'application/json'}
for infer_url in infer_urls:
resp = requests.post(url=infer_url, json=payload, headers=headers)
print('url: {}, resp: {}'.format(infer_url, resp.text))


def main(args):
url = args.url
model_name = args.model_name
assert model_name, 'empty model_name'
infer(url, model_name)


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'-u',
'--url',
type=str,
required=False,
default='127.0.0.1:9001',
help='model url.',
)

parser.add_argument(
'-m',
'--model-name',
type=str,
required=False,
default=None,
help='model name',
)

args = parser.parse_args()
main(args)

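The script defaults to 127.0.0.1:9001 but requires an explicit model name; a typical invocation looks like this (the model name is a placeholder for whichever model is currently loaded):

python src/tests/regression/test_old_protocol.py --url 127.0.0.1:9001 --model-name Qwen-14B-Chat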