Test case bugfix and add restful interface testcases. #1271

Merged · 7 commits · Mar 13, 2024
12 changes: 8 additions & 4 deletions .github/workflows/daily_ete_test.yml
@@ -34,7 +34,7 @@ env:
 jobs:
   test_functions:
     runs-on: [self-hosted, linux-a100]
-    timeout-minutes: 240
+    timeout-minutes: 300
     env:
       REPORT_DIR: /nvme/qa_test_models/test-reports
     container:
@@ -157,9 +157,9 @@ jobs:
       - name: Test lmdeploy - interface pipeline turbomind case
         continue-on-error: true
         if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'interface-pipeline'))
-        timeout-minutes: 20
+        timeout-minutes: 75
         run: |
-          pytest autotest/interface/pipeline/test_pipeline_turbomind_func.py -m 'not pr_test' --alluredir=allure-results
+          pytest autotest/interface/pipeline -m 'not pr_test' --alluredir=allure-results
       - name: Test lmdeploy - pipeline torch
         continue-on-error: true
         if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
@@ -323,4 +323,8 @@ jobs:
       - name: fail notify
         if: contains(needs.*.result, 'failure')
         run: |
-          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}
+          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test failed!!!","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}
+      - name: success notify
+        if: needs.test_functions.result=='success' && needs.test_triton.result=='success'
+        run: |
+          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test success","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} succeeded. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}
4 changes: 4 additions & 0 deletions .github/workflows/pr_ete_test.yml
@@ -15,6 +15,10 @@ on:
       - "setup.py"
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 
 env:
   HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
576 changes: 576 additions & 0 deletions autotest/interface/restful/test_restful_interface_func_common.py

Large diffs are not rendered by default.

286 changes: 286 additions & 0 deletions autotest/interface/restful/test_restful_interface_func_pytorch.py
@@ -0,0 +1,286 @@
import pytest
from utils.restful_return_check import (assert_chat_completions_batch_return,
assert_chat_completions_stream_return,
assert_chat_interactive_batch_return,
assert_chat_interactive_stream_return)

from lmdeploy.serve.openai.api_client import APIClient

BASE_HTTP_URL = 'http://localhost'
DEFAULT_PORT = 23333
MODEL = 'internlm/internlm2-chat-20b'
MODEL_NAME = 'internlm2-chat-20b'
BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)])
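# These cases assume an lmdeploy api_server is already listening on BASE_URL and
# serving MODEL with the PyTorch backend (the server is launched outside this
# file). A roughly equivalent manual launch, shown only as an assumed example:
#   lmdeploy serve api_server internlm/internlm2-chat-20b --backend pytorch --server-port 23333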


@pytest.mark.order(8)
@pytest.mark.pytorch
@pytest.mark.flaky(reruns=2)
class TestRestfulInterfaceChatCompletions:

def test_chat_completions_ignore_eos_batch(self):
api_client = APIClient(BASE_URL)
for output in api_client.chat_completions_v1(
model=MODEL_NAME,
messages='Hi, what is your name?',
ignore_eos=True,
max_tokens=100,
temperature=0.01):
continue
assert_chat_completions_batch_return(output, MODEL_NAME)
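        # With ignore_eos, decoding runs until max_tokens; the reported
        # completion_tokens may be max_tokens or max_tokens + 1 (presumably
        # depending on whether the final stop token is counted).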
assert output.get('usage').get(
'completion_tokens') == 101 or output.get('usage').get(
'completion_tokens') == 100
assert output.get('choices')[0].get('finish_reason') == 'length'

def test_chat_completions_ignore_eos_stream(self):
api_client = APIClient(BASE_URL)
outputList = []
for output in api_client.chat_completions_v1(
model=MODEL_NAME,
messages='Hi, what is your name?',
ignore_eos=True,
stream=True,
max_tokens=100,
temperature=0.01):
outputList.append(output)

assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True,
False)
assert_chat_completions_stream_return(outputList[-1], MODEL_NAME,
False, True)
for index in range(1, len(outputList) - 1):
assert_chat_completions_stream_return(outputList[index],
MODEL_NAME)
assert outputList[-1].get('choices')[0].get(
'finish_reason') == 'length'
assert len(outputList) == 102

def test_chat_completions_max_tokens_batch(self):
api_client = APIClient(BASE_URL)
for output in api_client.chat_completions_v1(
model=MODEL_NAME,
messages='Hi, pls intro yourself',
max_tokens=5,
temperature=0.01):
continue
assert_chat_completions_batch_return(output, MODEL_NAME)
assert output.get('choices')[0].get('finish_reason') == 'length'
assert output.get('usage').get('completion_tokens') == 6 or output.get(
'usage').get('completion_tokens') == 5

def test_chat_completions_max_tokens_stream(self):
api_client = APIClient(BASE_URL)
outputList = []
for output in api_client.chat_completions_v1(
model=MODEL_NAME,
messages='Hi, pls intro yourself',
stream=True,
max_tokens=5,
temperature=0.01):
outputList.append(output)
assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True,
False)
assert_chat_completions_stream_return(outputList[-1], MODEL_NAME,
False, True)
for index in range(1, len(outputList) - 1):
assert_chat_completions_stream_return(outputList[index],
MODEL_NAME)
assert outputList[-1].get('choices')[0].get(
'finish_reason') == 'length'
assert len(outputList) == 7

def test_chat_completions_repetition_penalty_stream(self):
api_client = APIClient(BASE_URL)
outputList = []
response = ''
for output in api_client.chat_completions_v1(
model=MODEL_NAME,
messages='Hi, pls intro yourself',
stream=True,
repetition_penalty=0.1,
temperature=0.01,
max_tokens=200):
outputList.append(output)
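        # A repetition_penalty below 1 rewards repeated tokens, so the output is
        # expected to degenerate into repeated fragments of the prompt.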
assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True,
False)
assert_chat_completions_stream_return(outputList[-1], MODEL_NAME,
False, True)
for index in range(1, len(outputList) - 1):
assert_chat_completions_stream_return(outputList[index],
MODEL_NAME)
response += outputList[index].get('choices')[0].get('delta').get(
'content')
assert 'pls pls ' * 5 in response or \
'Hi, pls intro yourself\n' * 5 in response

def test_chat_completions_topp_min_batch(self):
api_client = APIClient(BASE_URL)
outputList = []
for i in range(3):
for output in api_client.chat_completions_v1(
model=MODEL_NAME,
messages='Shanghai is',
top_p=0.1,
temperature=0.01):
outputList.append(output)
assert_chat_completions_batch_return(output, MODEL_NAME)
print(output)
assert outputList[0].get('choices')[0].get('message').get(
'content') == outputList[1].get('choices')[0].get('message').get(
'content')
assert outputList[1].get('choices')[0].get('message').get(
'content') == outputList[2].get('choices')[0].get('message').get(
'content')

def test_chat_completions_topp_min_stream(self):
api_client = APIClient(BASE_URL)
responseList = []
for i in range(3):
outputList = []
response = ''
for output in api_client.chat_completions_v1(
model=MODEL_NAME,
messages='Hi, pls intro yourself',
stream=True,
top_p=0.1,
temperature=0.01):
outputList.append(output)
assert_chat_completions_stream_return(outputList[0], MODEL_NAME,
True, False)
assert_chat_completions_stream_return(outputList[-1], MODEL_NAME,
False, True)
for index in range(1, len(outputList) - 1):
assert_chat_completions_stream_return(outputList[index],
MODEL_NAME)
response += outputList[index].get('choices')[0].get(
'delta').get('content')
responseList.append(response)
assert responseList[0] == responseList[1]
assert responseList[1] == responseList[2]

def test_chat_completions_longinput_stream(self):
api_client = APIClient(BASE_URL)
outputList = []
for output in api_client.chat_completions_v1(
model=MODEL_NAME,
messages='Hi, pls intro yourself' * 10000,
stream=True,
temperature=0.01):
outputList.append(output)
assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True,
False)
        assert_chat_completions_stream_return(outputList[-1], MODEL_NAME,
                                              False, True)
for index in range(1, len(outputList) - 1):
assert_chat_completions_stream_return(outputList[index],
MODEL_NAME)
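        # The prompt far exceeds the session length, so generation is expected to
        # stop immediately: only a first and a final chunk arrive, with
        # finish_reason 'length' and no generated content.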
assert outputList[1].get('choices')[0].get('finish_reason') == 'length'
assert outputList[1].get('choices')[0].get('delta').get(
'content') == ''
assert len(outputList) == 2


@pytest.mark.order(8)
@pytest.mark.pytorch
@pytest.mark.flaky(reruns=2)
class TestRestfulInterfaceChatInteractive:

def test_chat_interactive_ignore_eos_batch(self):
api_client = APIClient(BASE_URL)
for output in api_client.chat_interactive_v1(
prompt='Hi, what is your name?',
ignore_eos=True,
request_output_len=100,
temperature=0.01):
continue
assert_chat_interactive_batch_return(output)
assert output.get('tokens') == 100
assert output.get('finish_reason') == 'length'

def test_chat_interactive_ignore_eos_stream(self):
api_client = APIClient(BASE_URL)
outputList = []
for output in api_client.chat_interactive_v1(
prompt='Hi, what is your name?',
ignore_eos=True,
stream=True,
request_output_len=100,
temperature=0.01):
outputList.append(output)
print(output)
assert_chat_interactive_stream_return(outputList[-1],
True,
index=len(outputList) - 2)
for index in range(0, len(outputList) - 1):
assert_chat_interactive_stream_return(outputList[index],
index=index)
assert output.get('finish_reason') == 'length'
assert len(outputList) == 101

def test_chat_interactive_max_tokens_batch(self):
api_client = APIClient(BASE_URL)
for output in api_client.chat_interactive_v1(
prompt='Hi, pls intro yourself',
request_output_len=5,
temperature=0.01):
continue
assert_chat_interactive_batch_return(output)
assert output.get('finish_reason') == 'length'
assert output.get('tokens') == 5

def test_chat_interactive_max_tokens_stream(self):
api_client = APIClient(BASE_URL)
outputList = []
for output in api_client.chat_interactive_v1(
prompt='Hi, pls intro yourself',
stream=True,
request_output_len=5,
temperature=0.01):
outputList.append(output)
assert_chat_interactive_stream_return(outputList[-1],
True,
index=len(outputList) - 2)
for index in range(0, len(outputList) - 1):
assert_chat_interactive_stream_return(outputList[index],
index=index)
assert output.get('finish_reason') == 'length'
assert len(outputList) == 6

def test_chat_interactive_topp_min_batch(self):
api_client = APIClient(BASE_URL)
outputList = []
for i in range(3):
for output in api_client.chat_interactive_v1(prompt='Shanghai is',
top_p=0.01,
temperature=0.01):
continue
assert_chat_interactive_batch_return(output)
outputList.append(output)
print(output)
assert outputList[0] == outputList[1]
assert outputList[1] == outputList[2]

def test_chat_interactive_topp_min_stream(self):
api_client = APIClient(BASE_URL)
responseList = []
for i in range(3):
outputList = []
response = ''
for output in api_client.chat_interactive_v1(
model=MODEL_NAME,
prompt='Hi, pls intro yourself',
stream=True,
top_p=0.01,
temperature=0.01):
outputList.append(output)
assert_chat_interactive_stream_return(outputList[-1],
True,
index=len(outputList) - 2)
for index in range(0, len(outputList) - 1):
assert_chat_interactive_stream_return(outputList[index],
index=index)
response += outputList[index].get('text')
responseList.append(response)
assert responseList[0] == responseList[1]
assert responseList[1] == responseList[2]