From 6b21d507023ad03810cc2d0ffff63c2eec628740 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 10:31:17 +0800 Subject: [PATCH 1/7] bugfix and add more testcases --- .github/workflows/pr_ete_test.yml | 3 + .../test_restful_interface_turbomind.py | 807 ++++++++++++++++++ .../pipeline/test_pipeline_chat_pytorch.py | 3 +- 3 files changed, 812 insertions(+), 1 deletion(-) create mode 100644 autotest/interface/restful/test_restful_interface_turbomind.py diff --git a/.github/workflows/pr_ete_test.yml b/.github/workflows/pr_ete_test.yml index 08bf24b4b7..94f2ef719f 100644 --- a/.github/workflows/pr_ete_test.yml +++ b/.github/workflows/pr_ete_test.yml @@ -15,6 +15,9 @@ on: - "setup.py" workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true env: HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache diff --git a/autotest/interface/restful/test_restful_interface_turbomind.py b/autotest/interface/restful/test_restful_interface_turbomind.py new file mode 100644 index 0000000000..dea7b34c79 --- /dev/null +++ b/autotest/interface/restful/test_restful_interface_turbomind.py @@ -0,0 +1,807 @@ +import random +from concurrent.futures import ThreadPoolExecutor +from random import randint + +import pytest +from tqdm import tqdm + +from lmdeploy.serve.openai.api_client import APIClient, get_model_list + +BASE_HTTP_URL = 'http://10.140.0.187' +DEFAULT_PORT = 23333 +MODEL = 'internlm/internlm2-chat-20b' +MODEL_NAME = 'internlm2-chat-20b' +BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceBase: + + def test_issue1232(self): + + def process_one(question): + api_client = APIClient(BASE_URL) + model_name = api_client.available_models[0] + + msg = [dict(role='user', content=question)] + + data = api_client.chat_interactive_v1(msg, + session_id=randint(1, 100), + repetition_penalty=1.02, + request_output_len=224) + for item in data: + pass + + data = api_client.chat_completions_v1(model=model_name, + messages=msg, + repetition_penalty=1.02, + stop=['<|im_end|>', '100'], + max_tokens=10) + + for item in data: + response = item + + return response + + with ThreadPoolExecutor(max_workers=256) as executor: + for response in tqdm(executor.map(process_one, ['你是谁'] * 500)): + continue + + def test_get_model(self): + api_client = APIClient(BASE_URL) + model_name = api_client.available_models[0] + assert model_name == MODEL_NAME, api_client.available_models + + model_list = get_model_list(BASE_URL + '/v1/models') + assert MODEL_NAME in model_list, model_list + + def test_encode(self): + api_client = APIClient(BASE_URL) + input_ids1, length1 = api_client.encode('Hi, pls intro yourself') + input_ids2, length2 = api_client.encode('Hi, pls intro yourself', + add_bos=False) + input_ids3, length3 = api_client.encode('Hi, pls intro yourself', + do_preprocess=True) + input_ids4, length4 = api_client.encode('Hi, pls intro yourself', + do_preprocess=True, + add_bos=False) + input_ids5, length5 = api_client.encode('Hi, pls intro yourself' * 100, + add_bos=False) + + assert len(input_ids1) == length1 and length1 > 0 + assert len(input_ids2) == length2 and length2 > 0 + assert len(input_ids3) == length3 and length3 > 0 + assert len(input_ids4) == length4 and length4 > 0 + assert len(input_ids5) == length5 and length5 > 0 + assert length1 == length2 + 1 + assert input_ids2 == input_ids1[1:] + assert input_ids1[0] 
== 1 and input_ids3[0] == 1 + assert length5 == length2 * 100 + assert input_ids5 == input_ids2 * 100 + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatCompletions: + + def test_chat_completions_check_return_batch1(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + + def test_chat_completions_check_return_batch2(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages=[{ + 'role': 'user', + 'content': 'Hi, pls intro yourself' + }], + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + + def test_chat_completions_check_return_stream1(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + + def test_chat_completions_check_return_stream2(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages=[{ + 'role': 'user', + 'content': 'Hi, pls intro yourself' + }], + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + + def test_chat_completions_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + max_tokens=100, + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert output.get('usage').get('completion_tokens') == 101 + assert output.get('choices')[0].get('finish_reason') == 'length' + + def test_chat_completions_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + stream=True, + max_tokens=100, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 103 + + def test_chat_completions_stopwords_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + stop=' is', + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert ' is' not in output.get('choices')[0].get('message').get( + 'content') + assert output.get('choices')[0].get('finish_reason') == 'stop' + + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + stop=[' is', 
'上海', ' to'], + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert ' is' not in output.get('choices')[0].get('message').get( + 'content') + assert ' 上海' not in output.get('choices')[0].get('message').get( + 'content') + assert ' to' not in output.get('choices')[0].get('message').get( + 'content') + assert output.get('choices')[0].get('finish_reason') == 'stop' + + def test_chat_completions_stopwords_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + stop=' is', + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + assert ' to' not in outputList[index].get('choices')[0].get( + 'delta').get('content') + assert outputList[-1].get('choices')[0].get('finish_reason') == 'stop' + + outputList = [] + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + stop=[' is', '上海', ' to'], + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + assert ' is' not in outputList[index].get('choices')[0].get( + 'delta').get('content') + assert '上海' not in outputList[index].get('choices')[0].get( + 'delta').get('content') + assert ' to' not in outputList[index].get('choices')[0].get( + 'delta').get('content') + assert outputList[-1].get('choices')[0].get('finish_reason') == 'stop' + + def test_chat_completions_special_words_batch(self): + message = '<|im_start|>system\n当开启工具以及代码时,根据需求选择合适的工具进行调用\n' + \ + '<|im_end|><|im_start|>system name=<|interpreter|>\n你现在已经' + \ + '能够在一个有状态的 Jupyter 笔记本环境中运行 Python 代码。当你向 python ' + \ + '发送含有 Python >代码的消息时,它将在该环境中执行。这个工具适用于多种场景,' + \ + '如数据分析或处理(包括数据操作、统计分析、图表绘制),复杂的计算问题(解决数学和物理' + \ + '难题),编程示例(理解编程概念或特性),文本处理和分析(比如文本解析和自然语言处理),机器学习和数据科学(用于' + \ + '展示模型训练和数据可视化),以及文件操作和数据导入(处理CSV、JSON等格式的文件)。<|im_end|>\n' + \ + '<|im_start|>user\n设 $L$ 为圆周$x^2+y^2=2x$,计算曲线积分:$I=\\int_L' + \ + '{x\\mathrm{d}s}=$<|im_end|>\n<|im_start|>assistant' + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages=message, + skip_special_tokens=False, + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert '<|action_start|><|interpreter|>' in output.get( + 'choices')[0].get('message').get('content') + + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages=message, + skip_special_tokens=True, + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert '<|action_start|><|interpreter|>' not in output.get( + 'choices')[0].get('message').get('content') + + def test_chat_completions_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + max_tokens=5, + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert output.get('choices')[0].get('finish_reason') == 'length' + assert output.get('usage').get('completion_tokens') == 6 + + def 
test_chat_completions_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + max_tokens=5, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 8 + + def test_chat_completions_repetition_penalty_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + repetition_penalty=0.1, + temperature=0.01, + max_tokens=200): + continue + assert_chat_completions_batch_return(output) + assert ' is is' * 5 in output.get('choices')[0].get('message').get( + 'content') + + def test_chat_completions_repetition_penalty_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + repetition_penalty=0.1, + temperature=0.01, + max_tokens=200): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + response += outputList[index].get('choices')[0].get('delta').get( + 'content') + assert 'pls pls ' * 5 in response, response + + def test_chat_completions_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_completions_v1( + model=MODEL_NAME, messages='Shanghai is', top_p=0.1): + outputList.append(output) + assert_chat_completions_batch_return(output) + assert outputList[0].get('choices')[0].get('message').get( + 'content') == outputList[1].get('choices')[0].get('message').get( + 'content') + assert outputList[1].get('choices')[0].get('message').get( + 'content') == outputList[2].get('choices')[0].get('message').get( + 'content') + + def test_chat_completions_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + top_p=0.1): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + response += outputList[index].get('choices')[0].get( + 'delta').get('content') + responseList.append(response) + assert responseList[0] == responseList[1] + assert responseList[1] == responseList[2] + + def test_chat_completions_mis_model_name_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model='error', messages='Hi, pls intro yourself', + temperature=0.01): + continue + assert output.get('code') == 404 + assert output.get('message') == 'The model `error` does not exist.' 
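+        # a bad model name should come back as an OpenAI-style error payload
+        # (code / message / object fields) rather than a completion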
+ assert output.get('object') == 'error' + + def test_chat_completions_mis_model_name_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model='error', + messages='Hi, pls intro yourself', + stream=True, + max_tokens=5, + temperature=0.01): + outputList.append(output) + assert output.get('code') == 404 + assert output.get('message') == 'The model `error` does not exist.' + assert output.get('object') == 'error' + assert len(outputList) == 1 + + def test_chat_completions_longinput_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself' * 10000, + temperature=0.01): + continue + assert output.get('choices')[0].get('finish_reason') == 'length' + assert output.get('choices')[0].get('message').get('content') == '' + + def test_chat_completions_longinput_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself' * 10000, + stream=True, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], True, False) + assert outputList[1].get('choices')[0].get('finish_reason') == 'length' + assert outputList[1].get('choices')[0].get('delta').get( + 'content') == '' + assert len(outputList) == 2 + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatInteractive: + + def test_chat_interactive_check_return_batch1(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + + def test_chat_interactive_check_return_batch2(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1(prompt=[{ + 'role': + 'user', + 'content': + 'Hi, pls intro yourself' + }], + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + + def test_chat_interactive_check_return_stream1(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', stream=True, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + assert_chat_interactive_stream_return(outputList[-2], + False, + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 2): + assert_chat_interactive_stream_return(outputList[index], + index=index) + + def test_chat_interactive_check_return_stream2(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1(prompt=[{ + 'role': + 'user', + 'content': + 'Hi, pls intro yourself' + }], + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + assert_chat_interactive_stream_return(outputList[-2], + False, + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 2): + assert_chat_interactive_stream_return(outputList[index], + index=index) + + def test_chat_interactive_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, what is your name?', + ignore_eos=True, + request_output_len=100, + temperature=0.01): + continue + 
assert_chat_interactive_batch_return(output) + assert output.get('tokens') == 101 + assert output.get('finish_reason') == 'length' + + def test_chat_interactive_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, what is your name?', + ignore_eos=True, + stream=True, + request_output_len=100, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 102 + + def test_chat_interactive_stopwords_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + stop=' is', + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert ' is' not in output.get('text') + assert output.get('finish_reason') == 'stop' + + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + stop=[' is', '上海', ' to'], + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert ' is' not in output.get('text') + assert ' 上海' not in output.get('text') + assert ' to' not in output.get('text') + assert output.get('finish_reason') == 'stop' + + def test_chat_interactive_stopwords_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + stop=' is', + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + assert_chat_interactive_stream_return(outputList[-2], + False, + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 2): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert ' to' not in outputList[index].get('text') + assert output.get('finish_reason') == 'stop' + + outputList = [] + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + stop=[' is', '上海', ' to'], + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + assert_chat_interactive_stream_return(outputList[-2], + False, + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 2): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert ' is' not in outputList[index].get('text') + assert '上海' not in outputList[index].get('text') + assert ' to' not in outputList[index].get('text') + assert output.get('finish_reason') == 'stop' + + def test_chat_interactive_special_words_batch(self): + message = '<|im_start|>system\n当开启工具以及代码时,根据需求选择合适的工具进行调用\n' + \ + '<|im_end|><|im_start|>system name=<|interpreter|>\n你现在已经' + \ + '能够在一个有状态的 Jupyter 笔记本环境中运行 Python 代码。当你向 python ' + \ + '发送含有 Python >代码的消息时,它将在该环境中执行。这个工具适用于多种场景,' + \ + '如数据分析或处理(包括数据操作、统计分析、图表绘制),复杂的计算问题(解决数学和物理' + \ + '难题),编程示例(理解编程概念或特性),文本处理和分析(比如文本解析和自然语言处理),机器学习和数据科学(用于' + \ + '展示模型训练和数据可视化),以及文件操作和数据导入(处理CSV、JSON等格式的文件)。<|im_end|>\n' + \ + '<|im_start|>user\n设 $L$ 为圆周$x^2+y^2=2x$,计算曲线积分:$I=\\int_L' + \ + '{x\\mathrm{d}s}=$<|im_end|>\n<|im_start|>assistant' + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1(prompt=message, + skip_special_tokens=False, + temperature=0.01): + continue + 
assert_chat_interactive_batch_return(output) + assert '<|action_start|><|interpreter|>' in output.get('text') + + for output in api_client.chat_interactive_v1(prompt=message, + skip_special_tokens=True, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert '<|action_start|><|interpreter|>' not in output.get('text') + + def test_chat_interactive_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + request_output_len=5, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert output.get('finish_reason') == 'length' + assert output.get('tokens') == 6 + + def test_chat_interactive_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + stream=True, + request_output_len=5, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 7 + + def test_chat_interactive_repetition_penalty_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + repetition_penalty=0.1, + temperature=0.01, + request_output_len=512): + continue + assert_chat_interactive_batch_return(output) + assert 'a 上海 is a 上海, ' * 5 in output.get('text') + + def test_chat_interactive_with_history_batch(self): + api_client = APIClient(BASE_URL) + history = 0 + session_id = random.randint(0, 100000) + for i in range(3): + for output in api_client.chat_interactive_v1( + prompt='Shanghai is', + temperature=0.01, + interactive_mode=True, + session_id=session_id): + continue + assert_chat_interactive_batch_return(output) + assert output.get('history_tokens') == history + history += output.get('input_tokens') + output.get('tokens') + + def test_chat_interactive_with_history_stream(self): + api_client = APIClient(BASE_URL) + history = 0 + session_id = random.randint(0, 100000) + for i in range(3): + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + stream=True, + temperature=0.01, + interactive_mode=True, + session_id=session_id): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert outputList[-1].get('history_tokens') == history + history += outputList[-1].get('input_tokens') + outputList[-1].get( + 'tokens') + + def test_chat_interactive_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + top_p=0.01): + continue + assert_chat_interactive_batch_return(output) + outputList.append(output) + assert outputList[0] == outputList[1] + assert outputList[1] == outputList[2] + + def test_chat_interactive_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_interactive_v1( + model=MODEL_NAME, + prompt='Hi, pls intro yourself', + stream=True, + top_p=0.01): + outputList.append(output) + 
assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + response += outputList[index].get('text') + responseList.append(response) + assert responseList[0] == responseList[1] + assert responseList[1] == responseList[2] + + @pytest.mark.tmp + def test_chat_interactive_longinput_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself' * 10000, temperature=0.01): + continue + assert output.get('finish_reason') == 'length' + assert output.get('text') == '' + + @pytest.mark.tmp + def test_chat_interactive_longinput_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself' * 10000, + stream=True, + temperature=0.01): + outputList.append(output) + assert outputList[0].get('finish_reason') == 'length', outputList + assert outputList[0].get('text') == '' + assert len(outputList) == 1 + + +def assert_chat_completions_batch_return(output): + assert output.get('usage').get('prompt_tokens') > 0 + assert output.get('usage').get('total_tokens') > 0 + assert output.get('usage').get('completion_tokens') > 0 + assert output.get('usage').get('completion_tokens') + output.get( + 'usage').get('prompt_tokens') == output.get('usage').get( + 'total_tokens') + assert output.get('id') is not None + assert output.get('object') == 'chat.completion' + assert output.get('model') == MODEL_NAME + output_message = output.get('choices') + assert len(output_message) == 1 + for message in output_message: + assert message.get('finish_reason') in ['stop', 'length'] + assert message.get('index') == 0 + assert len(message.get('message').get('content')) > 0 + assert message.get('message').get('role') == 'assistant' + + +def assert_chat_completions_stream_return(output, + is_first: bool = False, + is_last: bool = False): + assert output.get('id') is not None + if is_first is False: + assert output.get('object') == 'chat.completion.chunk' + assert output.get('model') == MODEL_NAME + output_message = output.get('choices') + assert len(output_message) == 1 + for message in output_message: + assert message.get('delta').get('role') == 'assistant' + assert message.get('index') == 0 + if is_last is False: + assert message.get('finish_reason') is None + if is_first is False and is_last is False: + assert len(message.get('delta').get('content')) >= 0 + if is_last is True: + assert len(message.get('delta').get('content')) == 0 + assert message.get('finish_reason') in ['stop', 'length'] + + +def assert_chat_interactive_batch_return(output): + assert output.get('input_tokens') > 0 + assert output.get('tokens') > 0 + assert output.get('history_tokens') >= 0 + assert output.get('finish_reason') in ['stop', 'length'] + assert len(output.get('text')) > 0 + + +def assert_chat_interactive_stream_return(output, + is_last: bool = False, + is_text_empty: bool = False, + index: int = None): + assert output.get('input_tokens') > 0 + if index is not None: + assert output.get('tokens') >= index + 1 and output.get( + 'tokens') <= index + 6 + assert output.get('tokens') > 0 + assert output.get('history_tokens') >= 0 + if is_last: + assert len(output.get('text')) >= 0 + assert output.get('finish_reason') in ['stop', 'length'] + elif is_text_empty: + assert len(output.get('text')) == 0 + assert output.get('finish_reason') is None + else: + 
assert len(output.get('text')) >= 0 + assert output.get('finish_reason') is None diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch.py index 7e0318eebd..eea30502bb 100644 --- a/autotest/tools/pipeline/test_pipeline_chat_pytorch.py +++ b/autotest/tools/pipeline/test_pipeline_chat_pytorch.py @@ -22,7 +22,8 @@ def getModelList(tp_num): @pytest.mark.parametrize('model', getModelList(tp_num=1)) def test_pipeline_chat_pytorch_tp1(config, common_case_config, model, worker_id): - os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id) + if 'gw' in worker_id: + os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id) p = Process(target=run_pipeline_chat_test, args=(config, common_case_config, model, 'pytorch')) p.start() From 62d7925a43aff4d604ffd1c68b1ed4f9c3064860 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 10:38:32 +0800 Subject: [PATCH 2/7] rename --- .github/workflows/pr_ete_test.yml | 1 + ...ful_interface_turbomind.py => test_restful_interface_func.py} | 0 2 files changed, 1 insertion(+) rename autotest/interface/restful/{test_restful_interface_turbomind.py => test_restful_interface_func.py} (100%) diff --git a/.github/workflows/pr_ete_test.yml b/.github/workflows/pr_ete_test.yml index 94f2ef719f..72daea081b 100644 --- a/.github/workflows/pr_ete_test.yml +++ b/.github/workflows/pr_ete_test.yml @@ -19,6 +19,7 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true + env: HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai diff --git a/autotest/interface/restful/test_restful_interface_turbomind.py b/autotest/interface/restful/test_restful_interface_func.py similarity index 100% rename from autotest/interface/restful/test_restful_interface_turbomind.py rename to autotest/interface/restful/test_restful_interface_func.py From 646962429ba2cf22c34a4dcc691021e85253e76c Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 13:56:08 +0800 Subject: [PATCH 3/7] fix --- .github/workflows/daily_ete_test.yml | 6 +- ... 
=> test_restful_interface_func_common.py} | 327 +++--------------- .../test_restful_interface_func_pytorch.py | 286 +++++++++++++++ .../test_restful_interface_func_turbomind.py | 269 ++++++++++++++ autotest/utils/content_detect_utils.py | 94 +++++ autotest/utils/restful_return_check.py | 68 ++++ 6 files changed, 767 insertions(+), 283 deletions(-) rename autotest/interface/restful/{test_restful_interface_func.py => test_restful_interface_func_common.py} (67%) create mode 100644 autotest/interface/restful/test_restful_interface_func_pytorch.py create mode 100644 autotest/interface/restful/test_restful_interface_func_turbomind.py create mode 100644 autotest/utils/content_detect_utils.py create mode 100644 autotest/utils/restful_return_check.py diff --git a/.github/workflows/daily_ete_test.yml b/.github/workflows/daily_ete_test.yml index f2279536ad..5916957079 100644 --- a/.github/workflows/daily_ete_test.yml +++ b/.github/workflows/daily_ete_test.yml @@ -34,7 +34,7 @@ env: jobs: test_functions: runs-on: [self-hosted, linux-a100] - timeout-minutes: 240 + timeout-minutes: 300 env: REPORT_DIR: /nvme/qa_test_models/test-reports container: @@ -157,9 +157,9 @@ jobs: - name: Test lmdeploy - interface pipeline turbomind case continue-on-error: true if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'interface-pipeline')) - timeout-minutes: 20 + timeout-minutes: 75 run: | - pytest autotest/interface/pipeline/test_pipeline_turbomind_func.py -m 'not pr_test' --alluredir=allure-results + pytest autotest/interface/pipeline -m 'not pr_test' --alluredir=allure-results - name: Test lmdeploy - pipeline torch continue-on-error: true if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'pipeline')) diff --git a/autotest/interface/restful/test_restful_interface_func.py b/autotest/interface/restful/test_restful_interface_func_common.py similarity index 67% rename from autotest/interface/restful/test_restful_interface_func.py rename to autotest/interface/restful/test_restful_interface_func_common.py index dea7b34c79..ac157ef3a6 100644 --- a/autotest/interface/restful/test_restful_interface_func.py +++ b/autotest/interface/restful/test_restful_interface_func_common.py @@ -4,19 +4,24 @@ import pytest from tqdm import tqdm +from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams +from utils.restful_return_check import (assert_chat_completions_batch_return, + assert_chat_completions_stream_return, + assert_chat_interactive_batch_return, + assert_chat_interactive_stream_return) from lmdeploy.serve.openai.api_client import APIClient, get_model_list BASE_HTTP_URL = 'http://10.140.0.187' -DEFAULT_PORT = 23333 +DEFAULT_PORT = 23334 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) @pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind -@pytest.mark.flaky(reruns=2) +@pytest.mark.restful_interface_common +@pytest.mark.flaky(reruns=0) class TestRestfulInterfaceBase: def test_issue1232(self): @@ -83,8 +88,8 @@ def test_encode(self): @pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind -@pytest.mark.flaky(reruns=2) +@pytest.mark.restful_interface_common +@pytest.mark.flaky(reruns=0) class TestRestfulInterfaceChatCompletions: def test_chat_completions_check_return_batch1(self): @@ -94,7 +99,7 @@ def 
test_chat_completions_check_return_batch1(self): messages='Hi, pls intro yourself', temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) def test_chat_completions_check_return_batch2(self): api_client = APIClient(BASE_URL) @@ -106,7 +111,7 @@ def test_chat_completions_check_return_batch2(self): }], temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) def test_chat_completions_check_return_stream1(self): api_client = APIClient(BASE_URL) @@ -118,10 +123,13 @@ def test_chat_completions_check_return_stream1(self): temperature=0.01): outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) def test_chat_completions_check_return_stream2(self): api_client = APIClient(BASE_URL) @@ -136,43 +144,13 @@ def test_chat_completions_check_return_stream2(self): temperature=0.01): outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - - def test_chat_completions_ignore_eos_batch(self): - api_client = APIClient(BASE_URL) - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, what is your name?', - ignore_eos=True, - max_tokens=100, - temperature=0.01): - continue - assert_chat_completions_batch_return(output) - assert output.get('usage').get('completion_tokens') == 101 - assert output.get('choices')[0].get('finish_reason') == 'length' - - def test_chat_completions_ignore_eos_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, what is your name?', - ignore_eos=True, - stream=True, - max_tokens=100, - temperature=0.01): - outputList.append(output) - - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) - for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - assert outputList[-1].get('choices')[0].get( - 'finish_reason') == 'length' - assert len(outputList) == 103 + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) def test_chat_completions_stopwords_batch(self): api_client = APIClient(BASE_URL) @@ -181,7 +159,7 @@ def test_chat_completions_stopwords_batch(self): stop=' is', temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert ' is' not in output.get('choices')[0].get('message').get( 'content') assert output.get('choices')[0].get('finish_reason') == 'stop' @@ -191,7 +169,7 @@ def test_chat_completions_stopwords_batch(self): stop=[' is', '上海', ' to'], temperature=0.01): continue - 
assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert ' is' not in output.get('choices')[0].get('message').get( 'content') assert ' 上海' not in output.get('choices')[0].get('message').get( @@ -210,10 +188,13 @@ def test_chat_completions_stopwords_stream(self): temperature=0.01): outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) assert ' to' not in outputList[index].get('choices')[0].get( 'delta').get('content') assert outputList[-1].get('choices')[0].get('finish_reason') == 'stop' @@ -226,10 +207,13 @@ def test_chat_completions_stopwords_stream(self): temperature=0.01): outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) assert ' is' not in outputList[index].get('choices')[0].get( 'delta').get('content') assert '上海' not in outputList[index].get('choices')[0].get( @@ -254,7 +238,7 @@ def test_chat_completions_special_words_batch(self): skip_special_tokens=False, temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert '<|action_start|><|interpreter|>' in output.get( 'choices')[0].get('message').get('content') @@ -263,40 +247,10 @@ def test_chat_completions_special_words_batch(self): skip_special_tokens=True, temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert '<|action_start|><|interpreter|>' not in output.get( 'choices')[0].get('message').get('content') - def test_chat_completions_max_tokens_batch(self): - api_client = APIClient(BASE_URL) - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself', - max_tokens=5, - temperature=0.01): - continue - assert_chat_completions_batch_return(output) - assert output.get('choices')[0].get('finish_reason') == 'length' - assert output.get('usage').get('completion_tokens') == 6 - - def test_chat_completions_max_tokens_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself', - stream=True, - max_tokens=5, - temperature=0.01): - outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) - for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - assert outputList[-1].get('choices')[0].get( - 'finish_reason') == 'length' - assert len(outputList) == 8 - def test_chat_completions_repetition_penalty_batch(self): api_client = 
APIClient(BASE_URL) for output in api_client.chat_completions_v1(model=MODEL_NAME, @@ -305,29 +259,10 @@ def test_chat_completions_repetition_penalty_batch(self): temperature=0.01, max_tokens=200): continue - assert_chat_completions_batch_return(output) - assert ' is is' * 5 in output.get('choices')[0].get('message').get( - 'content') - - def test_chat_completions_repetition_penalty_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - response = '' - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself', - stream=True, - repetition_penalty=0.1, - temperature=0.01, - max_tokens=200): - outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) - for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - response += outputList[index].get('choices')[0].get('delta').get( - 'content') - assert 'pls pls ' * 5 in response, response + assert_chat_completions_batch_return(output, MODEL_NAME) + assert base_rps_frac_chars_in_dupe_ngrams( + 6, + output.get('choices')[0].get('message').get('content')) > 80 def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) @@ -336,7 +271,7 @@ def test_chat_completions_topp_min_batch(self): for output in api_client.chat_completions_v1( model=MODEL_NAME, messages='Shanghai is', top_p=0.1): outputList.append(output) - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert outputList[0].get('choices')[0].get('message').get( 'content') == outputList[1].get('choices')[0].get('message').get( 'content') @@ -344,28 +279,6 @@ def test_chat_completions_topp_min_batch(self): 'content') == outputList[2].get('choices')[0].get('message').get( 'content') - def test_chat_completions_topp_min_stream(self): - api_client = APIClient(BASE_URL) - responseList = [] - for i in range(3): - outputList = [] - response = '' - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself', - stream=True, - top_p=0.1): - outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) - for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - response += outputList[index].get('choices')[0].get( - 'delta').get('content') - responseList.append(response) - assert responseList[0] == responseList[1] - assert responseList[1] == responseList[2] - def test_chat_completions_mis_model_name_batch(self): api_client = APIClient(BASE_URL) for output in api_client.chat_completions_v1( @@ -401,25 +314,10 @@ def test_chat_completions_longinput_batch(self): assert output.get('choices')[0].get('finish_reason') == 'length' assert output.get('choices')[0].get('message').get('content') == '' - def test_chat_completions_longinput_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself' * 10000, - stream=True, - temperature=0.01): - outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert outputList[1].get('choices')[0].get('finish_reason') == 'length' - assert outputList[1].get('choices')[0].get('delta').get( - 'content') == '' - assert len(outputList) == 2 - @pytest.mark.order(7) 
@pytest.mark.restful_interface_turbomind -@pytest.mark.flaky(reruns=2) +@pytest.mark.flaky(reruns=0) class TestRestfulInterfaceChatInteractive: def test_chat_interactive_check_return_batch1(self): @@ -483,37 +381,6 @@ def test_chat_interactive_check_return_stream2(self): assert_chat_interactive_stream_return(outputList[index], index=index) - def test_chat_interactive_ignore_eos_batch(self): - api_client = APIClient(BASE_URL) - for output in api_client.chat_interactive_v1( - prompt='Hi, what is your name?', - ignore_eos=True, - request_output_len=100, - temperature=0.01): - continue - assert_chat_interactive_batch_return(output) - assert output.get('tokens') == 101 - assert output.get('finish_reason') == 'length' - - def test_chat_interactive_ignore_eos_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_interactive_v1( - prompt='Hi, what is your name?', - ignore_eos=True, - stream=True, - request_output_len=100, - temperature=0.01): - outputList.append(output) - assert_chat_interactive_stream_return(outputList[-1], - True, - index=len(outputList) - 2) - for index in range(0, len(outputList) - 1): - assert_chat_interactive_stream_return(outputList[index], - index=index) - assert output.get('finish_reason') == 'length' - assert len(outputList) == 102 - def test_chat_interactive_stopwords_batch(self): api_client = APIClient(BASE_URL) for output in api_client.chat_interactive_v1(prompt='Shanghai is', @@ -603,35 +470,6 @@ def test_chat_interactive_special_words_batch(self): assert_chat_interactive_batch_return(output) assert '<|action_start|><|interpreter|>' not in output.get('text') - def test_chat_interactive_max_tokens_batch(self): - api_client = APIClient(BASE_URL) - for output in api_client.chat_interactive_v1( - prompt='Hi, pls intro yourself', - request_output_len=5, - temperature=0.01): - continue - assert_chat_interactive_batch_return(output) - assert output.get('finish_reason') == 'length' - assert output.get('tokens') == 6 - - def test_chat_interactive_max_tokens_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_interactive_v1( - prompt='Hi, pls intro yourself', - stream=True, - request_output_len=5, - temperature=0.01): - outputList.append(output) - assert_chat_interactive_stream_return(outputList[-1], - True, - index=len(outputList) - 2) - for index in range(0, len(outputList) - 1): - assert_chat_interactive_stream_return(outputList[index], - index=index) - assert output.get('finish_reason') == 'length' - assert len(outputList) == 7 - def test_chat_interactive_repetition_penalty_batch(self): api_client = APIClient(BASE_URL) for output in api_client.chat_interactive_v1(prompt='Shanghai is', @@ -640,7 +478,7 @@ def test_chat_interactive_repetition_penalty_batch(self): request_output_len=512): continue assert_chat_interactive_batch_return(output) - assert 'a 上海 is a 上海, ' * 5 in output.get('text') + assert base_rps_frac_chars_in_dupe_ngrams(6, output.get('text')) > 90 def test_chat_interactive_with_history_batch(self): api_client = APIClient(BASE_URL) @@ -715,7 +553,6 @@ def test_chat_interactive_topp_min_stream(self): assert responseList[0] == responseList[1] assert responseList[1] == responseList[2] - @pytest.mark.tmp def test_chat_interactive_longinput_batch(self): api_client = APIClient(BASE_URL) for output in api_client.chat_interactive_v1( @@ -724,7 +561,6 @@ def test_chat_interactive_longinput_batch(self): assert output.get('finish_reason') == 'length' assert output.get('text') == '' 
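+        # an over-long prompt is expected to be rejected up front:
+        # finish_reason 'length' with an empty generated text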
- @pytest.mark.tmp def test_chat_interactive_longinput_stream(self): api_client = APIClient(BASE_URL) outputList = [] @@ -736,72 +572,3 @@ def test_chat_interactive_longinput_stream(self): assert outputList[0].get('finish_reason') == 'length', outputList assert outputList[0].get('text') == '' assert len(outputList) == 1 - - -def assert_chat_completions_batch_return(output): - assert output.get('usage').get('prompt_tokens') > 0 - assert output.get('usage').get('total_tokens') > 0 - assert output.get('usage').get('completion_tokens') > 0 - assert output.get('usage').get('completion_tokens') + output.get( - 'usage').get('prompt_tokens') == output.get('usage').get( - 'total_tokens') - assert output.get('id') is not None - assert output.get('object') == 'chat.completion' - assert output.get('model') == MODEL_NAME - output_message = output.get('choices') - assert len(output_message) == 1 - for message in output_message: - assert message.get('finish_reason') in ['stop', 'length'] - assert message.get('index') == 0 - assert len(message.get('message').get('content')) > 0 - assert message.get('message').get('role') == 'assistant' - - -def assert_chat_completions_stream_return(output, - is_first: bool = False, - is_last: bool = False): - assert output.get('id') is not None - if is_first is False: - assert output.get('object') == 'chat.completion.chunk' - assert output.get('model') == MODEL_NAME - output_message = output.get('choices') - assert len(output_message) == 1 - for message in output_message: - assert message.get('delta').get('role') == 'assistant' - assert message.get('index') == 0 - if is_last is False: - assert message.get('finish_reason') is None - if is_first is False and is_last is False: - assert len(message.get('delta').get('content')) >= 0 - if is_last is True: - assert len(message.get('delta').get('content')) == 0 - assert message.get('finish_reason') in ['stop', 'length'] - - -def assert_chat_interactive_batch_return(output): - assert output.get('input_tokens') > 0 - assert output.get('tokens') > 0 - assert output.get('history_tokens') >= 0 - assert output.get('finish_reason') in ['stop', 'length'] - assert len(output.get('text')) > 0 - - -def assert_chat_interactive_stream_return(output, - is_last: bool = False, - is_text_empty: bool = False, - index: int = None): - assert output.get('input_tokens') > 0 - if index is not None: - assert output.get('tokens') >= index + 1 and output.get( - 'tokens') <= index + 6 - assert output.get('tokens') > 0 - assert output.get('history_tokens') >= 0 - if is_last: - assert len(output.get('text')) >= 0 - assert output.get('finish_reason') in ['stop', 'length'] - elif is_text_empty: - assert len(output.get('text')) == 0 - assert output.get('finish_reason') is None - else: - assert len(output.get('text')) >= 0 - assert output.get('finish_reason') is None diff --git a/autotest/interface/restful/test_restful_interface_func_pytorch.py b/autotest/interface/restful/test_restful_interface_func_pytorch.py new file mode 100644 index 0000000000..b026e6360b --- /dev/null +++ b/autotest/interface/restful/test_restful_interface_func_pytorch.py @@ -0,0 +1,286 @@ +import pytest +from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams +from utils.restful_return_check import (assert_chat_completions_batch_return, + assert_chat_completions_stream_return, + assert_chat_interactive_batch_return, + assert_chat_interactive_stream_return) + +from lmdeploy.serve.openai.api_client import APIClient + +BASE_HTTP_URL = 'http://10.140.0.187' +DEFAULT_PORT = 
23334 +MODEL = 'internlm/internlm2-chat-20b' +MODEL_NAME = 'internlm2-chat-20b' +BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_pytorch +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatCompletions: + + def test_chat_completions_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + max_tokens=100, + temperature=0.01): + continue + assert_chat_completions_batch_return(output, MODEL_NAME) + assert output.get('usage').get( + 'completion_tokens') == 101 or output.get('usage').get( + 'completion_tokens') == 100 + assert output.get('choices')[0].get('finish_reason') == 'length' + + def test_chat_completions_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + stream=True, + max_tokens=100, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 102 + + def test_chat_completions_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + max_tokens=5, + temperature=0.01): + continue + assert_chat_completions_batch_return(output, MODEL_NAME) + assert output.get('choices')[0].get('finish_reason') == 'length' + assert output.get('usage').get('completion_tokens') == 6 or output.get( + 'usage').get('completion_tokens') == 5 + + def test_chat_completions_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + max_tokens=5, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 7 + + def test_chat_completions_repetition_penalty_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + repetition_penalty=0.1, + temperature=0.01, + max_tokens=200): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + response += outputList[index].get('choices')[0].get('delta').get( + 'content') + assert base_rps_frac_chars_in_dupe_ngrams(6, response) > 90 + + def test_chat_completions_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in 
range(3):
+            for output in api_client.chat_completions_v1(
+                    model=MODEL_NAME,
+                    messages='Shanghai is',
+                    top_p=0.1,
+                    temperature=0.01):
+                outputList.append(output)
+            assert_chat_completions_batch_return(output, MODEL_NAME)
+            print(output)
+        assert outputList[0].get('choices')[0].get('message').get(
+            'content') == outputList[1].get('choices')[0].get('message').get(
+                'content')
+        assert outputList[1].get('choices')[0].get('message').get(
+            'content') == outputList[2].get('choices')[0].get('message').get(
+                'content')
+
+    def test_chat_completions_topp_min_stream(self):
+        api_client = APIClient(BASE_URL)
+        responseList = []
+        for i in range(3):
+            outputList = []
+            response = ''
+            for output in api_client.chat_completions_v1(
+                    model=MODEL_NAME,
+                    messages='Hi, pls intro yourself',
+                    stream=True,
+                    top_p=0.1,
+                    temperature=0.01):
+                outputList.append(output)
+            assert_chat_completions_stream_return(outputList[0], MODEL_NAME,
+                                                  True, False)
+            assert_chat_completions_stream_return(outputList[-1], MODEL_NAME,
+                                                  False, True)
+            for index in range(1, len(outputList) - 1):
+                assert_chat_completions_stream_return(outputList[index],
+                                                      MODEL_NAME)
+                response += outputList[index].get('choices')[0].get(
+                    'delta').get('content')
+            responseList.append(response)
+        assert responseList[0] == responseList[1]
+        assert responseList[1] == responseList[2]
+
+    def test_chat_completions_longinput_stream(self):
+        api_client = APIClient(BASE_URL)
+        outputList = []
+        for output in api_client.chat_completions_v1(
+                model=MODEL_NAME,
+                messages='Hi, pls intro yourself' * 10000,
+                stream=True,
+                temperature=0.01):
+            outputList.append(output)
+        assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True,
+                                              False)
+        for index in range(1, len(outputList) - 1):
+            assert_chat_completions_stream_return(outputList[index],
+                                                  MODEL_NAME)
+        assert outputList[1].get('choices')[0].get('finish_reason') == 'length'
+        assert outputList[1].get('choices')[0].get('delta').get(
+            'content') == ''
+        assert len(outputList) == 2
+
+
+@pytest.mark.order(7)
+@pytest.mark.restful_interface_pytorch
+@pytest.mark.flaky(reruns=2)
+class TestRestfulInterfaceChatInteractive:
+
+    def test_chat_interactive_ignore_eos_batch(self):
+        api_client = APIClient(BASE_URL)
+        for output in api_client.chat_interactive_v1(
+                prompt='Hi, what is your name?',
+                ignore_eos=True,
+                request_output_len=100,
+                temperature=0.01):
+            continue
+        assert_chat_interactive_batch_return(output)
+        assert output.get('tokens') == 100
+        assert output.get('finish_reason') == 'length'
+
+    def test_chat_interactive_ignore_eos_stream(self):
+        api_client = APIClient(BASE_URL)
+        outputList = []
+        for output in api_client.chat_interactive_v1(
+                prompt='Hi, what is your name?',
+                ignore_eos=True,
+                stream=True,
+                request_output_len=100,
+                temperature=0.01):
+            outputList.append(output)
+            print(output)
+        assert_chat_interactive_stream_return(outputList[-1],
+                                              True,
+                                              index=len(outputList) - 2)
+        for index in range(0, len(outputList) - 1):
+            assert_chat_interactive_stream_return(outputList[index],
+                                                  index=index)
+        assert output.get('finish_reason') == 'length'
+        assert len(outputList) == 101
+
+    def test_chat_interactive_max_tokens_batch(self):
+        api_client = APIClient(BASE_URL)
+        for output in api_client.chat_interactive_v1(
+                prompt='Hi, pls intro yourself',
+                request_output_len=5,
+                temperature=0.01):
+            continue
+        assert_chat_interactive_batch_return(output)
+        assert output.get('finish_reason') == 
'length' + assert output.get('tokens') == 5 + + def test_chat_interactive_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + stream=True, + request_output_len=5, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 6 + + def test_chat_interactive_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + top_p=0.01, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + outputList.append(output) + print(output) + assert outputList[0] == outputList[1] + assert outputList[1] == outputList[2] + + def test_chat_interactive_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_interactive_v1( + model=MODEL_NAME, + prompt='Hi, pls intro yourself', + stream=True, + top_p=0.01, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + response += outputList[index].get('text') + responseList.append(response) + assert responseList[0] == responseList[1] + assert responseList[1] == responseList[2] diff --git a/autotest/interface/restful/test_restful_interface_func_turbomind.py b/autotest/interface/restful/test_restful_interface_func_turbomind.py new file mode 100644 index 0000000000..33ff8e2dfa --- /dev/null +++ b/autotest/interface/restful/test_restful_interface_func_turbomind.py @@ -0,0 +1,269 @@ +import pytest +from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams +from utils.restful_return_check import (assert_chat_completions_batch_return, + assert_chat_completions_stream_return, + assert_chat_interactive_batch_return, + assert_chat_interactive_stream_return) + +from lmdeploy.serve.openai.api_client import APIClient + +BASE_HTTP_URL = 'http://10.140.0.187' +DEFAULT_PORT = 23333 +MODEL = 'internlm/internlm2-chat-20b' +MODEL_NAME = 'internlm2-chat-20b' +BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) + + +@pytest.mark.order(7) +@pytest.mark.turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatCompletions: + + def test_chat_completions_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + max_tokens=100, + temperature=0.01): + continue + assert_chat_completions_batch_return(output, MODEL_NAME) + assert output.get('usage').get('completion_tokens') == 101 + assert output.get('choices')[0].get('finish_reason') == 'length' + + def test_chat_completions_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + stream=True, + max_tokens=100, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], 
MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 103 + + def test_chat_completions_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + max_tokens=5, + temperature=0.01): + continue + assert_chat_completions_batch_return(output, MODEL_NAME) + assert output.get('choices')[0].get('finish_reason') == 'length' + assert output.get('usage').get('completion_tokens') == 6 + + def test_chat_completions_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + max_tokens=5, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 8 + + def test_chat_completions_repetition_penalty_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + repetition_penalty=0.1, + temperature=0.01, + max_tokens=200): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + response += outputList[index].get('choices')[0].get('delta').get( + 'content') + assert base_rps_frac_chars_in_dupe_ngrams(6, response) > 90 + + def test_chat_completions_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_completions_v1( + model=MODEL_NAME, messages='Shanghai is', top_p=0.1): + outputList.append(output) + assert_chat_completions_batch_return(output, MODEL_NAME) + assert outputList[0].get('choices')[0].get('message').get( + 'content') == outputList[1].get('choices')[0].get('message').get( + 'content') + assert outputList[1].get('choices')[0].get('message').get( + 'content') == outputList[2].get('choices')[0].get('message').get( + 'content') + + def test_chat_completions_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + top_p=0.1): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, + True, False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + response += outputList[index].get('choices')[0].get( + 'delta').get('content') + responseList.append(response) + assert 
responseList[0] == responseList[1] + assert responseList[1] == responseList[2] + + def test_chat_completions_longinput_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself' * 10000, + stream=True, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert outputList[1].get('choices')[0].get('finish_reason') == 'length' + assert outputList[1].get('choices')[0].get('delta').get( + 'content') == '' + assert len(outputList) == 2 + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatInteractive: + + def test_chat_interactive_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, what is your name?', + ignore_eos=True, + request_output_len=100, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert output.get('tokens') == 101 + assert output.get('finish_reason') == 'length' + + def test_chat_interactive_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, what is your name?', + ignore_eos=True, + stream=True, + request_output_len=100, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 102 + + def test_chat_interactive_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + request_output_len=5, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert output.get('finish_reason') == 'length' + assert output.get('tokens') == 6 + + def test_chat_interactive_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + stream=True, + request_output_len=5, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 7 + + def test_chat_interactive_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + top_p=0.01): + continue + assert_chat_interactive_batch_return(output) + outputList.append(output) + assert outputList[0] == outputList[1] + assert outputList[1] == outputList[2] + + def test_chat_interactive_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_interactive_v1( + model=MODEL_NAME, + prompt='Hi, pls intro yourself', + stream=True, + top_p=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + 
assert_chat_interactive_stream_return(outputList[index], + index=index) + response += outputList[index].get('text') + responseList.append(response) + assert responseList[0] == responseList[1] + assert responseList[1] == responseList[2] diff --git a/autotest/utils/content_detect_utils.py b/autotest/utils/content_detect_utils.py new file mode 100644 index 0000000000..7611c98ad3 --- /dev/null +++ b/autotest/utils/content_detect_utils.py @@ -0,0 +1,94 @@ +import re +import string +import unicodedata +from collections import Counter + +import numpy + +TRANSLATION_TABLE_PUNCTUATION = str.maketrans('', '', string.punctuation) + + +def normalize(text: str, + remove_punct: bool = True, + lowercase: bool = True, + nfd_unicode: bool = True, + white_space: bool = True) -> str: + """Normalize the text by lowercasing and removing punctuation.""" + # remove punctuation + if remove_punct: + text = text.translate(TRANSLATION_TABLE_PUNCTUATION) + + # lowercase + if lowercase: + text = text.lower() + + if white_space: + text = text.strip() + text = re.sub(r'\s+', ' ', text) + + # NFD unicode normalization + if nfd_unicode: + text = unicodedata.normalize('NFD', text) + + return text + + +def form_ngrams(sequence, n): + history = [] + # build the first ngram, yielding only when we have a full ngram + while n > 1: + try: + next_item = next(sequence) + except StopIteration: + # no more data, terminate the generator + return + history.append(next_item) + n -= 1 + + # yield each ngram we have, then add the next item and repeat + for item in sequence: + history.append(item) + yield tuple(history) + del history[0] + + +def base_rps_frac_chars_in_dupe_ngrams(NGRAM_SIZE: int = 5, content: str = ''): + """Base class for calculating the fraction of characters in duplicate word + N-grams. + + This operates on the lower-cased, punctuation removed content. The function + also ensures that characters in overlapping ngrams are only counted once. 
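+ + Illustrative example (derived from the logic below, with NGRAM_SIZE=2): the content 'a b a b' yields the bigrams ('a', 'b'), ('b', 'a'), ('a', 'b'); the repeated ('a', 'b') marks all four words as duplicated, so every character is counted and the score is 100.0: + + >>> base_rps_frac_chars_in_dupe_ngrams(2, 'a b a b') + 100.0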
+ """ + normalized_content = normalize(content) + normalized_words = tuple(normalized_content.split()) + + if len(normalized_words) < NGRAM_SIZE: + return 0 + + # fetch the ngrams from the document if they exist, otherwise + # compute them + doc_n_grams = tuple(form_ngrams(iter(normalized_words), NGRAM_SIZE)) + + # keep only ngrams which occur at least twice + ngram_dupes = { + ngram + for ngram, count in Counter(doc_n_grams).items() if count > 1 + } + + duplicated_grams = numpy.zeros(len(normalized_words), dtype=int) + i = 0 + for ngram in doc_n_grams: + if ngram in ngram_dupes: + duplicated_grams[i:i + NGRAM_SIZE] = 1 + + i += 1 + + word_lengths = numpy.array(list(map(len, normalized_words))) + chars_duped = numpy.sum(word_lengths * duplicated_grams) + total_chars = numpy.sum(word_lengths) + + if total_chars == 0: + return 0 + + score = float(chars_duped / total_chars) * 100 + return score diff --git a/autotest/utils/restful_return_check.py b/autotest/utils/restful_return_check.py new file mode 100644 index 0000000000..b7832047d5 --- /dev/null +++ b/autotest/utils/restful_return_check.py @@ -0,0 +1,68 @@ +def assert_chat_completions_batch_return(output, model_name): + assert output.get('usage').get('prompt_tokens') > 0 + assert output.get('usage').get('total_tokens') > 0 + assert output.get('usage').get('completion_tokens') > 0 + assert output.get('usage').get('completion_tokens') + output.get( + 'usage').get('prompt_tokens') == output.get('usage').get( + 'total_tokens') + assert output.get('id') is not None + assert output.get('object') == 'chat.completion' + assert output.get('model') == model_name + output_message = output.get('choices') + assert len(output_message) == 1 + for message in output_message: + assert message.get('finish_reason') in ['stop', 'length'] + assert message.get('index') == 0 + assert len(message.get('message').get('content')) > 0 + assert message.get('message').get('role') == 'assistant' + + +def assert_chat_completions_stream_return(output, + model_name, + is_first: bool = False, + is_last: bool = False): + assert output.get('id') is not None + if is_first is False: + assert output.get('object') == 'chat.completion.chunk' + assert output.get('model') == model_name + output_message = output.get('choices') + assert len(output_message) == 1 + for message in output_message: + assert message.get('delta').get('role') == 'assistant' + assert message.get('index') == 0 + if is_last is False: + assert message.get('finish_reason') is None + if is_first is False and is_last is False: + assert len(message.get('delta').get('content')) >= 0 + if is_last is True: + assert len(message.get('delta').get('content')) == 0 + assert message.get('finish_reason') in ['stop', 'length'] + + +def assert_chat_interactive_batch_return(output): + assert output.get('input_tokens') > 0 + assert output.get('tokens') > 0 + assert output.get('history_tokens') >= 0 + assert output.get('finish_reason') in ['stop', 'length'] + assert len(output.get('text')) > 0 + + +def assert_chat_interactive_stream_return(output, + is_last: bool = False, + is_text_empty: bool = False, + index: int = None): + assert output.get('input_tokens') > 0 + if index is not None: + assert output.get('tokens') >= index and output.get( + 'tokens') <= index + 6 + assert output.get('tokens') > 0 + assert output.get('history_tokens') >= 0 + if is_last: + assert len(output.get('text')) >= 0 + assert output.get('finish_reason') in ['stop', 'length'] + elif is_text_empty: + assert len(output.get('text')) == 0 + assert 
output.get('finish_reason') is None + else: + assert len(output.get('text')) >= 0 + assert output.get('finish_reason') is None From 72865b1f556475f14be1c8a4004521eb78651195 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 14:23:41 +0800 Subject: [PATCH 4/7] update feishu notify --- .github/workflows/daily_ete_test.yml | 6 ++++- .../test_restful_interface_func_common.py | 25 +++++++++++-------- .../test_restful_interface_func_pytorch.py | 8 +++--- .../test_restful_interface_func_turbomind.py | 8 +++--- 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/.github/workflows/daily_ete_test.yml b/.github/workflows/daily_ete_test.yml index 5916957079..728a467ab8 100644 --- a/.github/workflows/daily_ete_test.yml +++ b/.github/workflows/daily_ete_test.yml @@ -323,4 +323,8 @@ jobs: - name: fail notify if: contains(needs.*.result, 'failure') run: | - curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }} + curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test failed!!!","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }} + - name: success notify + if: needs.test_functions.result=='success' && needs.test_triton.result=='success' + run: | + curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test success","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} succeeded. 
"},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }} diff --git a/autotest/interface/restful/test_restful_interface_func_common.py b/autotest/interface/restful/test_restful_interface_func_common.py index ac157ef3a6..e54b6bba96 100644 --- a/autotest/interface/restful/test_restful_interface_func_common.py +++ b/autotest/interface/restful/test_restful_interface_func_common.py @@ -12,16 +12,17 @@ from lmdeploy.serve.openai.api_client import APIClient, get_model_list -BASE_HTTP_URL = 'http://10.140.0.187' -DEFAULT_PORT = 23334 +BASE_HTTP_URL = 'http://localhost' +DEFAULT_PORT = 23333 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) -@pytest.mark.order(7) -@pytest.mark.restful_interface_common -@pytest.mark.flaky(reruns=0) +@pytest.mark.order(8) +@pytest.mark.turbomind +@pytest.mark.pytorch +@pytest.mark.flaky(reruns=2) class TestRestfulInterfaceBase: def test_issue1232(self): @@ -87,9 +88,10 @@ def test_encode(self): assert input_ids5 == input_ids2 * 100 -@pytest.mark.order(7) -@pytest.mark.restful_interface_common -@pytest.mark.flaky(reruns=0) +@pytest.mark.order(8) +@pytest.mark.turbomind +@pytest.mark.pytorch +@pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatCompletions: def test_chat_completions_check_return_batch1(self): @@ -315,9 +317,10 @@ def test_chat_completions_longinput_batch(self): assert output.get('choices')[0].get('message').get('content') == '' -@pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind -@pytest.mark.flaky(reruns=0) +@pytest.mark.order(8) +@pytest.mark.turbomind +@pytest.mark.pytorch +@pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatInteractive: def test_chat_interactive_check_return_batch1(self): diff --git a/autotest/interface/restful/test_restful_interface_func_pytorch.py b/autotest/interface/restful/test_restful_interface_func_pytorch.py index b026e6360b..2a007f21f8 100644 --- a/autotest/interface/restful/test_restful_interface_func_pytorch.py +++ b/autotest/interface/restful/test_restful_interface_func_pytorch.py @@ -7,15 +7,15 @@ from lmdeploy.serve.openai.api_client import APIClient -BASE_HTTP_URL = 'http://10.140.0.187' -DEFAULT_PORT = 23334 +BASE_HTTP_URL = 'http://localhost' +DEFAULT_PORT = 23333 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) @pytest.mark.order(7) -@pytest.mark.restful_interface_pytorch +@pytest.mark.pytorch @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatCompletions: @@ -182,7 +182,7 @@ def test_chat_completions_longinput_stream(self): @pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind +@pytest.mark.pytorch @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatInteractive: diff --git a/autotest/interface/restful/test_restful_interface_func_turbomind.py b/autotest/interface/restful/test_restful_interface_func_turbomind.py index 33ff8e2dfa..013c157db0 100644 --- a/autotest/interface/restful/test_restful_interface_func_turbomind.py +++ b/autotest/interface/restful/test_restful_interface_func_turbomind.py @@ -7,14 +7,14 @@ from lmdeploy.serve.openai.api_client import APIClient -BASE_HTTP_URL = 'http://10.140.0.187' +BASE_HTTP_URL = 'http://localhost' DEFAULT_PORT = 23333 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) 
-@pytest.mark.order(7) +@pytest.mark.order(8) @pytest.mark.turbomind @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatCompletions: @@ -168,8 +168,8 @@ def test_chat_completions_longinput_stream(self): assert len(outputList) == 2 -@pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind +@pytest.mark.order(8) +@pytest.mark.turbomind @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatInteractive: From 458e977d1cd796a37257c64c0c294142c2a297b6 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 15:02:10 +0800 Subject: [PATCH 5/7] update --- .../test_restful_interface_func_common.py | 12 +-- .../test_restful_interface_func_pytorch.py | 7 +- .../test_restful_interface_func_turbomind.py | 3 +- autotest/utils/content_detect_utils.py | 94 ------------------- 4 files changed, 9 insertions(+), 107 deletions(-) delete mode 100644 autotest/utils/content_detect_utils.py diff --git a/autotest/interface/restful/test_restful_interface_func_common.py b/autotest/interface/restful/test_restful_interface_func_common.py index e54b6bba96..b10bc62a1d 100644 --- a/autotest/interface/restful/test_restful_interface_func_common.py +++ b/autotest/interface/restful/test_restful_interface_func_common.py @@ -4,7 +4,6 @@ import pytest from tqdm import tqdm -from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams from utils.restful_return_check import (assert_chat_completions_batch_return, assert_chat_completions_stream_return, assert_chat_interactive_batch_return, @@ -12,8 +11,8 @@ from lmdeploy.serve.openai.api_client import APIClient, get_model_list -BASE_HTTP_URL = 'http://localhost' -DEFAULT_PORT = 23333 +BASE_HTTP_URL = 'http://10.140.0.187' +DEFAULT_PORT = 23334 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) @@ -262,9 +261,8 @@ def test_chat_completions_repetition_penalty_batch(self): max_tokens=200): continue assert_chat_completions_batch_return(output, MODEL_NAME) - assert base_rps_frac_chars_in_dupe_ngrams( - 6, - output.get('choices')[0].get('message').get('content')) > 80 + assert ' is is' * 5 in output.get('choices')[0].get('message').get( + 'content') or ' a a' * 5 in output.get('choices')[0].get('message').get('content') def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) @@ -481,7 +479,7 @@ def test_chat_interactive_repetition_penalty_batch(self): request_output_len=512): continue assert_chat_interactive_batch_return(output) - assert base_rps_frac_chars_in_dupe_ngrams(6, output.get('text')) > 90 + assert 'a 上海 is a 上海, ' * 5 in output.get('text') def test_chat_interactive_with_history_batch(self): api_client = APIClient(BASE_URL) diff --git a/autotest/interface/restful/test_restful_interface_func_pytorch.py b/autotest/interface/restful/test_restful_interface_func_pytorch.py index 2a007f21f8..0d87137736 100644 --- a/autotest/interface/restful/test_restful_interface_func_pytorch.py +++ b/autotest/interface/restful/test_restful_interface_func_pytorch.py @@ -1,5 +1,4 @@ import pytest -from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams from utils.restful_return_check import (assert_chat_completions_batch_return, assert_chat_completions_stream_return, assert_chat_interactive_batch_return, @@ -14,7 +13,7 @@ BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) -@pytest.mark.order(7) +@pytest.mark.order(8) @pytest.mark.pytorch @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatCompletions: @@ -112,7 +111,7 @@ def 
test_chat_completions_repetition_penalty_stream(self): MODEL_NAME) response += outputList[index].get('choices')[0].get('delta').get( 'content') - assert base_rps_frac_chars_in_dupe_ngrams(6, response) > 90 + assert 'pls pls ' * 5 in response or 'Hi, pls intro yourself\n' * 5 in response, response def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) @@ -181,7 +180,7 @@ def test_chat_completions_longinput_stream(self): assert len(outputList) == 2 -@pytest.mark.order(7) +@pytest.mark.order(8) @pytest.mark.pytorch @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatInteractive: diff --git a/autotest/interface/restful/test_restful_interface_func_turbomind.py b/autotest/interface/restful/test_restful_interface_func_turbomind.py index 013c157db0..15ce7d6efe 100644 --- a/autotest/interface/restful/test_restful_interface_func_turbomind.py +++ b/autotest/interface/restful/test_restful_interface_func_turbomind.py @@ -1,5 +1,4 @@ import pytest -from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams from utils.restful_return_check import (assert_chat_completions_batch_return, assert_chat_completions_stream_return, assert_chat_interactive_batch_return, @@ -109,7 +108,7 @@ def test_chat_completions_repetition_penalty_stream(self): MODEL_NAME) response += outputList[index].get('choices')[0].get('delta').get( 'content') - assert base_rps_frac_chars_in_dupe_ngrams(6, response) > 90 + assert 'pls pls ' * 5 in response or 'Hi, pls intro yourself\n' * 5 in response, response def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) diff --git a/autotest/utils/content_detect_utils.py b/autotest/utils/content_detect_utils.py deleted file mode 100644 index 7611c98ad3..0000000000 --- a/autotest/utils/content_detect_utils.py +++ /dev/null @@ -1,94 +0,0 @@ -import re -import string -import unicodedata -from collections import Counter - -import numpy - -TRANSLATION_TABLE_PUNCTUATION = str.maketrans('', '', string.punctuation) - - -def normalize(text: str, - remove_punct: bool = True, - lowercase: bool = True, - nfd_unicode: bool = True, - white_space: bool = True) -> str: - """Normalize the text by lowercasing and removing punctuation.""" - # remove punctuation - if remove_punct: - text = text.translate(TRANSLATION_TABLE_PUNCTUATION) - - # lowercase - if lowercase: - text = text.lower() - - if white_space: - text = text.strip() - text = re.sub(r'\s+', ' ', text) - - # NFD unicode normalization - if nfd_unicode: - text = unicodedata.normalize('NFD', text) - - return text - - -def form_ngrams(sequence, n): - history = [] - # build the first ngram, yielding only when we have a full ngram - while n > 1: - try: - next_item = next(sequence) - except StopIteration: - # no more data, terminate the generator - return - history.append(next_item) - n -= 1 - - # yield each ngram we have, then add the next item and repeat - for item in sequence: - history.append(item) - yield tuple(history) - del history[0] - - -def base_rps_frac_chars_in_dupe_ngrams(NGRAM_SIZE: int = 5, content: str = ''): - """Base class for calculating the fraction of characters in duplicate word - N-grams. - - This operates on the lower-cased, punctuation removed content. The function - also ensures that characters in overlapping ngrams are only counted once. 
- """ - normalized_content = normalize(content) - normalized_words = tuple(normalized_content.split()) - - if len(normalized_words) < NGRAM_SIZE: - return 0 - - # fetch the ngrams from the document if they exist, otherwise - # compute them - doc_n_grams = tuple(form_ngrams(iter(normalized_words), NGRAM_SIZE)) - - # keep only ngrams which occur at least twice - ngram_dupes = { - ngram - for ngram, count in Counter(doc_n_grams).items() if count > 1 - } - - duplicated_grams = numpy.zeros(len(normalized_words), dtype=int) - i = 0 - for ngram in doc_n_grams: - if ngram in ngram_dupes: - duplicated_grams[i:i + NGRAM_SIZE] = 1 - - i += 1 - - word_lengths = numpy.array(list(map(len, normalized_words))) - chars_duped = numpy.sum(word_lengths * duplicated_grams) - total_chars = numpy.sum(word_lengths) - - if total_chars == 0: - return 0 - - score = float(chars_duped / total_chars) * 100 - return score From 7389d674cf126c570daf03ca4335b7dd7ad2384f Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 15:03:17 +0800 Subject: [PATCH 6/7] update --- .../interface/restful/test_restful_interface_func_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autotest/interface/restful/test_restful_interface_func_common.py b/autotest/interface/restful/test_restful_interface_func_common.py index b10bc62a1d..60b7da57eb 100644 --- a/autotest/interface/restful/test_restful_interface_func_common.py +++ b/autotest/interface/restful/test_restful_interface_func_common.py @@ -262,7 +262,8 @@ def test_chat_completions_repetition_penalty_batch(self): continue assert_chat_completions_batch_return(output, MODEL_NAME) assert ' is is' * 5 in output.get('choices')[0].get('message').get( - 'content') or ' a a' * 5 in output.get('choices')[0].get('message').get('content') + 'content') or ' a a' * 5 in output.get('choices')[0].get( + 'message').get('content') def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) From 7ce28e27a14adc0ee56b3a3d162b4d9d3112c966 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 16:26:22 +0800 Subject: [PATCH 7/7] fix lint --- .../interface/restful/test_restful_interface_func_pytorch.py | 3 ++- .../interface/restful/test_restful_interface_func_turbomind.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/autotest/interface/restful/test_restful_interface_func_pytorch.py b/autotest/interface/restful/test_restful_interface_func_pytorch.py index 0d87137736..2709fb0df0 100644 --- a/autotest/interface/restful/test_restful_interface_func_pytorch.py +++ b/autotest/interface/restful/test_restful_interface_func_pytorch.py @@ -111,7 +111,8 @@ def test_chat_completions_repetition_penalty_stream(self): MODEL_NAME) response += outputList[index].get('choices')[0].get('delta').get( 'content') - assert 'pls pls ' * 5 in response or 'Hi, pls intro yourself\n' * 5 in response, response + assert 'pls pls ' * 5 in response or \ + 'Hi, pls intro yourself\n' * 5 in response def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) diff --git a/autotest/interface/restful/test_restful_interface_func_turbomind.py b/autotest/interface/restful/test_restful_interface_func_turbomind.py index 15ce7d6efe..51f55d2048 100644 --- a/autotest/interface/restful/test_restful_interface_func_turbomind.py +++ b/autotest/interface/restful/test_restful_interface_func_turbomind.py @@ -108,7 +108,8 @@ def test_chat_completions_repetition_penalty_stream(self): MODEL_NAME) response += 
outputList[index].get('choices')[0].get('delta').get( 'content') - assert 'pls pls ' * 5 in response or 'Hi, pls intro yourself\n' * 5 in response, response + assert 'pls pls ' * 5 in response or \ + 'Hi, pls intro yourself\n' * 5 in response def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL)