From 6b21d507023ad03810cc2d0ffff63c2eec628740 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 10:31:17 +0800 Subject: [PATCH 1/7] bugfix and add more testcases --- .github/workflows/pr_ete_test.yml | 3 + .../test_restful_interface_turbomind.py | 807 ++++++++++++++++++ .../pipeline/test_pipeline_chat_pytorch.py | 3 +- 3 files changed, 812 insertions(+), 1 deletion(-) create mode 100644 autotest/interface/restful/test_restful_interface_turbomind.py diff --git a/.github/workflows/pr_ete_test.yml b/.github/workflows/pr_ete_test.yml index 08bf24b4b7..94f2ef719f 100644 --- a/.github/workflows/pr_ete_test.yml +++ b/.github/workflows/pr_ete_test.yml @@ -15,6 +15,9 @@ on: - "setup.py" workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true env: HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache diff --git a/autotest/interface/restful/test_restful_interface_turbomind.py b/autotest/interface/restful/test_restful_interface_turbomind.py new file mode 100644 index 0000000000..dea7b34c79 --- /dev/null +++ b/autotest/interface/restful/test_restful_interface_turbomind.py @@ -0,0 +1,807 @@ +import random +from concurrent.futures import ThreadPoolExecutor +from random import randint + +import pytest +from tqdm import tqdm + +from lmdeploy.serve.openai.api_client import APIClient, get_model_list + +BASE_HTTP_URL = 'http://10.140.0.187' +DEFAULT_PORT = 23333 +MODEL = 'internlm/internlm2-chat-20b' +MODEL_NAME = 'internlm2-chat-20b' +BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceBase: + + def test_issue1232(self): + + def process_one(question): + api_client = APIClient(BASE_URL) + model_name = api_client.available_models[0] + + msg = [dict(role='user', content=question)] + + data = api_client.chat_interactive_v1(msg, + session_id=randint(1, 100), + repetition_penalty=1.02, + request_output_len=224) + for item in data: + pass + + data = api_client.chat_completions_v1(model=model_name, + messages=msg, + repetition_penalty=1.02, + stop=['<|im_end|>', '100'], + max_tokens=10) + + for item in data: + response = item + + return response + + with ThreadPoolExecutor(max_workers=256) as executor: + for response in tqdm(executor.map(process_one, ['你是谁'] * 500)): + continue + + def test_get_model(self): + api_client = APIClient(BASE_URL) + model_name = api_client.available_models[0] + assert model_name == MODEL_NAME, api_client.available_models + + model_list = get_model_list(BASE_URL + '/v1/models') + assert MODEL_NAME in model_list, model_list + + def test_encode(self): + api_client = APIClient(BASE_URL) + input_ids1, length1 = api_client.encode('Hi, pls intro yourself') + input_ids2, length2 = api_client.encode('Hi, pls intro yourself', + add_bos=False) + input_ids3, length3 = api_client.encode('Hi, pls intro yourself', + do_preprocess=True) + input_ids4, length4 = api_client.encode('Hi, pls intro yourself', + do_preprocess=True, + add_bos=False) + input_ids5, length5 = api_client.encode('Hi, pls intro yourself' * 100, + add_bos=False) + + assert len(input_ids1) == length1 and length1 > 0 + assert len(input_ids2) == length2 and length2 > 0 + assert len(input_ids3) == length3 and length3 > 0 + assert len(input_ids4) == length4 and length4 > 0 + assert len(input_ids5) == length5 and length5 > 0 + assert length1 == length2 + 1 + assert input_ids2 == input_ids1[1:] + assert input_ids1[0] 
== 1 and input_ids3[0] == 1 + assert length5 == length2 * 100 + assert input_ids5 == input_ids2 * 100 + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatCompletions: + + def test_chat_completions_check_return_batch1(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + + def test_chat_completions_check_return_batch2(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages=[{ + 'role': 'user', + 'content': 'Hi, pls intro yourself' + }], + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + + def test_chat_completions_check_return_stream1(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + + def test_chat_completions_check_return_stream2(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages=[{ + 'role': 'user', + 'content': 'Hi, pls intro yourself' + }], + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + + def test_chat_completions_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + max_tokens=100, + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert output.get('usage').get('completion_tokens') == 101 + assert output.get('choices')[0].get('finish_reason') == 'length' + + def test_chat_completions_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + stream=True, + max_tokens=100, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 103 + + def test_chat_completions_stopwords_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + stop=' is', + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert ' is' not in output.get('choices')[0].get('message').get( + 'content') + assert output.get('choices')[0].get('finish_reason') == 'stop' + + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + stop=[' is', 
'上海', ' to'], + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert ' is' not in output.get('choices')[0].get('message').get( + 'content') + assert ' 上海' not in output.get('choices')[0].get('message').get( + 'content') + assert ' to' not in output.get('choices')[0].get('message').get( + 'content') + assert output.get('choices')[0].get('finish_reason') == 'stop' + + def test_chat_completions_stopwords_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + stop=' is', + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + assert ' to' not in outputList[index].get('choices')[0].get( + 'delta').get('content') + assert outputList[-1].get('choices')[0].get('finish_reason') == 'stop' + + outputList = [] + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + stop=[' is', '上海', ' to'], + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + assert ' is' not in outputList[index].get('choices')[0].get( + 'delta').get('content') + assert '上海' not in outputList[index].get('choices')[0].get( + 'delta').get('content') + assert ' to' not in outputList[index].get('choices')[0].get( + 'delta').get('content') + assert outputList[-1].get('choices')[0].get('finish_reason') == 'stop' + + def test_chat_completions_special_words_batch(self): + message = '<|im_start|>system\n当开启工具以及代码时,根据需求选择合适的工具进行调用\n' + \ + '<|im_end|><|im_start|>system name=<|interpreter|>\n你现在已经' + \ + '能够在一个有状态的 Jupyter 笔记本环境中运行 Python 代码。当你向 python ' + \ + '发送含有 Python >代码的消息时,它将在该环境中执行。这个工具适用于多种场景,' + \ + '如数据分析或处理(包括数据操作、统计分析、图表绘制),复杂的计算问题(解决数学和物理' + \ + '难题),编程示例(理解编程概念或特性),文本处理和分析(比如文本解析和自然语言处理),机器学习和数据科学(用于' + \ + '展示模型训练和数据可视化),以及文件操作和数据导入(处理CSV、JSON等格式的文件)。<|im_end|>\n' + \ + '<|im_start|>user\n设 $L$ 为圆周$x^2+y^2=2x$,计算曲线积分:$I=\\int_L' + \ + '{x\\mathrm{d}s}=$<|im_end|>\n<|im_start|>assistant' + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages=message, + skip_special_tokens=False, + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert '<|action_start|><|interpreter|>' in output.get( + 'choices')[0].get('message').get('content') + + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages=message, + skip_special_tokens=True, + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert '<|action_start|><|interpreter|>' not in output.get( + 'choices')[0].get('message').get('content') + + def test_chat_completions_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + max_tokens=5, + temperature=0.01): + continue + assert_chat_completions_batch_return(output) + assert output.get('choices')[0].get('finish_reason') == 'length' + assert output.get('usage').get('completion_tokens') == 6 + + def 
test_chat_completions_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + max_tokens=5, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 8 + + def test_chat_completions_repetition_penalty_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1(model=MODEL_NAME, + messages='Shanghai is', + repetition_penalty=0.1, + temperature=0.01, + max_tokens=200): + continue + assert_chat_completions_batch_return(output) + assert ' is is' * 5 in output.get('choices')[0].get('message').get( + 'content') + + def test_chat_completions_repetition_penalty_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + repetition_penalty=0.1, + temperature=0.01, + max_tokens=200): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + response += outputList[index].get('choices')[0].get('delta').get( + 'content') + assert 'pls pls ' * 5 in response, response + + def test_chat_completions_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_completions_v1( + model=MODEL_NAME, messages='Shanghai is', top_p=0.1): + outputList.append(output) + assert_chat_completions_batch_return(output) + assert outputList[0].get('choices')[0].get('message').get( + 'content') == outputList[1].get('choices')[0].get('message').get( + 'content') + assert outputList[1].get('choices')[0].get('message').get( + 'content') == outputList[2].get('choices')[0].get('message').get( + 'content') + + def test_chat_completions_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + top_p=0.1): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], True, False) + assert_chat_completions_stream_return(outputList[-1], False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index]) + response += outputList[index].get('choices')[0].get( + 'delta').get('content') + responseList.append(response) + assert responseList[0] == responseList[1] + assert responseList[1] == responseList[2] + + def test_chat_completions_mis_model_name_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model='error', messages='Hi, pls intro yourself', + temperature=0.01): + continue + assert output.get('code') == 404 + assert output.get('message') == 'The model `error` does not exist.' 
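+        # a bad model name should come back as an OpenAI-style error payload
+        # (code / message / object fields) rather than a completion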
+ assert output.get('object') == 'error' + + def test_chat_completions_mis_model_name_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model='error', + messages='Hi, pls intro yourself', + stream=True, + max_tokens=5, + temperature=0.01): + outputList.append(output) + assert output.get('code') == 404 + assert output.get('message') == 'The model `error` does not exist.' + assert output.get('object') == 'error' + assert len(outputList) == 1 + + def test_chat_completions_longinput_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself' * 10000, + temperature=0.01): + continue + assert output.get('choices')[0].get('finish_reason') == 'length' + assert output.get('choices')[0].get('message').get('content') == '' + + def test_chat_completions_longinput_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself' * 10000, + stream=True, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], True, False) + assert outputList[1].get('choices')[0].get('finish_reason') == 'length' + assert outputList[1].get('choices')[0].get('delta').get( + 'content') == '' + assert len(outputList) == 2 + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatInteractive: + + def test_chat_interactive_check_return_batch1(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + + def test_chat_interactive_check_return_batch2(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1(prompt=[{ + 'role': + 'user', + 'content': + 'Hi, pls intro yourself' + }], + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + + def test_chat_interactive_check_return_stream1(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', stream=True, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + assert_chat_interactive_stream_return(outputList[-2], + False, + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 2): + assert_chat_interactive_stream_return(outputList[index], + index=index) + + def test_chat_interactive_check_return_stream2(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1(prompt=[{ + 'role': + 'user', + 'content': + 'Hi, pls intro yourself' + }], + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + assert_chat_interactive_stream_return(outputList[-2], + False, + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 2): + assert_chat_interactive_stream_return(outputList[index], + index=index) + + def test_chat_interactive_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, what is your name?', + ignore_eos=True, + request_output_len=100, + temperature=0.01): + continue + 
assert_chat_interactive_batch_return(output) + assert output.get('tokens') == 101 + assert output.get('finish_reason') == 'length' + + def test_chat_interactive_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, what is your name?', + ignore_eos=True, + stream=True, + request_output_len=100, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 102 + + def test_chat_interactive_stopwords_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + stop=' is', + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert ' is' not in output.get('text') + assert output.get('finish_reason') == 'stop' + + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + stop=[' is', '上海', ' to'], + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert ' is' not in output.get('text') + assert ' 上海' not in output.get('text') + assert ' to' not in output.get('text') + assert output.get('finish_reason') == 'stop' + + def test_chat_interactive_stopwords_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + stop=' is', + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + assert_chat_interactive_stream_return(outputList[-2], + False, + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 2): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert ' to' not in outputList[index].get('text') + assert output.get('finish_reason') == 'stop' + + outputList = [] + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + stop=[' is', '上海', ' to'], + stream=True, + temperature=0.01): + outputList.append(output) + + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + assert_chat_interactive_stream_return(outputList[-2], + False, + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 2): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert ' is' not in outputList[index].get('text') + assert '上海' not in outputList[index].get('text') + assert ' to' not in outputList[index].get('text') + assert output.get('finish_reason') == 'stop' + + def test_chat_interactive_special_words_batch(self): + message = '<|im_start|>system\n当开启工具以及代码时,根据需求选择合适的工具进行调用\n' + \ + '<|im_end|><|im_start|>system name=<|interpreter|>\n你现在已经' + \ + '能够在一个有状态的 Jupyter 笔记本环境中运行 Python 代码。当你向 python ' + \ + '发送含有 Python >代码的消息时,它将在该环境中执行。这个工具适用于多种场景,' + \ + '如数据分析或处理(包括数据操作、统计分析、图表绘制),复杂的计算问题(解决数学和物理' + \ + '难题),编程示例(理解编程概念或特性),文本处理和分析(比如文本解析和自然语言处理),机器学习和数据科学(用于' + \ + '展示模型训练和数据可视化),以及文件操作和数据导入(处理CSV、JSON等格式的文件)。<|im_end|>\n' + \ + '<|im_start|>user\n设 $L$ 为圆周$x^2+y^2=2x$,计算曲线积分:$I=\\int_L' + \ + '{x\\mathrm{d}s}=$<|im_end|>\n<|im_start|>assistant' + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1(prompt=message, + skip_special_tokens=False, + temperature=0.01): + continue + 
assert_chat_interactive_batch_return(output) + assert '<|action_start|><|interpreter|>' in output.get('text') + + for output in api_client.chat_interactive_v1(prompt=message, + skip_special_tokens=True, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert '<|action_start|><|interpreter|>' not in output.get('text') + + def test_chat_interactive_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + request_output_len=5, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert output.get('finish_reason') == 'length' + assert output.get('tokens') == 6 + + def test_chat_interactive_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + stream=True, + request_output_len=5, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 7 + + def test_chat_interactive_repetition_penalty_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + repetition_penalty=0.1, + temperature=0.01, + request_output_len=512): + continue + assert_chat_interactive_batch_return(output) + assert 'a 上海 is a 上海, ' * 5 in output.get('text') + + def test_chat_interactive_with_history_batch(self): + api_client = APIClient(BASE_URL) + history = 0 + session_id = random.randint(0, 100000) + for i in range(3): + for output in api_client.chat_interactive_v1( + prompt='Shanghai is', + temperature=0.01, + interactive_mode=True, + session_id=session_id): + continue + assert_chat_interactive_batch_return(output) + assert output.get('history_tokens') == history + history += output.get('input_tokens') + output.get('tokens') + + def test_chat_interactive_with_history_stream(self): + api_client = APIClient(BASE_URL) + history = 0 + session_id = random.randint(0, 100000) + for i in range(3): + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + stream=True, + temperature=0.01, + interactive_mode=True, + session_id=session_id): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert outputList[-1].get('history_tokens') == history + history += outputList[-1].get('input_tokens') + outputList[-1].get( + 'tokens') + + def test_chat_interactive_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + top_p=0.01): + continue + assert_chat_interactive_batch_return(output) + outputList.append(output) + assert outputList[0] == outputList[1] + assert outputList[1] == outputList[2] + + def test_chat_interactive_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_interactive_v1( + model=MODEL_NAME, + prompt='Hi, pls intro yourself', + stream=True, + top_p=0.01): + outputList.append(output) + 
assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + response += outputList[index].get('text') + responseList.append(response) + assert responseList[0] == responseList[1] + assert responseList[1] == responseList[2] + + @pytest.mark.tmp + def test_chat_interactive_longinput_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself' * 10000, temperature=0.01): + continue + assert output.get('finish_reason') == 'length' + assert output.get('text') == '' + + @pytest.mark.tmp + def test_chat_interactive_longinput_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself' * 10000, + stream=True, + temperature=0.01): + outputList.append(output) + assert outputList[0].get('finish_reason') == 'length', outputList + assert outputList[0].get('text') == '' + assert len(outputList) == 1 + + +def assert_chat_completions_batch_return(output): + assert output.get('usage').get('prompt_tokens') > 0 + assert output.get('usage').get('total_tokens') > 0 + assert output.get('usage').get('completion_tokens') > 0 + assert output.get('usage').get('completion_tokens') + output.get( + 'usage').get('prompt_tokens') == output.get('usage').get( + 'total_tokens') + assert output.get('id') is not None + assert output.get('object') == 'chat.completion' + assert output.get('model') == MODEL_NAME + output_message = output.get('choices') + assert len(output_message) == 1 + for message in output_message: + assert message.get('finish_reason') in ['stop', 'length'] + assert message.get('index') == 0 + assert len(message.get('message').get('content')) > 0 + assert message.get('message').get('role') == 'assistant' + + +def assert_chat_completions_stream_return(output, + is_first: bool = False, + is_last: bool = False): + assert output.get('id') is not None + if is_first is False: + assert output.get('object') == 'chat.completion.chunk' + assert output.get('model') == MODEL_NAME + output_message = output.get('choices') + assert len(output_message) == 1 + for message in output_message: + assert message.get('delta').get('role') == 'assistant' + assert message.get('index') == 0 + if is_last is False: + assert message.get('finish_reason') is None + if is_first is False and is_last is False: + assert len(message.get('delta').get('content')) >= 0 + if is_last is True: + assert len(message.get('delta').get('content')) == 0 + assert message.get('finish_reason') in ['stop', 'length'] + + +def assert_chat_interactive_batch_return(output): + assert output.get('input_tokens') > 0 + assert output.get('tokens') > 0 + assert output.get('history_tokens') >= 0 + assert output.get('finish_reason') in ['stop', 'length'] + assert len(output.get('text')) > 0 + + +def assert_chat_interactive_stream_return(output, + is_last: bool = False, + is_text_empty: bool = False, + index: int = None): + assert output.get('input_tokens') > 0 + if index is not None: + assert output.get('tokens') >= index + 1 and output.get( + 'tokens') <= index + 6 + assert output.get('tokens') > 0 + assert output.get('history_tokens') >= 0 + if is_last: + assert len(output.get('text')) >= 0 + assert output.get('finish_reason') in ['stop', 'length'] + elif is_text_empty: + assert len(output.get('text')) == 0 + assert output.get('finish_reason') is None + else: + 
assert len(output.get('text')) >= 0 + assert output.get('finish_reason') is None diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch.py index 7e0318eebd..eea30502bb 100644 --- a/autotest/tools/pipeline/test_pipeline_chat_pytorch.py +++ b/autotest/tools/pipeline/test_pipeline_chat_pytorch.py @@ -22,7 +22,8 @@ def getModelList(tp_num): @pytest.mark.parametrize('model', getModelList(tp_num=1)) def test_pipeline_chat_pytorch_tp1(config, common_case_config, model, worker_id): - os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id) + if 'gw' in worker_id: + os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id) p = Process(target=run_pipeline_chat_test, args=(config, common_case_config, model, 'pytorch')) p.start() From 62d7925a43aff4d604ffd1c68b1ed4f9c3064860 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 10:38:32 +0800 Subject: [PATCH 2/7] rename --- .github/workflows/pr_ete_test.yml | 1 + ...ful_interface_turbomind.py => test_restful_interface_func.py} | 0 2 files changed, 1 insertion(+) rename autotest/interface/restful/{test_restful_interface_turbomind.py => test_restful_interface_func.py} (100%) diff --git a/.github/workflows/pr_ete_test.yml b/.github/workflows/pr_ete_test.yml index 94f2ef719f..72daea081b 100644 --- a/.github/workflows/pr_ete_test.yml +++ b/.github/workflows/pr_ete_test.yml @@ -19,6 +19,7 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true + env: HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai diff --git a/autotest/interface/restful/test_restful_interface_turbomind.py b/autotest/interface/restful/test_restful_interface_func.py similarity index 100% rename from autotest/interface/restful/test_restful_interface_turbomind.py rename to autotest/interface/restful/test_restful_interface_func.py From 646962429ba2cf22c34a4dcc691021e85253e76c Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 13:56:08 +0800 Subject: [PATCH 3/7] fix --- .github/workflows/daily_ete_test.yml | 6 +- ... 
=> test_restful_interface_func_common.py} | 327 +++--------------- .../test_restful_interface_func_pytorch.py | 286 +++++++++++++++ .../test_restful_interface_func_turbomind.py | 269 ++++++++++++++ autotest/utils/content_detect_utils.py | 94 +++++ autotest/utils/restful_return_check.py | 68 ++++ 6 files changed, 767 insertions(+), 283 deletions(-) rename autotest/interface/restful/{test_restful_interface_func.py => test_restful_interface_func_common.py} (67%) create mode 100644 autotest/interface/restful/test_restful_interface_func_pytorch.py create mode 100644 autotest/interface/restful/test_restful_interface_func_turbomind.py create mode 100644 autotest/utils/content_detect_utils.py create mode 100644 autotest/utils/restful_return_check.py diff --git a/.github/workflows/daily_ete_test.yml b/.github/workflows/daily_ete_test.yml index f2279536ad..5916957079 100644 --- a/.github/workflows/daily_ete_test.yml +++ b/.github/workflows/daily_ete_test.yml @@ -34,7 +34,7 @@ env: jobs: test_functions: runs-on: [self-hosted, linux-a100] - timeout-minutes: 240 + timeout-minutes: 300 env: REPORT_DIR: /nvme/qa_test_models/test-reports container: @@ -157,9 +157,9 @@ jobs: - name: Test lmdeploy - interface pipeline turbomind case continue-on-error: true if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'interface-pipeline')) - timeout-minutes: 20 + timeout-minutes: 75 run: | - pytest autotest/interface/pipeline/test_pipeline_turbomind_func.py -m 'not pr_test' --alluredir=allure-results + pytest autotest/interface/pipeline -m 'not pr_test' --alluredir=allure-results - name: Test lmdeploy - pipeline torch continue-on-error: true if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'pipeline')) diff --git a/autotest/interface/restful/test_restful_interface_func.py b/autotest/interface/restful/test_restful_interface_func_common.py similarity index 67% rename from autotest/interface/restful/test_restful_interface_func.py rename to autotest/interface/restful/test_restful_interface_func_common.py index dea7b34c79..ac157ef3a6 100644 --- a/autotest/interface/restful/test_restful_interface_func.py +++ b/autotest/interface/restful/test_restful_interface_func_common.py @@ -4,19 +4,24 @@ import pytest from tqdm import tqdm +from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams +from utils.restful_return_check import (assert_chat_completions_batch_return, + assert_chat_completions_stream_return, + assert_chat_interactive_batch_return, + assert_chat_interactive_stream_return) from lmdeploy.serve.openai.api_client import APIClient, get_model_list BASE_HTTP_URL = 'http://10.140.0.187' -DEFAULT_PORT = 23333 +DEFAULT_PORT = 23334 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) @pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind -@pytest.mark.flaky(reruns=2) +@pytest.mark.restful_interface_common +@pytest.mark.flaky(reruns=0) class TestRestfulInterfaceBase: def test_issue1232(self): @@ -83,8 +88,8 @@ def test_encode(self): @pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind -@pytest.mark.flaky(reruns=2) +@pytest.mark.restful_interface_common +@pytest.mark.flaky(reruns=0) class TestRestfulInterfaceChatCompletions: def test_chat_completions_check_return_batch1(self): @@ -94,7 +99,7 @@ def 
test_chat_completions_check_return_batch1(self): messages='Hi, pls intro yourself', temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) def test_chat_completions_check_return_batch2(self): api_client = APIClient(BASE_URL) @@ -106,7 +111,7 @@ def test_chat_completions_check_return_batch2(self): }], temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) def test_chat_completions_check_return_stream1(self): api_client = APIClient(BASE_URL) @@ -118,10 +123,13 @@ def test_chat_completions_check_return_stream1(self): temperature=0.01): outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) def test_chat_completions_check_return_stream2(self): api_client = APIClient(BASE_URL) @@ -136,43 +144,13 @@ def test_chat_completions_check_return_stream2(self): temperature=0.01): outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - - def test_chat_completions_ignore_eos_batch(self): - api_client = APIClient(BASE_URL) - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, what is your name?', - ignore_eos=True, - max_tokens=100, - temperature=0.01): - continue - assert_chat_completions_batch_return(output) - assert output.get('usage').get('completion_tokens') == 101 - assert output.get('choices')[0].get('finish_reason') == 'length' - - def test_chat_completions_ignore_eos_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, what is your name?', - ignore_eos=True, - stream=True, - max_tokens=100, - temperature=0.01): - outputList.append(output) - - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) - for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - assert outputList[-1].get('choices')[0].get( - 'finish_reason') == 'length' - assert len(outputList) == 103 + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) def test_chat_completions_stopwords_batch(self): api_client = APIClient(BASE_URL) @@ -181,7 +159,7 @@ def test_chat_completions_stopwords_batch(self): stop=' is', temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert ' is' not in output.get('choices')[0].get('message').get( 'content') assert output.get('choices')[0].get('finish_reason') == 'stop' @@ -191,7 +169,7 @@ def test_chat_completions_stopwords_batch(self): stop=[' is', '上海', ' to'], temperature=0.01): continue - 
assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert ' is' not in output.get('choices')[0].get('message').get( 'content') assert ' 上海' not in output.get('choices')[0].get('message').get( @@ -210,10 +188,13 @@ def test_chat_completions_stopwords_stream(self): temperature=0.01): outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) assert ' to' not in outputList[index].get('choices')[0].get( 'delta').get('content') assert outputList[-1].get('choices')[0].get('finish_reason') == 'stop' @@ -226,10 +207,13 @@ def test_chat_completions_stopwords_stream(self): temperature=0.01): outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) assert ' is' not in outputList[index].get('choices')[0].get( 'delta').get('content') assert '上海' not in outputList[index].get('choices')[0].get( @@ -254,7 +238,7 @@ def test_chat_completions_special_words_batch(self): skip_special_tokens=False, temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert '<|action_start|><|interpreter|>' in output.get( 'choices')[0].get('message').get('content') @@ -263,40 +247,10 @@ def test_chat_completions_special_words_batch(self): skip_special_tokens=True, temperature=0.01): continue - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert '<|action_start|><|interpreter|>' not in output.get( 'choices')[0].get('message').get('content') - def test_chat_completions_max_tokens_batch(self): - api_client = APIClient(BASE_URL) - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself', - max_tokens=5, - temperature=0.01): - continue - assert_chat_completions_batch_return(output) - assert output.get('choices')[0].get('finish_reason') == 'length' - assert output.get('usage').get('completion_tokens') == 6 - - def test_chat_completions_max_tokens_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself', - stream=True, - max_tokens=5, - temperature=0.01): - outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) - for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - assert outputList[-1].get('choices')[0].get( - 'finish_reason') == 'length' - assert len(outputList) == 8 - def test_chat_completions_repetition_penalty_batch(self): api_client = 
APIClient(BASE_URL) for output in api_client.chat_completions_v1(model=MODEL_NAME, @@ -305,29 +259,10 @@ def test_chat_completions_repetition_penalty_batch(self): temperature=0.01, max_tokens=200): continue - assert_chat_completions_batch_return(output) - assert ' is is' * 5 in output.get('choices')[0].get('message').get( - 'content') - - def test_chat_completions_repetition_penalty_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - response = '' - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself', - stream=True, - repetition_penalty=0.1, - temperature=0.01, - max_tokens=200): - outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) - for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - response += outputList[index].get('choices')[0].get('delta').get( - 'content') - assert 'pls pls ' * 5 in response, response + assert_chat_completions_batch_return(output, MODEL_NAME) + assert base_rps_frac_chars_in_dupe_ngrams( + 6, + output.get('choices')[0].get('message').get('content')) > 80 def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) @@ -336,7 +271,7 @@ def test_chat_completions_topp_min_batch(self): for output in api_client.chat_completions_v1( model=MODEL_NAME, messages='Shanghai is', top_p=0.1): outputList.append(output) - assert_chat_completions_batch_return(output) + assert_chat_completions_batch_return(output, MODEL_NAME) assert outputList[0].get('choices')[0].get('message').get( 'content') == outputList[1].get('choices')[0].get('message').get( 'content') @@ -344,28 +279,6 @@ def test_chat_completions_topp_min_batch(self): 'content') == outputList[2].get('choices')[0].get('message').get( 'content') - def test_chat_completions_topp_min_stream(self): - api_client = APIClient(BASE_URL) - responseList = [] - for i in range(3): - outputList = [] - response = '' - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself', - stream=True, - top_p=0.1): - outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert_chat_completions_stream_return(outputList[-1], False, True) - for index in range(1, len(outputList) - 1): - assert_chat_completions_stream_return(outputList[index]) - response += outputList[index].get('choices')[0].get( - 'delta').get('content') - responseList.append(response) - assert responseList[0] == responseList[1] - assert responseList[1] == responseList[2] - def test_chat_completions_mis_model_name_batch(self): api_client = APIClient(BASE_URL) for output in api_client.chat_completions_v1( @@ -401,25 +314,10 @@ def test_chat_completions_longinput_batch(self): assert output.get('choices')[0].get('finish_reason') == 'length' assert output.get('choices')[0].get('message').get('content') == '' - def test_chat_completions_longinput_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_completions_v1( - model=MODEL_NAME, - messages='Hi, pls intro yourself' * 10000, - stream=True, - temperature=0.01): - outputList.append(output) - assert_chat_completions_stream_return(outputList[0], True, False) - assert outputList[1].get('choices')[0].get('finish_reason') == 'length' - assert outputList[1].get('choices')[0].get('delta').get( - 'content') == '' - assert len(outputList) == 2 - @pytest.mark.order(7) 
@pytest.mark.restful_interface_turbomind -@pytest.mark.flaky(reruns=2) +@pytest.mark.flaky(reruns=0) class TestRestfulInterfaceChatInteractive: def test_chat_interactive_check_return_batch1(self): @@ -483,37 +381,6 @@ def test_chat_interactive_check_return_stream2(self): assert_chat_interactive_stream_return(outputList[index], index=index) - def test_chat_interactive_ignore_eos_batch(self): - api_client = APIClient(BASE_URL) - for output in api_client.chat_interactive_v1( - prompt='Hi, what is your name?', - ignore_eos=True, - request_output_len=100, - temperature=0.01): - continue - assert_chat_interactive_batch_return(output) - assert output.get('tokens') == 101 - assert output.get('finish_reason') == 'length' - - def test_chat_interactive_ignore_eos_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_interactive_v1( - prompt='Hi, what is your name?', - ignore_eos=True, - stream=True, - request_output_len=100, - temperature=0.01): - outputList.append(output) - assert_chat_interactive_stream_return(outputList[-1], - True, - index=len(outputList) - 2) - for index in range(0, len(outputList) - 1): - assert_chat_interactive_stream_return(outputList[index], - index=index) - assert output.get('finish_reason') == 'length' - assert len(outputList) == 102 - def test_chat_interactive_stopwords_batch(self): api_client = APIClient(BASE_URL) for output in api_client.chat_interactive_v1(prompt='Shanghai is', @@ -603,35 +470,6 @@ def test_chat_interactive_special_words_batch(self): assert_chat_interactive_batch_return(output) assert '<|action_start|><|interpreter|>' not in output.get('text') - def test_chat_interactive_max_tokens_batch(self): - api_client = APIClient(BASE_URL) - for output in api_client.chat_interactive_v1( - prompt='Hi, pls intro yourself', - request_output_len=5, - temperature=0.01): - continue - assert_chat_interactive_batch_return(output) - assert output.get('finish_reason') == 'length' - assert output.get('tokens') == 6 - - def test_chat_interactive_max_tokens_stream(self): - api_client = APIClient(BASE_URL) - outputList = [] - for output in api_client.chat_interactive_v1( - prompt='Hi, pls intro yourself', - stream=True, - request_output_len=5, - temperature=0.01): - outputList.append(output) - assert_chat_interactive_stream_return(outputList[-1], - True, - index=len(outputList) - 2) - for index in range(0, len(outputList) - 1): - assert_chat_interactive_stream_return(outputList[index], - index=index) - assert output.get('finish_reason') == 'length' - assert len(outputList) == 7 - def test_chat_interactive_repetition_penalty_batch(self): api_client = APIClient(BASE_URL) for output in api_client.chat_interactive_v1(prompt='Shanghai is', @@ -640,7 +478,7 @@ def test_chat_interactive_repetition_penalty_batch(self): request_output_len=512): continue assert_chat_interactive_batch_return(output) - assert 'a 上海 is a 上海, ' * 5 in output.get('text') + assert base_rps_frac_chars_in_dupe_ngrams(6, output.get('text')) > 90 def test_chat_interactive_with_history_batch(self): api_client = APIClient(BASE_URL) @@ -715,7 +553,6 @@ def test_chat_interactive_topp_min_stream(self): assert responseList[0] == responseList[1] assert responseList[1] == responseList[2] - @pytest.mark.tmp def test_chat_interactive_longinput_batch(self): api_client = APIClient(BASE_URL) for output in api_client.chat_interactive_v1( @@ -724,7 +561,6 @@ def test_chat_interactive_longinput_batch(self): assert output.get('finish_reason') == 'length' assert output.get('text') == '' 
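+        # an over-long prompt is expected to be rejected up front:
+        # finish_reason 'length' with an empty generated text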
- @pytest.mark.tmp def test_chat_interactive_longinput_stream(self): api_client = APIClient(BASE_URL) outputList = [] @@ -736,72 +572,3 @@ def test_chat_interactive_longinput_stream(self): assert outputList[0].get('finish_reason') == 'length', outputList assert outputList[0].get('text') == '' assert len(outputList) == 1 - - -def assert_chat_completions_batch_return(output): - assert output.get('usage').get('prompt_tokens') > 0 - assert output.get('usage').get('total_tokens') > 0 - assert output.get('usage').get('completion_tokens') > 0 - assert output.get('usage').get('completion_tokens') + output.get( - 'usage').get('prompt_tokens') == output.get('usage').get( - 'total_tokens') - assert output.get('id') is not None - assert output.get('object') == 'chat.completion' - assert output.get('model') == MODEL_NAME - output_message = output.get('choices') - assert len(output_message) == 1 - for message in output_message: - assert message.get('finish_reason') in ['stop', 'length'] - assert message.get('index') == 0 - assert len(message.get('message').get('content')) > 0 - assert message.get('message').get('role') == 'assistant' - - -def assert_chat_completions_stream_return(output, - is_first: bool = False, - is_last: bool = False): - assert output.get('id') is not None - if is_first is False: - assert output.get('object') == 'chat.completion.chunk' - assert output.get('model') == MODEL_NAME - output_message = output.get('choices') - assert len(output_message) == 1 - for message in output_message: - assert message.get('delta').get('role') == 'assistant' - assert message.get('index') == 0 - if is_last is False: - assert message.get('finish_reason') is None - if is_first is False and is_last is False: - assert len(message.get('delta').get('content')) >= 0 - if is_last is True: - assert len(message.get('delta').get('content')) == 0 - assert message.get('finish_reason') in ['stop', 'length'] - - -def assert_chat_interactive_batch_return(output): - assert output.get('input_tokens') > 0 - assert output.get('tokens') > 0 - assert output.get('history_tokens') >= 0 - assert output.get('finish_reason') in ['stop', 'length'] - assert len(output.get('text')) > 0 - - -def assert_chat_interactive_stream_return(output, - is_last: bool = False, - is_text_empty: bool = False, - index: int = None): - assert output.get('input_tokens') > 0 - if index is not None: - assert output.get('tokens') >= index + 1 and output.get( - 'tokens') <= index + 6 - assert output.get('tokens') > 0 - assert output.get('history_tokens') >= 0 - if is_last: - assert len(output.get('text')) >= 0 - assert output.get('finish_reason') in ['stop', 'length'] - elif is_text_empty: - assert len(output.get('text')) == 0 - assert output.get('finish_reason') is None - else: - assert len(output.get('text')) >= 0 - assert output.get('finish_reason') is None diff --git a/autotest/interface/restful/test_restful_interface_func_pytorch.py b/autotest/interface/restful/test_restful_interface_func_pytorch.py new file mode 100644 index 0000000000..b026e6360b --- /dev/null +++ b/autotest/interface/restful/test_restful_interface_func_pytorch.py @@ -0,0 +1,286 @@ +import pytest +from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams +from utils.restful_return_check import (assert_chat_completions_batch_return, + assert_chat_completions_stream_return, + assert_chat_interactive_batch_return, + assert_chat_interactive_stream_return) + +from lmdeploy.serve.openai.api_client import APIClient + +BASE_HTTP_URL = 'http://10.140.0.187' +DEFAULT_PORT = 
23334 +MODEL = 'internlm/internlm2-chat-20b' +MODEL_NAME = 'internlm2-chat-20b' +BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_pytorch +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatCompletions: + + def test_chat_completions_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + max_tokens=100, + temperature=0.01): + continue + assert_chat_completions_batch_return(output, MODEL_NAME) + assert output.get('usage').get( + 'completion_tokens') == 101 or output.get('usage').get( + 'completion_tokens') == 100 + assert output.get('choices')[0].get('finish_reason') == 'length' + + def test_chat_completions_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + stream=True, + max_tokens=100, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 102 + + def test_chat_completions_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + max_tokens=5, + temperature=0.01): + continue + assert_chat_completions_batch_return(output, MODEL_NAME) + assert output.get('choices')[0].get('finish_reason') == 'length' + assert output.get('usage').get('completion_tokens') == 6 or output.get( + 'usage').get('completion_tokens') == 5 + + def test_chat_completions_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + max_tokens=5, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 7 + + def test_chat_completions_repetition_penalty_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + repetition_penalty=0.1, + temperature=0.01, + max_tokens=200): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + response += outputList[index].get('choices')[0].get('delta').get( + 'content') + assert base_rps_frac_chars_in_dupe_ngrams(6, response) > 90 + + def test_chat_completions_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in 
range(3):
+            for output in api_client.chat_completions_v1(
+                    model=MODEL_NAME,
+                    messages='Shanghai is',
+                    top_p=0.1,
+                    temperature=0.01):
+                outputList.append(output)
+            assert_chat_completions_batch_return(output, MODEL_NAME)
+            print(output)
+        assert outputList[0].get('choices')[0].get('message').get(
+            'content') == outputList[1].get('choices')[0].get('message').get(
+                'content')
+        assert outputList[1].get('choices')[0].get('message').get(
+            'content') == outputList[2].get('choices')[0].get('message').get(
+                'content')
+
+    def test_chat_completions_topp_min_stream(self):
+        api_client = APIClient(BASE_URL)
+        responseList = []
+        for i in range(3):
+            outputList = []
+            response = ''
+            for output in api_client.chat_completions_v1(
+                    model=MODEL_NAME,
+                    messages='Hi, pls intro yourself',
+                    stream=True,
+                    top_p=0.1,
+                    temperature=0.01):
+                outputList.append(output)
+            assert_chat_completions_stream_return(outputList[0], MODEL_NAME,
+                                                  True, False)
+            assert_chat_completions_stream_return(outputList[-1], MODEL_NAME,
+                                                  False, True)
+            for index in range(1, len(outputList) - 1):
+                assert_chat_completions_stream_return(outputList[index],
+                                                      MODEL_NAME)
+                response += outputList[index].get('choices')[0].get(
+                    'delta').get('content')
+            responseList.append(response)
+        assert responseList[0] == responseList[1]
+        assert responseList[1] == responseList[2]
+
+    def test_chat_completions_longinput_stream(self):
+        api_client = APIClient(BASE_URL)
+        outputList = []
+        for output in api_client.chat_completions_v1(
+                model=MODEL_NAME,
+                messages='Hi, pls intro yourself' * 10000,
+                stream=True,
+                temperature=0.01):
+            outputList.append(output)
+        assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True,
+                                              False)
+        for index in range(1, len(outputList) - 1):
+            assert_chat_completions_stream_return(outputList[index],
+                                                  MODEL_NAME)
+        assert outputList[1].get('choices')[0].get('finish_reason') == 'length'
+        assert outputList[1].get('choices')[0].get('delta').get(
+            'content') == ''
+        assert len(outputList) == 2
+
+
+@pytest.mark.order(7)
+@pytest.mark.restful_interface_pytorch
+@pytest.mark.flaky(reruns=2)
+class TestRestfulInterfaceChatInteractive:
+
+    def test_chat_interactive_ignore_eos_batch(self):
+        api_client = APIClient(BASE_URL)
+        for output in api_client.chat_interactive_v1(
+                prompt='Hi, what is your name?',
+                ignore_eos=True,
+                request_output_len=100,
+                temperature=0.01):
+            continue
+        assert_chat_interactive_batch_return(output)
+        assert output.get('tokens') == 100
+        assert output.get('finish_reason') == 'length'
+
+    def test_chat_interactive_ignore_eos_stream(self):
+        api_client = APIClient(BASE_URL)
+        outputList = []
+        for output in api_client.chat_interactive_v1(
+                prompt='Hi, what is your name?',
+                ignore_eos=True,
+                stream=True,
+                request_output_len=100,
+                temperature=0.01):
+            outputList.append(output)
+            print(output)
+        assert_chat_interactive_stream_return(outputList[-1],
+                                              True,
+                                              index=len(outputList) - 2)
+        for index in range(0, len(outputList) - 1):
+            assert_chat_interactive_stream_return(outputList[index],
+                                                  index=index)
+        assert output.get('finish_reason') == 'length'
+        assert len(outputList) == 101
+
+    def test_chat_interactive_max_tokens_batch(self):
+        api_client = APIClient(BASE_URL)
+        for output in api_client.chat_interactive_v1(
+                prompt='Hi, pls intro yourself',
+                request_output_len=5,
+                temperature=0.01):
+            continue
+        assert_chat_interactive_batch_return(output)
+        assert output.get('finish_reason') == 
'length' + assert output.get('tokens') == 5 + + def test_chat_interactive_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + stream=True, + request_output_len=5, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 6 + + def test_chat_interactive_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + top_p=0.01, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + outputList.append(output) + print(output) + assert outputList[0] == outputList[1] + assert outputList[1] == outputList[2] + + def test_chat_interactive_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_interactive_v1( + model=MODEL_NAME, + prompt='Hi, pls intro yourself', + stream=True, + top_p=0.01, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + response += outputList[index].get('text') + responseList.append(response) + assert responseList[0] == responseList[1] + assert responseList[1] == responseList[2] diff --git a/autotest/interface/restful/test_restful_interface_func_turbomind.py b/autotest/interface/restful/test_restful_interface_func_turbomind.py new file mode 100644 index 0000000000..33ff8e2dfa --- /dev/null +++ b/autotest/interface/restful/test_restful_interface_func_turbomind.py @@ -0,0 +1,269 @@ +import pytest +from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams +from utils.restful_return_check import (assert_chat_completions_batch_return, + assert_chat_completions_stream_return, + assert_chat_interactive_batch_return, + assert_chat_interactive_stream_return) + +from lmdeploy.serve.openai.api_client import APIClient + +BASE_HTTP_URL = 'http://10.140.0.187' +DEFAULT_PORT = 23333 +MODEL = 'internlm/internlm2-chat-20b' +MODEL_NAME = 'internlm2-chat-20b' +BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) + + +@pytest.mark.order(7) +@pytest.mark.turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatCompletions: + + def test_chat_completions_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + max_tokens=100, + temperature=0.01): + continue + assert_chat_completions_batch_return(output, MODEL_NAME) + assert output.get('usage').get('completion_tokens') == 101 + assert output.get('choices')[0].get('finish_reason') == 'length' + + def test_chat_completions_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, what is your name?', + ignore_eos=True, + stream=True, + max_tokens=100, + temperature=0.01): + outputList.append(output) + + assert_chat_completions_stream_return(outputList[0], 
MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 103 + + def test_chat_completions_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + max_tokens=5, + temperature=0.01): + continue + assert_chat_completions_batch_return(output, MODEL_NAME) + assert output.get('choices')[0].get('finish_reason') == 'length' + assert output.get('usage').get('completion_tokens') == 6 + + def test_chat_completions_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + max_tokens=5, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + assert outputList[-1].get('choices')[0].get( + 'finish_reason') == 'length' + assert len(outputList) == 8 + + def test_chat_completions_repetition_penalty_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + repetition_penalty=0.1, + temperature=0.01, + max_tokens=200): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + response += outputList[index].get('choices')[0].get('delta').get( + 'content') + assert base_rps_frac_chars_in_dupe_ngrams(6, response) > 90 + + def test_chat_completions_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_completions_v1( + model=MODEL_NAME, messages='Shanghai is', top_p=0.1): + outputList.append(output) + assert_chat_completions_batch_return(output, MODEL_NAME) + assert outputList[0].get('choices')[0].get('message').get( + 'content') == outputList[1].get('choices')[0].get('message').get( + 'content') + assert outputList[1].get('choices')[0].get('message').get( + 'content') == outputList[2].get('choices')[0].get('message').get( + 'content') + + def test_chat_completions_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself', + stream=True, + top_p=0.1): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, + True, False) + assert_chat_completions_stream_return(outputList[-1], MODEL_NAME, + False, True) + for index in range(1, len(outputList) - 1): + assert_chat_completions_stream_return(outputList[index], + MODEL_NAME) + response += outputList[index].get('choices')[0].get( + 'delta').get('content') + responseList.append(response) + assert 
responseList[0] == responseList[1] + assert responseList[1] == responseList[2] + + def test_chat_completions_longinput_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_completions_v1( + model=MODEL_NAME, + messages='Hi, pls intro yourself' * 10000, + stream=True, + temperature=0.01): + outputList.append(output) + assert_chat_completions_stream_return(outputList[0], MODEL_NAME, True, + False) + assert outputList[1].get('choices')[0].get('finish_reason') == 'length' + assert outputList[1].get('choices')[0].get('delta').get( + 'content') == '' + assert len(outputList) == 2 + + +@pytest.mark.order(7) +@pytest.mark.restful_interface_turbomind +@pytest.mark.flaky(reruns=2) +class TestRestfulInterfaceChatInteractive: + + def test_chat_interactive_ignore_eos_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, what is your name?', + ignore_eos=True, + request_output_len=100, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert output.get('tokens') == 101 + assert output.get('finish_reason') == 'length' + + def test_chat_interactive_ignore_eos_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, what is your name?', + ignore_eos=True, + stream=True, + request_output_len=100, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 102 + + def test_chat_interactive_max_tokens_batch(self): + api_client = APIClient(BASE_URL) + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + request_output_len=5, + temperature=0.01): + continue + assert_chat_interactive_batch_return(output) + assert output.get('finish_reason') == 'length' + assert output.get('tokens') == 6 + + def test_chat_interactive_max_tokens_stream(self): + api_client = APIClient(BASE_URL) + outputList = [] + for output in api_client.chat_interactive_v1( + prompt='Hi, pls intro yourself', + stream=True, + request_output_len=5, + temperature=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + assert_chat_interactive_stream_return(outputList[index], + index=index) + assert output.get('finish_reason') == 'length' + assert len(outputList) == 7 + + def test_chat_interactive_topp_min_batch(self): + api_client = APIClient(BASE_URL) + outputList = [] + for i in range(3): + for output in api_client.chat_interactive_v1(prompt='Shanghai is', + top_p=0.01): + continue + assert_chat_interactive_batch_return(output) + outputList.append(output) + assert outputList[0] == outputList[1] + assert outputList[1] == outputList[2] + + def test_chat_interactive_topp_min_stream(self): + api_client = APIClient(BASE_URL) + responseList = [] + for i in range(3): + outputList = [] + response = '' + for output in api_client.chat_interactive_v1( + model=MODEL_NAME, + prompt='Hi, pls intro yourself', + stream=True, + top_p=0.01): + outputList.append(output) + assert_chat_interactive_stream_return(outputList[-1], + True, + index=len(outputList) - 2) + for index in range(0, len(outputList) - 1): + 
assert_chat_interactive_stream_return(outputList[index], + index=index) + response += outputList[index].get('text') + responseList.append(response) + assert responseList[0] == responseList[1] + assert responseList[1] == responseList[2] diff --git a/autotest/utils/content_detect_utils.py b/autotest/utils/content_detect_utils.py new file mode 100644 index 0000000000..7611c98ad3 --- /dev/null +++ b/autotest/utils/content_detect_utils.py @@ -0,0 +1,94 @@ +import re +import string +import unicodedata +from collections import Counter + +import numpy + +TRANSLATION_TABLE_PUNCTUATION = str.maketrans('', '', string.punctuation) + + +def normalize(text: str, + remove_punct: bool = True, + lowercase: bool = True, + nfd_unicode: bool = True, + white_space: bool = True) -> str: + """Normalize the text by lowercasing and removing punctuation.""" + # remove punctuation + if remove_punct: + text = text.translate(TRANSLATION_TABLE_PUNCTUATION) + + # lowercase + if lowercase: + text = text.lower() + + if white_space: + text = text.strip() + text = re.sub(r'\s+', ' ', text) + + # NFD unicode normalization + if nfd_unicode: + text = unicodedata.normalize('NFD', text) + + return text + + +def form_ngrams(sequence, n): + history = [] + # build the first ngram, yielding only when we have a full ngram + while n > 1: + try: + next_item = next(sequence) + except StopIteration: + # no more data, terminate the generator + return + history.append(next_item) + n -= 1 + + # yield each ngram we have, then add the next item and repeat + for item in sequence: + history.append(item) + yield tuple(history) + del history[0] + + +def base_rps_frac_chars_in_dupe_ngrams(NGRAM_SIZE: int = 5, content: str = ''): + """Base class for calculating the fraction of characters in duplicate word + N-grams. + + This operates on the lower-cased, punctuation removed content. The function + also ensures that characters in overlapping ngrams are only counted once. 
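+ + Illustrative example (derived from the logic below, with NGRAM_SIZE=2): the content 'a b a b' yields the bigrams ('a', 'b'), ('b', 'a'), ('a', 'b'); the repeated ('a', 'b') marks all four words as duplicated, so every character is counted and the score is 100.0: + + >>> base_rps_frac_chars_in_dupe_ngrams(2, 'a b a b') + 100.0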
+ """ + normalized_content = normalize(content) + normalized_words = tuple(normalized_content.split()) + + if len(normalized_words) < NGRAM_SIZE: + return 0 + + # fetch the ngrams from the document if they exist, otherwise + # compute them + doc_n_grams = tuple(form_ngrams(iter(normalized_words), NGRAM_SIZE)) + + # keep only ngrams which occur at least twice + ngram_dupes = { + ngram + for ngram, count in Counter(doc_n_grams).items() if count > 1 + } + + duplicated_grams = numpy.zeros(len(normalized_words), dtype=int) + i = 0 + for ngram in doc_n_grams: + if ngram in ngram_dupes: + duplicated_grams[i:i + NGRAM_SIZE] = 1 + + i += 1 + + word_lengths = numpy.array(list(map(len, normalized_words))) + chars_duped = numpy.sum(word_lengths * duplicated_grams) + total_chars = numpy.sum(word_lengths) + + if total_chars == 0: + return 0 + + score = float(chars_duped / total_chars) * 100 + return score diff --git a/autotest/utils/restful_return_check.py b/autotest/utils/restful_return_check.py new file mode 100644 index 0000000000..b7832047d5 --- /dev/null +++ b/autotest/utils/restful_return_check.py @@ -0,0 +1,68 @@ +def assert_chat_completions_batch_return(output, model_name): + assert output.get('usage').get('prompt_tokens') > 0 + assert output.get('usage').get('total_tokens') > 0 + assert output.get('usage').get('completion_tokens') > 0 + assert output.get('usage').get('completion_tokens') + output.get( + 'usage').get('prompt_tokens') == output.get('usage').get( + 'total_tokens') + assert output.get('id') is not None + assert output.get('object') == 'chat.completion' + assert output.get('model') == model_name + output_message = output.get('choices') + assert len(output_message) == 1 + for message in output_message: + assert message.get('finish_reason') in ['stop', 'length'] + assert message.get('index') == 0 + assert len(message.get('message').get('content')) > 0 + assert message.get('message').get('role') == 'assistant' + + +def assert_chat_completions_stream_return(output, + model_name, + is_first: bool = False, + is_last: bool = False): + assert output.get('id') is not None + if is_first is False: + assert output.get('object') == 'chat.completion.chunk' + assert output.get('model') == model_name + output_message = output.get('choices') + assert len(output_message) == 1 + for message in output_message: + assert message.get('delta').get('role') == 'assistant' + assert message.get('index') == 0 + if is_last is False: + assert message.get('finish_reason') is None + if is_first is False and is_last is False: + assert len(message.get('delta').get('content')) >= 0 + if is_last is True: + assert len(message.get('delta').get('content')) == 0 + assert message.get('finish_reason') in ['stop', 'length'] + + +def assert_chat_interactive_batch_return(output): + assert output.get('input_tokens') > 0 + assert output.get('tokens') > 0 + assert output.get('history_tokens') >= 0 + assert output.get('finish_reason') in ['stop', 'length'] + assert len(output.get('text')) > 0 + + +def assert_chat_interactive_stream_return(output, + is_last: bool = False, + is_text_empty: bool = False, + index: int = None): + assert output.get('input_tokens') > 0 + if index is not None: + assert output.get('tokens') >= index and output.get( + 'tokens') <= index + 6 + assert output.get('tokens') > 0 + assert output.get('history_tokens') >= 0 + if is_last: + assert len(output.get('text')) >= 0 + assert output.get('finish_reason') in ['stop', 'length'] + elif is_text_empty: + assert len(output.get('text')) == 0 + assert 
output.get('finish_reason') is None + else: + assert len(output.get('text')) >= 0 + assert output.get('finish_reason') is None From 72865b1f556475f14be1c8a4004521eb78651195 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 14:23:41 +0800 Subject: [PATCH 4/7] update feishu notify --- .github/workflows/daily_ete_test.yml | 6 ++++- .../test_restful_interface_func_common.py | 25 +++++++++++-------- .../test_restful_interface_func_pytorch.py | 8 +++--- .../test_restful_interface_func_turbomind.py | 8 +++--- 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/.github/workflows/daily_ete_test.yml b/.github/workflows/daily_ete_test.yml index 5916957079..728a467ab8 100644 --- a/.github/workflows/daily_ete_test.yml +++ b/.github/workflows/daily_ete_test.yml @@ -323,4 +323,8 @@ jobs: - name: fail notify if: contains(needs.*.result, 'failure') run: | - curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }} + curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test failed!!!","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }} + - name: success notify + if: needs.test_functions.result=='success' && needs.test_triton.result=='success' + run: | + curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test success","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} succeeded. 
"},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }} diff --git a/autotest/interface/restful/test_restful_interface_func_common.py b/autotest/interface/restful/test_restful_interface_func_common.py index ac157ef3a6..e54b6bba96 100644 --- a/autotest/interface/restful/test_restful_interface_func_common.py +++ b/autotest/interface/restful/test_restful_interface_func_common.py @@ -12,16 +12,17 @@ from lmdeploy.serve.openai.api_client import APIClient, get_model_list -BASE_HTTP_URL = 'http://10.140.0.187' -DEFAULT_PORT = 23334 +BASE_HTTP_URL = 'http://localhost' +DEFAULT_PORT = 23333 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) -@pytest.mark.order(7) -@pytest.mark.restful_interface_common -@pytest.mark.flaky(reruns=0) +@pytest.mark.order(8) +@pytest.mark.turbomind +@pytest.mark.pytorch +@pytest.mark.flaky(reruns=2) class TestRestfulInterfaceBase: def test_issue1232(self): @@ -87,9 +88,10 @@ def test_encode(self): assert input_ids5 == input_ids2 * 100 -@pytest.mark.order(7) -@pytest.mark.restful_interface_common -@pytest.mark.flaky(reruns=0) +@pytest.mark.order(8) +@pytest.mark.turbomind +@pytest.mark.pytorch +@pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatCompletions: def test_chat_completions_check_return_batch1(self): @@ -315,9 +317,10 @@ def test_chat_completions_longinput_batch(self): assert output.get('choices')[0].get('message').get('content') == '' -@pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind -@pytest.mark.flaky(reruns=0) +@pytest.mark.order(8) +@pytest.mark.turbomind +@pytest.mark.pytorch +@pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatInteractive: def test_chat_interactive_check_return_batch1(self): diff --git a/autotest/interface/restful/test_restful_interface_func_pytorch.py b/autotest/interface/restful/test_restful_interface_func_pytorch.py index b026e6360b..2a007f21f8 100644 --- a/autotest/interface/restful/test_restful_interface_func_pytorch.py +++ b/autotest/interface/restful/test_restful_interface_func_pytorch.py @@ -7,15 +7,15 @@ from lmdeploy.serve.openai.api_client import APIClient -BASE_HTTP_URL = 'http://10.140.0.187' -DEFAULT_PORT = 23334 +BASE_HTTP_URL = 'http://localhost' +DEFAULT_PORT = 23333 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) @pytest.mark.order(7) -@pytest.mark.restful_interface_pytorch +@pytest.mark.pytorch @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatCompletions: @@ -182,7 +182,7 @@ def test_chat_completions_longinput_stream(self): @pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind +@pytest.mark.pytorch @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatInteractive: diff --git a/autotest/interface/restful/test_restful_interface_func_turbomind.py b/autotest/interface/restful/test_restful_interface_func_turbomind.py index 33ff8e2dfa..013c157db0 100644 --- a/autotest/interface/restful/test_restful_interface_func_turbomind.py +++ b/autotest/interface/restful/test_restful_interface_func_turbomind.py @@ -7,14 +7,14 @@ from lmdeploy.serve.openai.api_client import APIClient -BASE_HTTP_URL = 'http://10.140.0.187' +BASE_HTTP_URL = 'http://localhost' DEFAULT_PORT = 23333 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) 
-@pytest.mark.order(7) +@pytest.mark.order(8) @pytest.mark.turbomind @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatCompletions: @@ -168,8 +168,8 @@ def test_chat_completions_longinput_stream(self): assert len(outputList) == 2 -@pytest.mark.order(7) -@pytest.mark.restful_interface_turbomind +@pytest.mark.order(8) +@pytest.mark.turbomind @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatInteractive: From 458e977d1cd796a37257c64c0c294142c2a297b6 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 15:02:10 +0800 Subject: [PATCH 5/7] update --- .../test_restful_interface_func_common.py | 12 +-- .../test_restful_interface_func_pytorch.py | 7 +- .../test_restful_interface_func_turbomind.py | 3 +- autotest/utils/content_detect_utils.py | 94 ------------------- 4 files changed, 9 insertions(+), 107 deletions(-) delete mode 100644 autotest/utils/content_detect_utils.py diff --git a/autotest/interface/restful/test_restful_interface_func_common.py b/autotest/interface/restful/test_restful_interface_func_common.py index e54b6bba96..b10bc62a1d 100644 --- a/autotest/interface/restful/test_restful_interface_func_common.py +++ b/autotest/interface/restful/test_restful_interface_func_common.py @@ -4,7 +4,6 @@ import pytest from tqdm import tqdm -from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams from utils.restful_return_check import (assert_chat_completions_batch_return, assert_chat_completions_stream_return, assert_chat_interactive_batch_return, @@ -12,8 +11,8 @@ from lmdeploy.serve.openai.api_client import APIClient, get_model_list -BASE_HTTP_URL = 'http://localhost' -DEFAULT_PORT = 23333 +BASE_HTTP_URL = 'http://10.140.0.187' +DEFAULT_PORT = 23334 MODEL = 'internlm/internlm2-chat-20b' MODEL_NAME = 'internlm2-chat-20b' BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) @@ -262,9 +261,8 @@ def test_chat_completions_repetition_penalty_batch(self): max_tokens=200): continue assert_chat_completions_batch_return(output, MODEL_NAME) - assert base_rps_frac_chars_in_dupe_ngrams( - 6, - output.get('choices')[0].get('message').get('content')) > 80 + assert ' is is' * 5 in output.get('choices')[0].get('message').get( + 'content') or ' a a' * 5 in output.get('choices')[0].get('message').get('content') def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) @@ -481,7 +479,7 @@ def test_chat_interactive_repetition_penalty_batch(self): request_output_len=512): continue assert_chat_interactive_batch_return(output) - assert base_rps_frac_chars_in_dupe_ngrams(6, output.get('text')) > 90 + assert 'a 上海 is a 上海, ' * 5 in output.get('text') def test_chat_interactive_with_history_batch(self): api_client = APIClient(BASE_URL) diff --git a/autotest/interface/restful/test_restful_interface_func_pytorch.py b/autotest/interface/restful/test_restful_interface_func_pytorch.py index 2a007f21f8..0d87137736 100644 --- a/autotest/interface/restful/test_restful_interface_func_pytorch.py +++ b/autotest/interface/restful/test_restful_interface_func_pytorch.py @@ -1,5 +1,4 @@ import pytest -from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams from utils.restful_return_check import (assert_chat_completions_batch_return, assert_chat_completions_stream_return, assert_chat_interactive_batch_return, @@ -14,7 +13,7 @@ BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) -@pytest.mark.order(7) +@pytest.mark.order(8) @pytest.mark.pytorch @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatCompletions: @@ -112,7 +111,7 @@ def 
test_chat_completions_repetition_penalty_stream(self): MODEL_NAME) response += outputList[index].get('choices')[0].get('delta').get( 'content') - assert base_rps_frac_chars_in_dupe_ngrams(6, response) > 90 + assert 'pls pls ' * 5 in response or 'Hi, pls intro yourself\n' * 5 in response, response def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) @@ -181,7 +180,7 @@ def test_chat_completions_longinput_stream(self): assert len(outputList) == 2 -@pytest.mark.order(7) +@pytest.mark.order(8) @pytest.mark.pytorch @pytest.mark.flaky(reruns=2) class TestRestfulInterfaceChatInteractive: diff --git a/autotest/interface/restful/test_restful_interface_func_turbomind.py b/autotest/interface/restful/test_restful_interface_func_turbomind.py index 013c157db0..15ce7d6efe 100644 --- a/autotest/interface/restful/test_restful_interface_func_turbomind.py +++ b/autotest/interface/restful/test_restful_interface_func_turbomind.py @@ -1,5 +1,4 @@ import pytest -from utils.content_detect_utils import base_rps_frac_chars_in_dupe_ngrams from utils.restful_return_check import (assert_chat_completions_batch_return, assert_chat_completions_stream_return, assert_chat_interactive_batch_return, @@ -109,7 +108,7 @@ def test_chat_completions_repetition_penalty_stream(self): MODEL_NAME) response += outputList[index].get('choices')[0].get('delta').get( 'content') - assert base_rps_frac_chars_in_dupe_ngrams(6, response) > 90 + assert 'pls pls ' * 5 in response or 'Hi, pls intro yourself\n' * 5 in response, response def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) diff --git a/autotest/utils/content_detect_utils.py b/autotest/utils/content_detect_utils.py deleted file mode 100644 index 7611c98ad3..0000000000 --- a/autotest/utils/content_detect_utils.py +++ /dev/null @@ -1,94 +0,0 @@ -import re -import string -import unicodedata -from collections import Counter - -import numpy - -TRANSLATION_TABLE_PUNCTUATION = str.maketrans('', '', string.punctuation) - - -def normalize(text: str, - remove_punct: bool = True, - lowercase: bool = True, - nfd_unicode: bool = True, - white_space: bool = True) -> str: - """Normalize the text by lowercasing and removing punctuation.""" - # remove punctuation - if remove_punct: - text = text.translate(TRANSLATION_TABLE_PUNCTUATION) - - # lowercase - if lowercase: - text = text.lower() - - if white_space: - text = text.strip() - text = re.sub(r'\s+', ' ', text) - - # NFD unicode normalization - if nfd_unicode: - text = unicodedata.normalize('NFD', text) - - return text - - -def form_ngrams(sequence, n): - history = [] - # build the first ngram, yielding only when we have a full ngram - while n > 1: - try: - next_item = next(sequence) - except StopIteration: - # no more data, terminate the generator - return - history.append(next_item) - n -= 1 - - # yield each ngram we have, then add the next item and repeat - for item in sequence: - history.append(item) - yield tuple(history) - del history[0] - - -def base_rps_frac_chars_in_dupe_ngrams(NGRAM_SIZE: int = 5, content: str = ''): - """Base class for calculating the fraction of characters in duplicate word - N-grams. - - This operates on the lower-cased, punctuation removed content. The function - also ensures that characters in overlapping ngrams are only counted once. 
- """ - normalized_content = normalize(content) - normalized_words = tuple(normalized_content.split()) - - if len(normalized_words) < NGRAM_SIZE: - return 0 - - # fetch the ngrams from the document if they exist, otherwise - # compute them - doc_n_grams = tuple(form_ngrams(iter(normalized_words), NGRAM_SIZE)) - - # keep only ngrams which occur at least twice - ngram_dupes = { - ngram - for ngram, count in Counter(doc_n_grams).items() if count > 1 - } - - duplicated_grams = numpy.zeros(len(normalized_words), dtype=int) - i = 0 - for ngram in doc_n_grams: - if ngram in ngram_dupes: - duplicated_grams[i:i + NGRAM_SIZE] = 1 - - i += 1 - - word_lengths = numpy.array(list(map(len, normalized_words))) - chars_duped = numpy.sum(word_lengths * duplicated_grams) - total_chars = numpy.sum(word_lengths) - - if total_chars == 0: - return 0 - - score = float(chars_duped / total_chars) * 100 - return score From 7389d674cf126c570daf03ca4335b7dd7ad2384f Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 15:03:17 +0800 Subject: [PATCH 6/7] update --- .../interface/restful/test_restful_interface_func_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autotest/interface/restful/test_restful_interface_func_common.py b/autotest/interface/restful/test_restful_interface_func_common.py index b10bc62a1d..60b7da57eb 100644 --- a/autotest/interface/restful/test_restful_interface_func_common.py +++ b/autotest/interface/restful/test_restful_interface_func_common.py @@ -262,7 +262,8 @@ def test_chat_completions_repetition_penalty_batch(self): continue assert_chat_completions_batch_return(output, MODEL_NAME) assert ' is is' * 5 in output.get('choices')[0].get('message').get( - 'content') or ' a a' * 5 in output.get('choices')[0].get('message').get('content') + 'content') or ' a a' * 5 in output.get('choices')[0].get( + 'message').get('content') def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) From 7ce28e27a14adc0ee56b3a3d162b4d9d3112c966 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Mon, 11 Mar 2024 16:26:22 +0800 Subject: [PATCH 7/7] fix lint --- .../interface/restful/test_restful_interface_func_pytorch.py | 3 ++- .../interface/restful/test_restful_interface_func_turbomind.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/autotest/interface/restful/test_restful_interface_func_pytorch.py b/autotest/interface/restful/test_restful_interface_func_pytorch.py index 0d87137736..2709fb0df0 100644 --- a/autotest/interface/restful/test_restful_interface_func_pytorch.py +++ b/autotest/interface/restful/test_restful_interface_func_pytorch.py @@ -111,7 +111,8 @@ def test_chat_completions_repetition_penalty_stream(self): MODEL_NAME) response += outputList[index].get('choices')[0].get('delta').get( 'content') - assert 'pls pls ' * 5 in response or 'Hi, pls intro yourself\n' * 5 in response, response + assert 'pls pls ' * 5 in response or \ + 'Hi, pls intro yourself\n' * 5 in response def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL) diff --git a/autotest/interface/restful/test_restful_interface_func_turbomind.py b/autotest/interface/restful/test_restful_interface_func_turbomind.py index 15ce7d6efe..51f55d2048 100644 --- a/autotest/interface/restful/test_restful_interface_func_turbomind.py +++ b/autotest/interface/restful/test_restful_interface_func_turbomind.py @@ -108,7 +108,8 @@ def test_chat_completions_repetition_penalty_stream(self): MODEL_NAME) response += 
outputList[index].get('choices')[0].get('delta').get( 'content') - assert 'pls pls ' * 5 in response or 'Hi, pls intro yourself\n' * 5 in response, response + assert 'pls pls ' * 5 in response or \ + 'Hi, pls intro yourself\n' * 5 in response def test_chat_completions_topp_min_batch(self): api_client = APIClient(BASE_URL)