
Commit

fix condition
zhulin1 committed Mar 5, 2024
1 parent 54f09d0 commit 7ca1d5d
Showing 6 changed files with 60 additions and 43 deletions.
80 changes: 40 additions & 40 deletions .github/workflows/daily_ete_test.yml
@@ -15,14 +15,14 @@ on:
default: 'main'
backend:
required: true
-description: 'Set backend testcase filter: turbomind or pytorch or turbomind, pytorch. Default is "turbomind, pytorch"'
+description: 'Set backend testcase filter: turbomind or pytorch or turbomind, pytorch. Default is "['turbomind', 'pytorch']"'
type: string
-default: 'turbomind, pytorch'
+default: "['turbomind', 'pytorch']"
model:
required: true
description: 'Set testcase module filter: chat, restful, pipeline, quantization. Default contains all models'
type: string
-default: 'w4a16,kvint8,w8a8,kvint8-w4a16,convert,pipeline,restful,chat-workspace,chat-hf'
+default: "['quantization','convert','pipeline','restful','chat','interface-pipeline']"
schedule:
- cron: '00 18 * * *'

@@ -34,7 +34,7 @@ env:
jobs:
test_functions:
runs-on: [self-hosted, linux-a100]
-timeout-minutes: 420
+timeout-minutes: 240
env:
REPORT_DIR: /nvme/qa_test_models/test-reports
container:
@@ -95,86 +95,86 @@ jobs:
lmdeploy check_env
- name: Test lmdeploy - quantization w4a16
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'w4a16') && contains(${{github.event.inputs.backend}}, 'turbomind')
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'quantization')
run: |
pytest autotest/tools/quantization/test_quantization_w4a16.py -m 'not pr_test' -n 8 --alluredir=allure-results --clean-alluredir
- name: Test lmdeploy - quantization kv int8
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'kvint8') && contains(${{github.event.inputs.backend}}, 'turbomind')
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'quantization')
run: |
pytest autotest/tools/quantization/test_quantization_kvint8.py -n 8 --alluredir=allure-results
- name: Test lmdeploy - quantization w8a8
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'w8a8') && contains(${{github.event.inputs.backend}}, 'pytorch')
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'pytorch') && contains(fromJSON(${{github.event.inputs.model}}), 'quantization')
run: |
pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=allure-results
- name: Test lmdeploy - quantization kv int8 and w4a16
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'kvint8-w4a16') && contains(${{github.event.inputs.backend}}, 'turbomind')
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'quantization')
run: |
pytest autotest/tools/quantization/test_quantization_kvint8_w4a16.py -n 8 --alluredir=allure-results
- name: Test lmdeploy - convert
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'convert') && contains(${{github.event.inputs.backend}}, 'turbomind')
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'convert')
run: |
pytest autotest/tools/convert -m 'not pr_test' -n 8 --alluredir=allure-results
-- name: Test lmdeploy - interface pipeline turbomind case
+- name: Test lmdeploy - chat workspace
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'interface-pipeline') && contains(${{github.event.inputs.backend}}, 'turbomind')
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'chat')
timeout-minutes: 20
run: |
-pytest autotest/interface/pipeline/test_pipeline_turbomind_func.py -m 'not pr_test' --alluredir=allure-results
+pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
+pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
+- name: Test lmdeploy - chat hf turbomind
+continue-on-error: true
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'chat')
+timeout-minutes: 20
+run: |
+pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
+pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
+- name: Test lmdeploy - chat hf torch
+continue-on-error: true
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'pytorch') && contains(fromJSON(${{github.event.inputs.model}}), 'chat')
+timeout-minutes: 20
+run: |
+pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
+pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - pipeline turbomind
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'pipeline') && contains(${{github.event.inputs.backend}}, 'turbomind')
-timeout-minutes: 45
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'pipeline')
+timeout-minutes: 25
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - pipeline torch
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'pipeline') && contains(${{github.event.inputs.backend}}, 'pytorch')
-timeout-minutes: 75
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'pytorch') && contains(fromJSON(${{github.event.inputs.model}}), 'pipeline')
+timeout-minutes: 25
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - restful turbomind
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'restful') && contains(${{github.event.inputs.backend}}, 'turbomind')
-timeout-minutes: 60
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'restful')
+timeout-minutes: 30
run: |
pytest autotest/tools/restful/test_restful_chat_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/restful/test_restful_chat_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - restful torch
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'restful') && contains(${{github.event.inputs.backend}}, 'turbomind')
-timeout-minutes: 80
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'pytorch') && contains(fromJSON(${{github.event.inputs.model}}), 'restful')
+timeout-minutes: 40
run: |
pytest autotest/tools/restful/test_restful_chat_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/restful/test_restful_chat_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
-- name: Test lmdeploy - chat workspace
-continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'chat-workspace') && contains(${{github.event.inputs.backend}}, 'turbomind')
-timeout-minutes: 30
-run: |
-pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
-pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
-- name: Test lmdeploy - chat hf turbomind
-continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'chat-hf') && contains(${{github.event.inputs.backend}}, 'turbomind')
-timeout-minutes: 45
-run: |
-pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
-pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
-- name: Test lmdeploy - chat hf torch
+- name: Test lmdeploy - interface pipeline turbomind case
continue-on-error: true
-if: contains(${{github.event.inputs.model}}, 'chat-hf') && contains(${{github.event.inputs.backend}}, 'pytorch')
-timeout-minutes: 60
+if: contains(fromJSON(${{github.event.inputs.backend}}), 'turbomind') && contains(fromJSON(${{github.event.inputs.model}}), 'interface-pipeline')
+timeout-minutes: 20
run: |
-pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
-pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
+pytest autotest/interface/pipeline/test_pipeline_turbomind_func.py -m 'not pr_test' --alluredir=allure-results
- name: Test lmdeploy - rerun all fail cases
-timeout-minutes: 60
+timeout-minutes: 30
run: |
pytest autotest --lf --alluredir=allure-results
- name: Generate reports
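Note: the condition rewrite above is the heart of this commit. The workflow inputs now default to JSON-style lists, and each step's if: parses them with fromJSON(), so contains() tests list membership instead of doing a substring match on a comma-separated string. A minimal, hypothetical workflow illustrating the pattern (not part of this repository; the single-quoted list mirrors the defaults used above, which GitHub's fromJSON parser accepts):

on:
  workflow_dispatch:
    inputs:
      backend:
        type: string
        default: "['turbomind', 'pytorch']"
jobs:
  demo:
    runs-on: ubuntu-latest
    steps:
      # fromJSON turns the input string into an array, so contains()
      # matches whole elements ('turbomind') rather than substrings.
      - name: run only when turbomind is selected
        if: contains(fromJSON(github.event.inputs.backend), 'turbomind')
        run: echo "turbomind selected"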
11 changes: 11 additions & 0 deletions autotest/prompt_case.yaml
@@ -77,6 +77,9 @@ chinese_poem_case:
- internlm2-20b:
- len_g:
5
+- falcon-7b:
+- len_g:
+5
english_poem_case:
- write a romantic English poem:
- contain:
@@ -110,6 +113,14 @@ emoji_case:
- \u2714
-
- emoji
+- baichuan2-7b-chat:
+- contain:
+- 👍
+- 😊
+- \u2714
+-
+- emoji
+- \!
traditional_chinese_case:
- 使用繁體介紹香港維多利亞港:
- contain:
3 changes: 2 additions & 1 deletion autotest/tools/pipeline/test_pipeline_chat_pytorch.py
@@ -40,7 +40,8 @@ def test_pipeline_chat_pytorch_tp1(config, common_case_config, model,
@pytest.mark.parametrize('model', getModelList(tp_num=2))
def test_pipeline_chat_pytorch_tp2(config, common_case_config, model,
worker_id):
-    os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
+    if 'gw' in worker_id:
+        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
tp_num=2)
p = Process(target=run_pipeline_chat_test,
args=(config, common_case_config, model, 'pytorch'))
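Note: the guard added here keys off pytest-xdist's worker naming. Under "pytest -n N" the worker_id fixture reports 'gw0', 'gw1', and so on, while a plain single-process run reports 'master', where no per-worker GPU pinning is wanted. A rough Python sketch of the idea; the body of get_cuda_id_by_workerid below is an assumption about the helper's behavior, not the repository's actual code:

import os

def get_cuda_id_by_workerid(worker_id, tp_num=1):
    # Assumed behavior: map xdist worker 'gw3' with tp_num=2 to devices '6,7'.
    index = int(worker_id.replace('gw', ''))
    first = index * tp_num
    return ','.join(str(first + i) for i in range(tp_num))

def pin_gpus(worker_id, tp_num=1):
    # 'gw' only appears in xdist worker ids; single-process runs report
    # 'master', so CUDA_VISIBLE_DEVICES is left untouched there.
    if 'gw' in worker_id:
        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(
            worker_id, tp_num=tp_num)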
6 changes: 4 additions & 2 deletions autotest/tools/pipeline/test_pipeline_chat_turbomind.py
@@ -14,7 +14,8 @@
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_all_model_list(tp_num=1))
def test_pipeline_chat_tp1(config, common_case_config, model, worker_id):
-    os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
+    if 'gw' in worker_id:
+        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
p = Process(target=run_pipeline_chat_test,
args=(config, common_case_config, model, 'turbomind'))
p.start()
@@ -29,7 +30,8 @@ def test_pipeline_chat_tp1(config, common_case_config, model, worker_id):
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_all_model_list(tp_num=2))
def test_pipeline_chat_tp2(config, common_case_config, model, worker_id):
-    os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
+    if 'gw' in worker_id:
+        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
tp_num=2)
p = Process(target=run_pipeline_chat_test,
args=(config, common_case_config, model, 'turbomind'))
1 change: 1 addition & 0 deletions autotest/utils/pipeline_chat.py
@@ -44,6 +44,7 @@ def run_pipeline_chat_test(config, cases_info, model_case, type):

# run testcases
gen_config = GenerationConfig(temperature=0.01)
+    gen_config = GenerationConfig()
for case in cases_info.keys():
if (case == 'memory_test'
or case == 'emoji_case') and 'chat' not in model_case.lower():
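Note: the added line leaves both assignments in place: gen_config is first built with temperature=0.01 and then immediately rebound to a default GenerationConfig(), so the pipeline tests now run with the library's default sampling settings rather than the near-greedy temperature.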
2 changes: 2 additions & 0 deletions autotest/utils/run_client_chat.py
@@ -32,6 +32,8 @@ def command_line_test(config,
cmd += ' --model-format hf'
elif 'w4' in model_case or '4bits' in model_case:
cmd += ' --model-format awq'
+    if 'chat' not in model_case.lower():
+        cmd += ' --cap completion'
return command_test(config, [cmd], model_case, case, case_info,
type == 'turbomind')

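Note: the two added lines route base (non-chat) models through completion mode: when the model name does not contain 'chat', the generated CLI command gets ' --cap completion' appended, so the chat tool exercises plain text completion instead of applying a chat template.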
