Skip to content

Commit

Permalink
fix pr test for newest internlm2 model (#1806)
Browse files Browse the repository at this point in the history
* update

* update

* update

* update

* update

* update
  • Loading branch information
zhulinJulia24 authored Jun 20, 2024
1 parent 90c3773 commit 4eaf473
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 11 deletions.
34 changes: 25 additions & 9 deletions .github/resources/opencompass-hf-results.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,33 +50,45 @@
},
"internlm/internlm2-chat-7b": {
"ceval": "61.25",
"mmlu": "63.64",
"mmlu": "63.05",
"GaokaoBench": "54.50",
"winogrande": "73.48",
"hellaswag":"84.80",
"math":"28.14",
"wic": "60.34",
"wsc": "65.38",
"triviaqa": "58.49",
"gsm8k": "69.90",
"bbh":"57.83",
"race-middle": "88.72",
"race-high": "84.51",
"crows_pairs": "29.64"
},
"internlm/internlm2-chat-20b": {
"ceval": "-",
"ceval": "63.56",
"mmlu": "66.50",
"GaokaoBench": "57.95",
"hellaswag":"88.48",
"math":"34.68",
"wic": "-",
"wsc": "-",
"triviaqa": "-",
"gsm8k": "79.53",
"gsm8k": "75.21",
"bbh":"68.24",
"race-middle": "-",
"race-high": "-",
"crows_pairs": "-"
},
"Qwen/Qwen1.5-7B-Chat": {
"ceval": "71.12",
"mmlu": "61.98",
"mmlu": "61.82",
"GaokaoBench": "71.00",
"winogrande": "65.19",
"hellaswag":"71.61",
"math":"22.64",
"wic": "47.96",
"wsc": "41.35",
"triviaqa": "47.37",
"gsm8k": "56.25",
"bbh":"38.56",
"race-middle": "88.16",
"race-high": "83.33",
"crows_pairs": "34.02"
Expand Down Expand Up @@ -105,12 +117,16 @@
},
"meta-llama/Meta-Llama-3-8B-Instruct": {
"ceval": "52.32",
"mmlu": "67.60",
"mmlu": "68.37",
"GaokaoBench": "45.44",
"winogrande": "66.22",
"hellaswag":"74.39",
"math":"27.52",
"wic": "36.99",
"wsc": "32.69",
"triviaqa": "64.80",
"gsm8k": "79.53",
"race-middle": "86.63",
"bbh":"52.83",
"race-middle": "88.63",
"race-high": "81.22",
"crows_pairs": "86.07"
}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -726,5 +726,5 @@ jobs:
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Get overview
run: |
pip install pandas
pip install pandas fire
python3 .github/scripts/action_tools.py generate_benchmark_report $REPORT_DIR
2 changes: 2 additions & 0 deletions .github/workflows/pr_ete_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ jobs:
run: |
python3 -m pip cache dir
python3 -m pip install torch==2.1.0 torchvision==0.16.0
# install the prebuilt wheel from https://github.com/Dao-AILab/flash-attention/releases
python3 -m pip install /root/packages/flash_attn-2.5.8+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
- name: Build lmdeploy
run: |
python3 -m pip install cmake
Expand Down
4 changes: 4 additions & 0 deletions autotest/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ tp_config:
internlm2-20b: 2
Qwen-VL-Chat: 2
llava-v1.5-13b: 2
Qwen2-7B-Instruct: 2

turbomind_chat_model:
- meta-llama/Llama-2-7b-chat-hf
Expand All @@ -30,6 +31,7 @@ turbomind_chat_model:
- Qwen/Qwen1.5-7B-Chat
- Qwen/Qwen1.5-4B-Chat-AWQ
- Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct
- lmdeploy/llama2-chat-7b-w4
- baichuan-inc/Baichuan2-7B-Chat
- 01-ai/Yi-6B-Chat
Expand All @@ -53,6 +55,7 @@ pytorch_chat_model:
- 01-ai/Yi-6B-Chat
- Qwen/Qwen1.5-7B-Chat
- Qwen/Qwen1.5-MoE-A2.7B-Chat
- Qwen/Qwen2-0.5B-Instruct
- mistralai/Mistral-7B-Instruct-v0.1
- mistralai/Mixtral-8x7B-Instruct-v0.1
- google/gemma-7b-it
Expand Down Expand Up @@ -94,6 +97,7 @@ quatization_case_config:
- internlm/internlm2-20b
- Qwen/Qwen1.5-7B-Chat
- Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct
- Qwen/Qwen-VL-Chat
- meta-llama/Meta-Llama-3-8B-Instruct
- liuhaotian/llava-v1.5-7b
Expand Down
2 changes: 1 addition & 1 deletion autotest/utils/get_run_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def get_model_name(model):
return 'vicuna'
if ('yi-vl' in model_name):
return 'yi-vl'
if ('qwen1.5' in model_name):
if ('qwen' in model_name):
return 'qwen'
if ('internvl') in model_name:
return 'internvl-internlm2'
Expand Down

0 comments on commit 4eaf473

Please sign in to comment.