diff --git a/.github/resources/opencompass-hf-results.json b/.github/resources/opencompass-hf-results.json index 2e64e3822..6e7c45365 100644 --- a/.github/resources/opencompass-hf-results.json +++ b/.github/resources/opencompass-hf-results.json @@ -50,33 +50,45 @@ }, "internlm/internlm2-chat-7b": { "ceval": "61.25", - "mmlu": "63.64", + "mmlu": "63.05", + "GaokaoBench": "54.50", + "winogrande": "73.48", + "hellaswag":"84.80", + "math":"28.14", "wic": "60.34", "wsc": "65.38", - "triviaqa": "58.49", "gsm8k": "69.90", + "bbh":"57.83", "race-middle": "88.72", "race-high": "84.51", "crows_pairs": "29.64" }, "internlm/internlm2-chat-20b": { - "ceval": "-", + "ceval": "63.56", "mmlu": "66.50", + "GaokaoBench": "57.95", + "hellaswag":"88.48", + "math":"34.68", "wic": "-", "wsc": "-", "triviaqa": "-", - "gsm8k": "79.53", + "gsm8k": "75.21", + "bbh":"68.24", "race-middle": "-", "race-high": "-", "crows_pairs": "-" }, "Qwen/Qwen1.5-7B-Chat": { "ceval": "71.12", - "mmlu": "61.98", + "mmlu": "61.82", + "GaokaoBench": "71.00", + "winogrande": "65.19", + "hellaswag":"71.61", + "math":"22.64", "wic": "47.96", "wsc": "41.35", - "triviaqa": "47.37", "gsm8k": "56.25", + "bbh":"38.56", "race-middle": "88.16", "race-high": "83.33", "crows_pairs": "34.02" @@ -105,12 +117,16 @@ }, "meta-llama/Meta-Llama-3-8B-Instruct": { "ceval": "52.32", - "mmlu": "67.60", + "mmlu": "68.37", + "GaokaoBench": "45.44", + "winogrande": "66.22", + "hellaswag":"74.39", + "math":"27.52", "wic": "36.99", "wsc": "32.69", - "triviaqa": "64.80", "gsm8k": "79.53", - "race-middle": "86.63", + "bbh":"52.83", + "race-middle": "88.63", "race-high": "81.22", "crows_pairs": "86.07" } diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ff2949300..f0bff844d 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -726,5 +726,5 @@ jobs: ref: ${{github.event.inputs.repo_ref || 'main'}} - name: Get overview run: | - pip install pandas + pip install pandas fire 
python3 .github/scripts/action_tools.py generate_benchmark_report $REPORT_DIR diff --git a/.github/workflows/pr_ete_test.yml b/.github/workflows/pr_ete_test.yml index df9bd7c45..c47730096 100644 --- a/.github/workflows/pr_ete_test.yml +++ b/.github/workflows/pr_ete_test.yml @@ -52,6 +52,8 @@ jobs: run: | python3 -m pip cache dir python3 -m pip install torch==2.1.0 torchvision==0.16.0 + # install the prebuilt wheel from https://github.com/Dao-AILab/flash-attention/releases + python3 -m pip install /root/packages/flash_attn-2.5.8+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl - name: Build lmdeploy run: | python3 -m pip install cmake diff --git a/autotest/config.yaml b/autotest/config.yaml index 8b1d164f9..430efab00 100644 @@ -11,6 +11,7 @@ tp_config: internlm2-20b: 2 Qwen-VL-Chat: 2 llava-v1.5-13b: 2 + Qwen2-7B-Instruct: 2 turbomind_chat_model: - meta-llama/Llama-2-7b-chat-hf @@ -30,6 +31,7 @@ turbomind_chat_model: - Qwen/Qwen1.5-7B-Chat - Qwen/Qwen1.5-4B-Chat-AWQ - Qwen/Qwen2-7B-Instruct + - Qwen/Qwen2-1.5B-Instruct - lmdeploy/llama2-chat-7b-w4 - baichuan-inc/Baichuan2-7B-Chat - 01-ai/Yi-6B-Chat @@ -53,6 +55,7 @@ pytorch_chat_model: - 01-ai/Yi-6B-Chat - Qwen/Qwen1.5-7B-Chat - Qwen/Qwen1.5-MoE-A2.7B-Chat + - Qwen/Qwen2-0.5B-Instruct - mistralai/Mistral-7B-Instruct-v0.1 - mistralai/Mixtral-8x7B-Instruct-v0.1 - google/gemma-7b-it @@ -94,6 +97,7 @@ quatization_case_config: - internlm/internlm2-20b - Qwen/Qwen1.5-7B-Chat - Qwen/Qwen2-7B-Instruct + - Qwen/Qwen2-1.5B-Instruct - Qwen/Qwen-VL-Chat - meta-llama/Meta-Llama-3-8B-Instruct - liuhaotian/llava-v1.5-7b diff --git a/autotest/utils/get_run_config.py b/autotest/utils/get_run_config.py index 2a585c535..858c17793 100644 --- a/autotest/utils/get_run_config.py +++ b/autotest/utils/get_run_config.py @@ -87,7 +87,7 @@ def get_model_name(model): return 'vicuna' if ('yi-vl' in model_name): return 'yi-vl' - if ('qwen1.5' in model_name): + if ('qwen' in model_name): 
return 'qwen' if ('internvl') in model_name: return 'internvl-internlm2'