Merge branch 'main' into internlm3-awq
Conflicts:
	lmdeploy/lite/apis/calibrate.py
	lmdeploy/lite/quantization/awq.py
AllentDan committed Dec 18, 2024
2 parents a827598 + bafa3d2 commit b1c2c0d
Showing 448 changed files with 30,747 additions and 14,562 deletions.
14 changes: 7 additions & 7 deletions .github/CONTRIBUTING.md
@@ -1,6 +1,6 @@
-## Contributing to InternLM
+## Contributing to LMDeploy

-Welcome to the InternLM community, all kinds of contributions are welcomed, including but not limited to
+Welcome to the LMDeploy community, all kinds of contributions are welcomed, including but not limited to

**Fix bug**

@@ -56,7 +56,7 @@ upstream git@github.com:InternLM/lmdeploy.git (push)
#### 2. Configure pre-commit

-You should configure [pre-commit](https://pre-commit.com/#intro) in the local development environment to make sure the code style matches that of InternLM. **Note**: The following code should be executed under the lmdeploy directory.
+You should configure [pre-commit](https://pre-commit.com/#intro) in the local development environment to make sure the code style matches that of LMDeploy. **Note**: The following code should be executed under the lmdeploy directory.

```shell
pip install -U pre-commit
@@ -96,7 +96,7 @@ git checkout -b yhc/refactor_contributing_doc
In subsequent development, if the master branch of the local repository is behind the master branch of "upstream", we need to pull the upstream for synchronization, and then execute the above command:

```shell
-git pull upstream master
+git pull upstream main
```

#### 4. Commit the code and pass the unit test
@@ -151,7 +151,7 @@ Find more details about Pull Request description in [pull request guidelines](#p

<img src="https://user-images.githubusercontent.com/57566630/167307490-f9ebf9fa-63c0-4d83-8ba1-081ea169eb3a.png" width="1200">

-IternLM will run unit test for the posted Pull Request on different platforms (Linux, Window, Mac), based on different versions of Python, PyTorch, CUDA to make sure the code is correct. We can see the specific test information by clicking `Details` in the above image so that we can modify the code.
+LMDeploy will run unit test for the posted Pull Request on different platforms (Linux, Window, Mac), based on different versions of Python, PyTorch, CUDA to make sure the code is correct. We can see the specific test information by clicking `Details` in the above image so that we can modify the code.

(3) If the Pull Request passes the CI, then you can wait for the review from other developers. You'll modify the code based on the reviewer's comments, and repeat the steps [4](#4-commit-the-code-and-pass-the-unit-test)-[5](#5-push-the-code-to-remote) until all reviewers approve it. Then, we will merge it ASAP.

@@ -163,14 +163,14 @@ If your local branch conflicts with the latest master branch of "upstream", you'

```shell
git fetch --all --prune
-git rebase upstream/master
+git rebase upstream/main
```

or

```shell
git fetch --all --prune
-git merge upstream/master
+git merge upstream/main
```

If you are very good at handling conflicts, then you can use rebase to resolve conflicts, as this will keep your commit logs tidy. If you are not familiar with `rebase`, then you can use `merge` to resolve conflicts.
25 changes: 25 additions & 0 deletions .github/scripts/action_tools.py
@@ -269,5 +269,30 @@ def generate_benchmark_report(report_path: str):
_append_summary('## Benchmark Results End')


+def generate_csv_from_profile_result(file_path: str, out_path: str):
+    with open(file_path, 'r') as f:
+        data = f.readlines()
+    data = [json.loads(line) for line in data]
+
+    data_csv = []
+    for item in data:
+        row = [
+            item.get('request_rate'),
+            item.get('completed'),
+            round(item.get('completed') / item.get('duration'), 3),
+            round(item.get('median_ttft_ms'), 3),
+            round(item.get('output_throughput'), 3)
+        ]
+        data_csv.append(row)
+    import csv
+    with open(out_path, 'w', newline='') as f:
+        writer = csv.writer(f)
+        writer.writerow([
+            'request_rate', 'completed', 'RPM', 'median_ttft_ms',
+            'output_throughput'
+        ])
+        writer.writerows(data_csv)


if __name__ == '__main__':
    fire.Fire()
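
Editor's note: each line of the profiling result consumed by the new helper is a JSON object carrying the keys the code reads (`request_rate`, `completed`, `duration`, `median_ttft_ms`, `output_throughput`), and since the script ends in `fire.Fire()`, the helper is also callable as `python .github/scripts/action_tools.py generate_csv_from_profile_result <in> <out>`. A minimal sketch of the round trip, assuming a fabricated one-line input file and that `action_tools.py` is importable as a module:

```python
# Sketch: run the new helper on a fabricated one-line JSONL result.
# The file names, sample values, and importing action_tools as a
# module are assumptions for illustration only.
import json

from action_tools import generate_csv_from_profile_result

sample = {
    'request_rate': 1.0,
    'completed': 1000,
    'duration': 600.0,  # seconds over which the 1000 requests completed
    'median_ttft_ms': 52.4,
    'output_throughput': 812.7,
}
with open('profile.jsonl', 'w') as f:
    f.write(json.dumps(sample) + '\n')

generate_csv_from_profile_result('profile.jsonl', 'profile.csv')
# profile.csv now contains:
#   request_rate,completed,RPM,median_ttft_ms,output_throughput
#   1.0,1000,1.667,52.4,812.7
```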
142 changes: 134 additions & 8 deletions .github/scripts/eval_base_config.py
@@ -4,6 +4,8 @@

with read_base():
    # choose a list of datasets
+    from opencompass.configs.datasets.ARC_c.ARC_c_few_shot_ppl import \
+        ARC_c_datasets  # noqa: F401, E501
    from opencompass.configs.datasets.bbh.bbh_gen_98fba6 import \
        bbh_datasets  # noqa: F401, E501
    from opencompass.configs.datasets.ceval.ceval_ppl import \
@@ -14,17 +16,43 @@
        crowspairs_datasets  # noqa: F401, E501
    from opencompass.configs.datasets.drop.drop_gen_a2697c import \
        drop_datasets  # noqa: F401, E501
-    from opencompass.configs.datasets.gpqa.gpqa_ppl_6bf57a import \
+    # Corebench v1.7
+    from opencompass.configs.datasets.GaokaoBench.GaokaoBench_no_subjective_gen_d21e37 import \
+        GaokaoBench_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.gpqa.gpqa_few_shot_ppl_4b5a83 import \
        gpqa_datasets  # noqa: F401, E501
-    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import \
+    from opencompass.configs.datasets.gsm8k.gsm8k_gen_17d0dc import \
        gsm8k_datasets  # noqa: F401, E501
-    from opencompass.configs.datasets.hellaswag.hellaswag_ppl import \
+    from opencompass.configs.datasets.hellaswag.hellaswag_10shot_ppl_59c85e import \
        hellaswag_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.humaneval.internal_humaneval_gen_ce6b06 import \
+        humaneval_datasets as humaneval_v2_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.humaneval.internal_humaneval_gen_d2537e import \
+        humaneval_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.math.math_4shot_base_gen_43d5b6 import \
+        math_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.MathBench.mathbench_2024_few_shot_mixed_4a3fd4 import \
+        mathbench_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.mbpp.sanitized_mbpp_gen_742f0c import \
+        sanitized_mbpp_datasets  # noqa: F401, E501
    from opencompass.configs.datasets.mmlu.mmlu_ppl_ac766d import \
        mmlu_datasets  # noqa: F401, E501
-    from opencompass.configs.datasets.race.race_ppl_a138cd import \
+    from opencompass.configs.datasets.mmlu_pro.mmlu_pro_few_shot_gen_bfaf90 import \
+        mmlu_pro_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.nq.nq_open_1shot_gen_20a989 import \
+        nq_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.race.race_few_shot_ppl import \
        race_datasets  # noqa: F401, E501
-    # read models
+    from opencompass.configs.datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_few_shot_ppl import \
+        BoolQ_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import \
+        TheoremQA_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.triviaqa.triviaqa_wiki_1shot_gen_20a989 import \
+        triviaqa_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.wikibench.wikibench_few_shot_ppl_c23d79 import \
+        wikibench_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.winogrande.winogrande_5shot_ll_252f01 import \
+        winogrande_datasets  # noqa: F401, E501
    from opencompass.configs.models.baichuan.hf_baichuan_7b import \
        models as hf_baichuan_7b  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma_7b import \
@@ -49,6 +77,8 @@
        models as hf_mistral_7b_v0_1  # noqa: F401, E501
    from opencompass.configs.models.mistral.hf_mixtral_8x7b_v0_1 import \
        models as hf_mixtral_8x7b_v0_1  # noqa: F401, E501
+    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b import \
+        models as lmdeploy_qwen2_5_7b  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen1_5_7b import \
        models as hf_qwen1_5_7b  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen2_7b import \
@@ -59,10 +89,106 @@
        models as lmdeploy_qwen1_5_7b  # noqa: F401, E501
    from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import \
        models as lmdeploy_qwen2_7b  # noqa: F401, E501
-    # and output the results in a chosen format
-    from opencompass.configs.summarizers.medium import \
-        summarizer  # noqa: F401, E501
+    # Summary Groups
+    from opencompass.configs.summarizers.groups.cmmlu import \
+        cmmlu_summary_groups  # noqa: F401, E501
+    from opencompass.configs.summarizers.groups.GaokaoBench import \
+        GaokaoBench_summary_groups  # noqa: F401, E501
+    from opencompass.configs.summarizers.groups.mathbench_v1_2024 import \
+        mathbench_2024_summary_groups  # noqa: F401, E501
+    from opencompass.configs.summarizers.groups.mmlu import \
+        mmlu_summary_groups  # noqa: F401, E501
+    from opencompass.configs.summarizers.groups.mmlu_pro import \
+        mmlu_pro_summary_groups  # noqa: F401, E501

+# read models
race_datasets = [race_datasets[1]]
+summarizer = dict(
+    dataset_abbrs=[
+        ['race-high', 'accuracy'],
+        ['ARC-c', 'accuracy'],
+        ['BoolQ', 'accuracy'],
+        ['mmlu_pro', 'naive_average'],
+        ['GPQA_diamond', 'accuracy'],
+        ['cmmlu', 'naive_average'],
+        ['mmlu', 'naive_average'],
+        ['drop', 'accuracy'],
+        ['bbh', 'naive_average'],
+        ['math', 'accuracy'],
+        ['openai_humaneval', 'humaneval_pass@1'],
+        ['openai_humaneval_v2', 'humaneval_pass@1'],
+        ['sanitized_mbpp', 'score'],
+        ['wikibench-wiki-single_choice_cncircular', 'perf_4'],
+        ['gsm8k', 'accuracy'],
+        ['GaokaoBench', 'weighted_average'],
+        ['triviaqa_wiki_1shot', 'score'],
+        ['nq_open_1shot', 'score'],
+        ['winogrande', 'accuracy'],
+        ['hellaswag', 'accuracy'],
+        ['TheoremQA', 'score'],
+        '###### MathBench-A: Application Part ######',
+        'college',
+        'high',
+        'middle',
+        'primary',
+        'arithmetic',
+        'mathbench-a (average)',
+        '###### MathBench-T: Theory Part ######',
+        'college_knowledge',
+        'high_knowledge',
+        'middle_knowledge',
+        'primary_knowledge',
+        'mathbench-t (average)',
+        '###### Overall: Average between MathBench-A and MathBench-T ######',
+        'Overall',
+        '',
+        'mmlu',
+        'mmlu-stem',
+        'mmlu-social-science',
+        'mmlu-humanities',
+        'mmlu-other',
+        'cmmlu',
+        'cmmlu-stem',
+        'cmmlu-social-science',
+        'cmmlu-humanities',
+        'cmmlu-other',
+        'cmmlu-china-specific',
+        'mmlu_pro',
+        'mmlu_pro_biology',
+        'mmlu_pro_business',
+        'mmlu_pro_chemistry',
+        'mmlu_pro_computer_science',
+        'mmlu_pro_economics',
+        'mmlu_pro_engineering',
+        'mmlu_pro_health',
+        'mmlu_pro_history',
+        'mmlu_pro_law',
+        'mmlu_pro_math',
+        'mmlu_pro_philosophy',
+        'mmlu_pro_physics',
+        'mmlu_pro_psychology',
+        'mmlu_pro_other',
+    ],
+    summary_groups=sum(
+        [v for k, v in locals().items() if k.endswith('_summary_groups')], []),
+)

+turbomind_qwen1_5_7b = deepcopy(*lmdeploy_qwen1_5_7b)
+turbomind_qwen2_7b = deepcopy(*lmdeploy_qwen2_7b)
+turbomind_qwen2_5_7b = deepcopy(*lmdeploy_qwen2_5_7b)
+turbomind_qwen2_5_14b = deepcopy(*lmdeploy_qwen2_5_7b)
+turbomind_qwen2_5_14b['path'] = 'Qwen/Qwen2.5-14B'
+turbomind_internlm2_5_7b = deepcopy(*lmdeploy_internlm2_5_7b)
+turbomind_internlm2_5_7b_4bits = deepcopy(*lmdeploy_internlm2_5_7b)
+turbomind_internlm2_5_7b_batch1 = deepcopy(*lmdeploy_internlm2_5_7b)
+turbomind_internlm2_5_7b_batch1_4bits = deepcopy(*lmdeploy_internlm2_5_7b)

+for model in [v for k, v in locals().items() if k.endswith('_4bits')]:
+    model['engine_config']['model_format'] = 'awq'
+    model['abbr'] = model['abbr'] + '_4bits'
+    model['path'] = model['path'] + '-inner-4bits'

+for model in [v for k, v in locals().items() if '_batch1' in k]:
+    model['abbr'] = model['abbr'] + '_batch1'
+    model['engine_config']['max_batch_size'] = 1
+    model['batch_size'] = 1
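
Editor's note on the idiom in this new tail: each imported `lmdeploy_*` name is an opencompass `models` list, which for these single-model config files holds exactly one config dict, so `deepcopy(*lmdeploy_qwen2_5_7b)` unpacks that lone dict and deep-copies it; the `locals()` scans then pick up every copy whose variable name carries a `_4bits` or `_batch1` marker and mutate it in place. A minimal self-contained sketch of the pattern, with fabricated config values:

```python
# Sketch of the variant-generation idiom above; all config values here
# (abbr, path, batch size) are fabricated, only the pattern matches.
from copy import deepcopy

# opencompass model configs bind `models`, a one-element list of dicts.
lmdeploy_demo_7b = [
    dict(abbr='demo-7b',
         path='org/Demo-7B',
         engine_config=dict(max_batch_size=128))
]

# deepcopy(*one_element_list) unpacks the single dict and copies it,
# so the mutations below never touch the shared base config.
turbomind_demo_7b_4bits = deepcopy(*lmdeploy_demo_7b)

# Scan the namespace for copies whose names end in `_4bits` and switch
# them to the AWQ weight format, mirroring the loop in the diff.
for model in [v for k, v in locals().items() if k.endswith('_4bits')]:
    model['engine_config']['model_format'] = 'awq'
    model['abbr'] = model['abbr'] + '_4bits'
    model['path'] = model['path'] + '-inner-4bits'

assert turbomind_demo_7b_4bits['path'] == 'org/Demo-7B-inner-4bits'
```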
