[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
opea-project · Oct 29, 2024 · 66269c7 · 66269c7
1 parent 41d4d92
commit 66269c7
Show file tree

Hide file tree

Showing 37 changed files with 1,490 additions and 738 deletions.
diff --git a/evals/evaluation/HELMET/README.md b/evals/evaluation/HELMET/README.md
@@ -157,7 +157,7 @@ python eval.py --config configs/cite.yaml --use_vllm
 Disclaimer: 
 VLLM can be much faster than using the native HuggingFace generation; however, we found that the results can be slightly different, so we recommend using the native HuggingFace generation for the final evaluation.
 All reported results in the paper are from the native HuggingFace generation.
-The speedup is much more noticable for tasks that generates more tokens (e.g., summarization may see up to 2x speedup), whereas the speedup is less noticable for tasks that generate fewer tokens (e.g., JSON KV may see less than 5% speedup).
+The speedup is much more noticeable for tasks that generate more tokens (e.g., summarization may see up to 2x speedup), whereas the speedup is less noticeable for tasks that generate fewer tokens (e.g., JSON KV may see less than 5% speedup).
 
 </details>
 
@@ -211,7 +211,7 @@ Please also cite the original dataset creators, listed below:
 @inproceedings{mallen-etal-2023-trust,
     title = "When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories",
     author = "Mallen, Alex  and
-      Asai, Akari  and
+      Asai, Akari  and
       Zhong, Victor  and
       Das, Rajarshi  and
       Khashabi, Daniel  and

diff --git a/evals/evaluation/HELMET/arguments.py b/evals/evaluation/HELMET/arguments.py
@@ -1,8 +1,13 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
-import yaml
 import ast
 import os
 
+import yaml
+
+
 def parse_arguments():
     parser = argparse.ArgumentParser(description="evaluation on downstream tasks")
     parser.add_argument("--config", type=str, default=None, help="path to config file")
@@ -27,27 +32,59 @@ def parse_arguments():
 
     # evaluation settings
     parser.add_argument("--shots", type=int, default=5, help="total number of demos (encoder + decoder)")
-    parser.add_argument("--input_max_length", type=str, default='8192', help="the maximum number of tokens of the input, we truncate the end of the context; can be separated by comma to match the specified datasets")
+    parser.add_argument(
+        "--input_max_length",
+        type=str,
+        default="8192",
+        help="the maximum number of tokens of the input, we truncate the end of the context; can be separated by comma to match the specified datasets",
+    )
 
     # generation settings
-    parser.add_argument("--do_sample", type=ast.literal_eval, choices=[True, False], default=False, help="whether to use sampling (false is greedy), overwrites temperature")
-    parser.add_argument("--generation_max_length", type=str, default='10', help="max number of tokens to generate, can be separated by comma to match the specified datasets")
+    parser.add_argument(
+        "--do_sample",
+        type=ast.literal_eval,
+        choices=[True, False],
+        default=False,
+        help="whether to use sampling (false is greedy), overwrites temperature",
+    )
+    parser.add_argument(
+        "--generation_max_length",
+        type=str,
+        default="10",
+        help="max number of tokens to generate, can be separated by comma to match the specified datasets",
+    )
     parser.add_argument("--generation_min_length", type=int, default=0, help="min number of tokens to generate")
     parser.add_argument("--temperature", type=float, default=1.0, help="generation temperature")
     parser.add_argument("--top_p", type=float, default=1.0, help="top-p parameter for nucleus sampling")
-    parser.add_argument("--stop_newline", type=ast.literal_eval, choices=[True, False], default=False, help="whether to stop generation at newline")
+    parser.add_argument(
+        "--stop_newline",
+        type=ast.literal_eval,
+        choices=[True, False],
+        default=False,
+        help="whether to stop generation at newline",
+    )
 
     # model specific settings
     parser.add_argument("--seed", type=int, default=42, help="random seed")
     parser.add_argument("--no_cuda", action="store_true", help="disable cuda")
     parser.add_argument("--no_bf16", action="store_true", help="disable bf16 and use fp32")
     parser.add_argument("--no_torch_compile", action="store_true", help="disable cuda")
-    parser.add_argument("--use_chat_template", type=ast.literal_eval, choices=[True, False], default=False, help="whether to use chat template")
+    parser.add_argument(
+        "--use_chat_template",
+        type=ast.literal_eval,
+        choices=[True, False],
+        default=False,
+        help="whether to use chat template",
+    )
     parser.add_argument("--rope_theta", type=int, default=None, help="override rope theta")
 
     # misc
     parser.add_argument("--debug", action="store_true", help="for debugging")
-    parser.add_argument("--count_tokens", action="store_true", help="instead of running generation, just count the number of tokens (only for HF models not API)")
+    parser.add_argument(
+        "--count_tokens",
+        action="store_true",
+        help="instead of running generation, just count the number of tokens (only for HF models not API)",
+    )
 
     args = parser.parse_args()
     config = yaml.safe_load(open(args.config)) if args.config is not None else {}

diff --git a/evals/evaluation/HELMET/configs/cite.yaml b/evals/evaluation/HELMET/configs/cite.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 131072,131072
 datasets: alce_asqa_700,alce_qampari_700
 generation_max_length: 300,300

diff --git a/evals/evaluation/HELMET/configs/cite_short.yaml b/evals/evaluation/HELMET/configs/cite_short.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 8192,16384,32768,65536,8192,16384,32768,65536
 datasets: alce_asqa_30,alce_asqa_75,alce_asqa_165,alce_asqa_345,alce_qampari_30,alce_qampari_75,alce_qampari_165,alce_qampari_345
 generation_max_length: 300,300,300,300,300,300,300,300

diff --git a/evals/evaluation/HELMET/configs/icl.yaml b/evals/evaluation/HELMET/configs/icl.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 131072,131072,131072,131072,131072
 datasets: icl_trec_coarse_6600shot_balance,icl_trec_fine_6400shot_balance,icl_banking77_5900shot_balance,icl_clinic150_7050shot_balance,icl_nlu_8296shot_balance
 generation_max_length: 20,20,20,20,20

diff --git a/evals/evaluation/HELMET/configs/icl_short.yaml b/evals/evaluation/HELMET/configs/icl_short.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 8192,16384,32768,65536,8192,16384,32768,65536,8192,16384,32768,65536,8192,16384,32768,65536,8192,16384,32768,65536
 datasets: icl_trec_coarse_400shot_balance,icl_trec_coarse_800shot_balance,icl_trec_coarse_1600shot_balance,icl_trec_coarse_3300shot_balance,icl_trec_fine_400shot_balance,icl_trec_fine_800shot_balance,icl_trec_fine_1600shot_balance,icl_trec_fine_3200shot_balance,icl_banking77_360shot_balance,icl_banking77_720shot_balance,icl_banking77_1450shot_balance,icl_banking77_2900shot_balance,icl_clinic150_440shot_balance,icl_clinic150_880shot_balance,icl_clinic150_1750shot_balance,icl_clinic150_3525shot_balance,icl_nlu_510shot_balance,icl_nlu_1020shot_balance,icl_nlu_2040shot_balance,icl_nlu_4080shot_balance
 generation_max_length: 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20

diff --git a/evals/evaluation/HELMET/configs/longqa.yaml b/evals/evaluation/HELMET/configs/longqa.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 131072,131072,131072
 datasets: narrativeqa_130772,infbench_qa_eng_130862,infbench_choice_eng_130862
 generation_max_length: 100,10,10

diff --git a/evals/evaluation/HELMET/configs/longqa_short.yaml b/evals/evaluation/HELMET/configs/longqa_short.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 8192,16384,32768,65536,8192,16384,32768,65536,8192,16384,32768,65536
 datasets: narrativeqa_7892,narrativeqa_16084,narrativeqa_32468,narrativeqa_65236,infbench_qa_eng_7982,infbench_qa_eng_16174,infbench_qa_eng_32558,infbench_qa_eng_65326,infbench_choice_eng_7982,infbench_choice_eng_16174,infbench_choice_eng_32558,infbench_choice_eng_65326
 generation_max_length: 100,100,100,100,10,10,10,10,10,10,10,10

diff --git a/evals/evaluation/HELMET/configs/niah.yaml b/evals/evaluation/HELMET/configs/niah.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 131072
 datasets: ruler_niah_s_2
 generation_max_length: 50

diff --git a/evals/evaluation/HELMET/configs/niah_long.yaml b/evals/evaluation/HELMET/configs/niah_long.yaml
@@ -1,7 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 65536,131072,65536,131072,65536,131072,65536,131072,65536,131072,65536,131072,65536,131072,65536,131072,65536,131072,65536,131072,65536,131072,65536,131072,65536,131072
 datasets: ruler_niah_s_1,ruler_niah_s_1,ruler_niah_s_2,ruler_niah_s_2,ruler_niah_s_3,ruler_niah_s_3,ruler_niah_mk_1,ruler_niah_mk_1,ruler_niah_mk_2,ruler_niah_mk_2,ruler_niah_mk_3,ruler_niah_mk_3,ruler_niah_mq,ruler_niah_mq,ruler_niah_mv,ruler_niah_mv,ruler_cwe,ruler_cwe,ruler_fwe,ruler_fwe,ruler_vt,ruler_vt,ruler_qa_1,ruler_qa_1,ruler_qa_2,ruler_qa_2
 generation_max_length: 50,50,50,50,50,50,50,50,50,50,100,100,100,100,50,50,100,100,50,50,50,50,50,50,50,50
-test_files: data/ruler/niah_single_1/validation_65536.jsonl,data/ruler/niah_single_1/validation_131072.jsonl,data/ruler/niah_single_2/validation_65536.jsonl,data/ruler/niah_single_2/validation_131072.jsonl,data/ruler/niah_single_3/validation_65536.jsonl,data/ruler/niah_single_3/validation_131072.jsonl,data/ruler/niah_multikey_1/validation_65536.jsonl,data/ruler/niah_multikey_1/validation_131072.jsonl,data/ruler/niah_multikey_2/validation_65536.jsonl,data/ruler/niah_multikey_2/validation_131072.jsonl,data/ruler/niah_multikey_3/validation_65536.jsonl,data/ruler/niah_multikey_3/validation_131072.jsonl,data/ruler/niah_multiquery/validation_65536.jsonl,data/ruler/niah_multiquery/validation_131072.jsonl,data/ruler/niah_multivalue/validation_65536.jsonl,data/ruler/niah_multivalue/validation_131072.jsonl,data/ruler/cwe/validation_65536.jsonl,data/ruler/cwe/validation_131072.jsonl,data/ruler/fwe/validation_65536.jsonl,data/ruler/fwe/validation_131072.jsonl,data/ruler/vt/validation_65536.jsonl,data/ruler/vt/validation_131072.jsonl,data/ruler/qa_1/validation_65536.jsonl,data/ruler/qa_1/validation_131072.jsonl,data/ruler/qa_2/validation_65536.jsonl,data/ruler/qa_2/validation_131072.jsonl
+test_files: data/ruler/niah_single_1/validation_65536.jsonl,data/ruler/niah_single_1/validation_131072.jsonl,data/ruler/niah_single_2/validation_65536.jsonl,data/ruler/niah_single_2/validation_131072.jsonl,data/ruler/niah_single_3/validation_65536.jsonl,data/ruler/niah_single_3/validation_131072.jsonl,data/ruler/niah_multikey_1/validation_65536.jsonl,data/ruler/niah_multikey_1/validation_131072.jsonl,data/ruler/niah_multikey_2/validation_65536.jsonl,data/ruler/niah_multikey_2/validation_131072.jsonl,data/ruler/niah_multikey_3/validation_65536.jsonl,data/ruler/niah_multikey_3/validation_131072.jsonl,data/ruler/niah_multiquery/validation_65536.jsonl,data/ruler/niah_multiquery/validation_131072.jsonl,data/ruler/niah_multivalue/validation_65536.jsonl,data/ruler/niah_multivalue/validation_131072.jsonl,data/ruler/cwe/validation_65536.jsonl,data/ruler/cwe/validation_131072.jsonl,data/ruler/fwe/validation_65536.jsonl,data/ruler/fwe/validation_131072.jsonl,data/ruler/vt/validation_65536.jsonl,data/ruler/vt/validation_131072.jsonl,data/ruler/qa_1/validation_65536.jsonl,data/ruler/qa_1/validation_131072.jsonl,data/ruler/qa_2/validation_65536.jsonl,data/ruler/qa_2/validation_131072.jsonl
 demo_files: ',,,,,,,,,,,,,,,,,,,,,,,,,'
 use_chat_template: false
 max_test_samples: 100

diff --git a/evals/evaluation/HELMET/configs/rag.yaml b/evals/evaluation/HELMET/configs/rag.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 131072,131072,131072,131072
 datasets: kilt_nq,kilt_triviaqa,kilt_hotpotqa,kilt_popqa_3
 generation_max_length: 20,20,20,20

diff --git a/evals/evaluation/HELMET/configs/rag_short.yaml b/evals/evaluation/HELMET/configs/rag_short.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 8192,16384,32768,65536,8192,16384,32768,65536,8192,16384,32768,65536,8192,16384,32768,65536
 datasets: kilt_nq,kilt_nq,kilt_nq,kilt_nq,kilt_triviaqa,kilt_triviaqa,kilt_triviaqa,kilt_triviaqa,kilt_hotpotqa,kilt_hotpotqa,kilt_hotpotqa,kilt_hotpotqa,kilt_popqa_3,kilt_popqa_3,kilt_popqa_3,kilt_popqa_3
 generation_max_length: 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20

diff --git a/evals/evaluation/HELMET/configs/recall.yaml b/evals/evaluation/HELMET/configs/recall.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 131072,131072,131072,131072
 datasets: ruler_niah_mk_2,ruler_niah_mk_3,ruler_niah_mv,json_kv
 generation_max_length: 50,100,50,100

diff --git a/evals/evaluation/HELMET/configs/recall_short.yaml b/evals/evaluation/HELMET/configs/recall_short.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 8192,16384,32768,65536,8192,16384,32768,65536,8192,16384,32768,65536,8192,16384,32768,65536
 datasets: ruler_niah_mk_2,ruler_niah_mk_2,ruler_niah_mk_2,ruler_niah_mk_2,ruler_niah_mk_3,ruler_niah_mk_3,ruler_niah_mk_3,ruler_niah_mk_3,ruler_niah_mv,ruler_niah_mv,ruler_niah_mv,ruler_niah_mv,json_kv,json_kv,json_kv,json_kv
 generation_max_length: 50,50,50,50,100,100,100,100,50,50,50,50,100,100,100,100

diff --git a/evals/evaluation/HELMET/configs/rerank.yaml b/evals/evaluation/HELMET/configs/rerank.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: '131072'
 datasets: msmarco_rerank_psg
 generation_max_length: '200'

diff --git a/evals/evaluation/HELMET/configs/rerank_short.yaml b/evals/evaluation/HELMET/configs/rerank_short.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 8192,16384,32768,65536
 datasets: msmarco_rerank_psg,msmarco_rerank_psg,msmarco_rerank_psg,msmarco_rerank_psg
 generation_max_length: 200,200,200,200

diff --git a/evals/evaluation/HELMET/configs/summ.yaml b/evals/evaluation/HELMET/configs/summ.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 131072,131072
 datasets: infbench_sum_eng_129672,multi_lexsum_130372
 generation_max_length: 1200,400

diff --git a/evals/evaluation/HELMET/configs/summ_short.yaml b/evals/evaluation/HELMET/configs/summ_short.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 input_max_length: 8192,16384,32768,65536,8192,16384,32768,65536
 datasets: infbench_sum_eng_6792,infbench_sum_eng_14984,infbench_sum_eng_31368,infbench_sum_eng_64136,multi_lexsum_7492,multi_lexsum_15684,multi_lexsum_32068,multi_lexsum_64836
 generation_max_length: 1200,1200,1200,1200,400,400,400,400