From 75fc4e172ac0ba2b64c61de11c77a583cb738193 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 8 Apr 2024 16:53:47 +0000 Subject: [PATCH 1/7] edit process multiple-choice --- lm_eval/tasks/bigbench/generate_tasks.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lm_eval/tasks/bigbench/generate_tasks.py b/lm_eval/tasks/bigbench/generate_tasks.py index 169c664655..5ab36ce458 100644 --- a/lm_eval/tasks/bigbench/generate_tasks.py +++ b/lm_eval/tasks/bigbench/generate_tasks.py @@ -1,5 +1,5 @@ import os - +import datasets import yaml @@ -173,6 +173,10 @@ "word_unscrambling", ] +skip_tasks = [ + "simple_arithmetic_json_multiple_choice", + "simple_arithmetic_multiple_targets_json", +] def main() -> None: for path, task_type in zip( @@ -183,11 +187,25 @@ def main() -> None: for task in all_subtasks: file_name = f"{task}.yaml" try: + template_file = task_type + if path == "multiple_choice": + print(f"Checking {task} for multiple choices") + if task in skip_tasks: + continue + data = datasets.load_dataset("hails/bigbench", task+"_zero_shot") + multiple_choice_targets = data['default'][0]["multiple_choice_targets"] + if len(multiple_choice_targets) == 0: + continue + else: + template_file = "multiple_choice_template_b_yaml" + if set(data['default'][0]["targets"]) < set(multiple_choice_targets): + template_file = "multiple_choice_template_a_yaml" + with open(f"{path}/{file_name}", "w", encoding="utf-8") as f: f.write("# Generated by utils.py\n") yaml.dump( { - "include": f"../{task_type}", + "include": f"../{template_file}", "task": "bigbench_" + task + "_{}".format(task_type.split("_template_yaml")[0]), From 1480949847614236d2be14a81d5b87c44cdffee5 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 8 Apr 2024 16:56:07 +0000 Subject: [PATCH 2/7] split template yaml --- .../bigbench/multiple_choice_template_a_yaml | 15 +++++++++++++++ .../bigbench/multiple_choice_template_b_yaml | 15 +++++++++++++++ 2 files changed, 30 
insertions(+) create mode 100644 lm_eval/tasks/bigbench/multiple_choice_template_a_yaml create mode 100644 lm_eval/tasks/bigbench/multiple_choice_template_b_yaml diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml new file mode 100644 index 0000000000..10fce5c1c3 --- /dev/null +++ b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml @@ -0,0 +1,15 @@ +group: bigbench_multiple_choice +dataset_path: hails/bigbench +dataset_kwargs: + # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods + # subtask_name: null +output_type: multiple_choice +test_split: default +doc_to_text: inputs +doc_to_target: "{{multiple_choice_targets.index(targets[0])}}" +doc_to_choice: "{{multiple_choice_targets}}" +metric_list: + - metric: acc + # TODO: brier score and other metrics +metadata: + version: 0.0 diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml new file mode 100644 index 0000000000..36b4eb921f --- /dev/null +++ b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml @@ -0,0 +1,15 @@ +group: bigbench_multiple_choice +dataset_path: hails/bigbench +dataset_kwargs: + # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods + # subtask_name: null +output_type: multiple_choice +test_split: default +doc_to_text: inputs +doc_to_target: "{{multiple_choice_scores.index(1)}}" +doc_to_choice: "{{multiple_choice_targets}}" +metric_list: + - metric: acc + # TODO: brier score and other metrics +metadata: + version: 0.0 From f82d21a6d2eb254d40eae70d7bfe928fc13f9dc6 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 8 Apr 2024 16:56:19 +0000 Subject: [PATCH 3/7] remove --- .../tasks/bigbench/multiple_choice_template_yaml | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 
lm_eval/tasks/bigbench/multiple_choice_template_yaml diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_yaml deleted file mode 100644 index 10fce5c1c3..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice_template_yaml +++ /dev/null @@ -1,15 +0,0 @@ -group: bigbench_multiple_choice -dataset_path: hails/bigbench -dataset_kwargs: - # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods - # subtask_name: null -output_type: multiple_choice -test_split: default -doc_to_text: inputs -doc_to_target: "{{multiple_choice_targets.index(targets[0])}}" -doc_to_choice: "{{multiple_choice_targets}}" -metric_list: - - metric: acc - # TODO: brier score and other metrics -metadata: - version: 0.0 From cf8054686830fc5f7a8d807a0e1a7961ef550171 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 8 Apr 2024 16:56:55 +0000 Subject: [PATCH 4/7] modified multiple_choice tasks --- .../multiple_choice/abstract_narrative_understanding.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml | 2 +- .../tasks/bigbench/multiple_choice/analogical_similarity.yaml | 2 +- .../tasks/bigbench/multiple_choice/analytic_entailment.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml | 2 +- .../bigbench/multiple_choice/ascii_word_recognition.yaml | 4 ---- .../bigbench/multiple_choice/authorship_verification.yaml | 2 +- .../tasks/bigbench/multiple_choice/auto_categorization.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml | 2 +- .../multiple_choice/bridging_anaphora_resolution_barqa.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml | 2 +- 
lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml | 2 +- .../tasks/bigbench/multiple_choice/chess_state_tracking.yaml | 4 ---- .../bigbench/multiple_choice/chinese_remainder_theorem.yaml | 4 ---- .../bigbench/multiple_choice/cifar10_classification.yaml | 2 +- .../tasks/bigbench/multiple_choice/code_line_description.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/codenames.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/color.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml | 2 +- .../bigbench/multiple_choice/conceptual_combinations.yaml | 2 +- .../tasks/bigbench/multiple_choice/conlang_translation.yaml | 4 ---- .../contextual_parametric_knowledge_conflicts.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml | 2 +- .../tasks/bigbench/multiple_choice/cryobiology_spanish.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml | 2 +- .../tasks/bigbench/multiple_choice/dark_humor_detection.yaml | 2 +- .../tasks/bigbench/multiple_choice/date_understanding.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml | 2 +- .../bigbench/multiple_choice/discourse_marker_prediction.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml | 2 +- .../tasks/bigbench/multiple_choice/elementary_math_qa.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml | 2 +- .../bigbench/multiple_choice/emojis_emotion_prediction.yaml | 2 +- .../tasks/bigbench/multiple_choice/empirical_judgments.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml | 2 +- .../bigbench/multiple_choice/english_russian_proverbs.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml | 2 +- .../bigbench/multiple_choice/entailed_polarity_hindi.yaml | 2 +- 
.../tasks/bigbench/multiple_choice/epistemic_reasoning.yaml | 2 +- .../multiple_choice/evaluating_information_essentiality.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml | 4 ---- .../bigbench/multiple_choice/figure_of_speech_detection.yaml | 2 +- .../multiple_choice/formal_fallacies_syllogisms_negation.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/gem.yaml | 4 ---- .../multiple_choice/gender_inclusive_sentences_german.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml | 2 +- .../bigbench/multiple_choice/gre_reading_comprehension.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml | 2 +- .../bigbench/multiple_choice/hindi_question_answering.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml | 2 +- .../tasks/bigbench/multiple_choice/human_organs_senses.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml | 2 +- .../bigbench/multiple_choice/identify_math_theorems.yaml | 2 +- .../tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml | 2 +- .../tasks/bigbench/multiple_choice/implicit_relations.yaml | 2 +- .../tasks/bigbench/multiple_choice/intent_recognition.yaml | 2 +- .../multiple_choice/international_phonetic_alphabet_nli.yaml | 2 +- .../international_phonetic_alphabet_transliterate.yaml | 4 ---- .../tasks/bigbench/multiple_choice/intersect_geometry.yaml | 2 +- .../tasks/bigbench/multiple_choice/irony_identification.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/kannada.yaml | 2 +- 
lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/language_games.yaml | 4 ---- .../bigbench/multiple_choice/language_identification.yaml | 2 +- .../tasks/bigbench/multiple_choice/linguistic_mappings.yaml | 4 ---- .../tasks/bigbench/multiple_choice/linguistics_puzzles.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml | 2 +- .../bigbench/multiple_choice/logical_fallacy_detection.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml | 2 +- .../bigbench/multiple_choice/mathematical_induction.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml | 2 +- .../bigbench/multiple_choice/metaphor_understanding.yaml | 2 +- .../tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml | 2 +- .../bigbench/multiple_choice/misconceptions_russian.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml | 2 +- .../tasks/bigbench/multiple_choice/modified_arithmetic.yaml | 4 ---- .../tasks/bigbench/multiple_choice/moral_permissibility.yaml | 2 +- .../multiple_choice/movie_dialog_same_or_different.yaml | 2 +- .../tasks/bigbench/multiple_choice/movie_recommendation.yaml | 2 +- .../tasks/bigbench/multiple_choice/mult_data_wrangling.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml | 2 +- .../tasks/bigbench/multiple_choice/natural_instructions.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/navigate.yaml | 2 +- .../bigbench/multiple_choice/nonsense_words_grammar.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml | 2 +- 
lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/operators.yaml | 4 ---- .../bigbench/multiple_choice/paragraph_segmentation.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml | 2 +- .../multiple_choice/parsinlu_reading_comprehension.yaml | 4 ---- .../tasks/bigbench/multiple_choice/penguins_in_a_table.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml | 2 +- .../tasks/bigbench/multiple_choice/phrase_relatedness.yaml | 2 +- .../tasks/bigbench/multiple_choice/physical_intuition.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/physics.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml | 4 ---- .../multiple_choice/play_dialog_same_or_different.yaml | 2 +- .../bigbench/multiple_choice/polish_sequence_labeling.yaml | 4 ---- .../bigbench/multiple_choice/presuppositions_as_nli.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml | 4 ---- .../tasks/bigbench/multiple_choice/question_selection.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml | 2 +- .../multiple_choice/reasoning_about_colored_objects.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml | 2 +- .../multiple_choice/salient_translation_error_detection.yaml | 2 +- .../bigbench/multiple_choice/scientific_press_release.yaml | 4 ---- .../multiple_choice/semantic_parsing_in_context_sparc.yaml | 4 ---- .../bigbench/multiple_choice/semantic_parsing_spider.yaml | 4 ---- .../tasks/bigbench/multiple_choice/sentence_ambiguity.yaml | 2 +- .../bigbench/multiple_choice/similarities_abstraction.yaml | 2 +- 
.../tasks/bigbench/multiple_choice/simp_turing_concept.yaml | 4 ---- .../bigbench/multiple_choice/simple_arithmetic_json.yaml | 4 ---- .../simple_arithmetic_json_multiple_choice.yaml | 4 ---- .../multiple_choice/simple_arithmetic_json_subtasks.yaml | 4 ---- .../simple_arithmetic_multiple_targets_json.yaml | 4 ---- .../bigbench/multiple_choice/simple_ethical_questions.yaml | 2 +- .../tasks/bigbench/multiple_choice/simple_text_editing.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/snarks.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/social_support.yaml | 2 +- .../tasks/bigbench/multiple_choice/sports_understanding.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml | 2 +- .../bigbench/multiple_choice/sufficient_information.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml | 2 +- .../bigbench/multiple_choice/swahili_english_proverbs.yaml | 2 +- .../bigbench/multiple_choice/swedish_to_german_proverbs.yaml | 2 +- .../tasks/bigbench/multiple_choice/symbol_interpretation.yaml | 2 +- .../tasks/bigbench/multiple_choice/temporal_sequences.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/tense.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/timedial.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml | 4 ---- .../bigbench/multiple_choice/tracking_shuffled_objects.yaml | 2 +- .../tasks/bigbench/multiple_choice/understanding_fables.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml | 2 +- .../tasks/bigbench/multiple_choice/unit_interpretation.yaml | 2 +- .../multiple_choice/unnatural_in_context_learning.yaml | 4 ---- .../bigbench/multiple_choice/vitaminc_fact_verification.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml | 2 +- 
lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml | 4 ---- 168 files changed, 119 insertions(+), 315 deletions(-) delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/codenames.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/gem.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_transliterate.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/language_games.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/linguistic_mappings.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/linguistics_puzzles.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml delete mode 100644 
lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/modified_arithmetic.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/mult_data_wrangling.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/natural_instructions.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/operators.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/paragraph_segmentation.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/parsinlu_reading_comprehension.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/polish_sequence_labeling.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml delete mode 100644 
lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/tense.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/unnatural_in_context_learning.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml diff --git a/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml index 34cefc2543..5798d5e1d6 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: abstract_narrative_understanding_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_abstract_narrative_understanding_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml b/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml index b1e2903c3a..9b83a2ad09 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: anachronisms_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_anachronisms_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml b/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml index 6e20092e9d..d20cfb20d6 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: analogical_similarity_zero_shot 
-include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_analogical_similarity_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml b/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml index 9ecf8fb5f3..ee278f54ac 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: analytic_entailment_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_analytic_entailment_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml b/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml index 9b19b92fde..877268c6d8 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: arithmetic_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_arithmetic_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml b/lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml deleted file mode 100644 index 254f115b65..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: ascii_word_recognition_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_ascii_word_recognition_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml b/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml index 4caeacd4db..3e43911cee 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml @@ -1,4 +1,4 @@ 
# Generated by utils.py dataset_name: authorship_verification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_authorship_verification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml b/lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml deleted file mode 100644 index 16e62e69ba..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: auto_categorization_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_auto_categorization_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml b/lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml deleted file mode 100644 index 72db1d8ee2..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: auto_debugging_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_auto_debugging_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml b/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml index 3c4be30443..ab248ee294 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: bbq_lite_json_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_bbq_lite_json_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml b/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml deleted file mode 100644 index 73448ad929..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py 
-dataset_name: bridging_anaphora_resolution_barqa_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_bridging_anaphora_resolution_barqa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml b/lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml deleted file mode 100644 index e8011772b9..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: causal_judgment_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_causal_judgement_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml b/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml index 1d09f2d463..ce3894c88e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: causal_judgment_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_causal_judgment_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml b/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml index c39ec27809..9f613ac4d3 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: cause_and_effect_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_cause_and_effect_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml b/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml index 0a9883d0eb..3729168542 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml @@ -1,4 +1,4 @@ # Generated by 
utils.py dataset_name: checkmate_in_one_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_checkmate_in_one_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml b/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml deleted file mode 100644 index ea29979786..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: chess_state_tracking_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_chess_state_tracking_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml b/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml deleted file mode 100644 index c24d5761fd..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: chinese_remainder_theorem_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_chinese_remainder_theorem_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml b/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml index f5918e604d..1dd79a3170 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: cifar10_classification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_cifar10_classification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml b/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml index 9360f759ce..3e579579f7 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml +++ 
b/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: code_line_description_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_code_line_description_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml b/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml deleted file mode 100644 index 5655ea1f5a..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: codenames_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_codenames_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/color.yaml b/lm_eval/tasks/bigbench/multiple_choice/color.yaml index 7350013f1b..eaf5e1e344 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/color.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/color.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: color_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_color_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml b/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml index bf8f3aca16..595887615f 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: common_morpheme_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_common_morpheme_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml b/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml index 3ee13b377b..41177eee8e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml +++ 
b/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: conceptual_combinations_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_conceptual_combinations_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml b/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml deleted file mode 100644 index e5a28097c2..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: conlang_translation_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_conlang_translation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml b/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml index 3bf9d9bf56..b63ab92299 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: contextual_parametric_knowledge_conflicts_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml b/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml index 4aca69ad45..2bcc97ad76 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: crash_blossom_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_crash_blossom_multiple_choice diff --git 
a/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml b/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml index ac7c1820d4..a675efdb29 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: crass_ai_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_crass_ai_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml b/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml index c187505d30..dcd7e2b267 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: cryobiology_spanish_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_cryobiology_spanish_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml b/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml deleted file mode 100644 index c5e0519f0f..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: cryptonite_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_cryptonite_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml b/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml index 0b8e694c07..b5e3b94e0f 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: cs_algorithms_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_cs_algorithms_multiple_choice diff --git 
a/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml index 3a77ea4476..b1851f164d 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: dark_humor_detection_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_dark_humor_detection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml index 2851f0bbbb..5c75486cf5 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: date_understanding_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_date_understanding_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml index 2827232a60..80ad2aa267 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: disambiguation_qa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_disambiguation_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml b/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml index 5a18733fb7..01089de840 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py 
dataset_name: discourse_marker_prediction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_discourse_marker_prediction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml deleted file mode 100644 index bf8494cf94..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: disfl_qa_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_disfl_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml b/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml index 48d6f32e45..33be7d1b57 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: dyck_languages_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_dyck_languages_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml index 64cb58ff24..8f9dea9701 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: elementary_math_qa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_elementary_math_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml b/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml index 0604d97d83..4fc57aa269 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: 
emoji_movie_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_emoji_movie_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml b/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml index ff648d9c8f..c117b3041e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: emojis_emotion_prediction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_emojis_emotion_prediction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml b/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml index c848740b2c..10fcfaaa41 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: empirical_judgments_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_empirical_judgments_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml index 8adc12e96e..705eaa864b 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: english_proverbs_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_english_proverbs_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml index ed26147aec..9510d14cd7 100644 --- 
a/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: english_russian_proverbs_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_english_russian_proverbs_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml index 24444e55d0..5e298a34b4 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: entailed_polarity_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_entailed_polarity_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml index 32878c8ba9..c41565dd63 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: entailed_polarity_hindi_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_entailed_polarity_hindi_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml b/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml index 2c35581af4..22fa9ed806 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: epistemic_reasoning_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: 
bigbench_epistemic_reasoning_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml b/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml index b85acd95ae..f421ea2f70 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: evaluating_information_essentiality_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_evaluating_information_essentiality_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml b/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml index 4fbed8039d..c126ae2280 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: fact_checker_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_fact_checker_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml b/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml index 68a55e4739..721e10d654 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: fantasy_reasoning_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_fantasy_reasoning_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml b/lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml deleted file mode 100644 index 39fcd9cf49..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# 
Generated by utils.py -dataset_name: few_shot_nlg_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_few_shot_nlg_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml index 68a83956eb..84a88054de 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: figure_of_speech_detection_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_figure_of_speech_detection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml b/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml index 7ff37fd7b3..38f9f9c9da 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: formal_fallacies_syllogisms_negation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_formal_fallacies_syllogisms_negation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/gem.yaml b/lm_eval/tasks/bigbench/multiple_choice/gem.yaml deleted file mode 100644 index bf81e88006..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/gem.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: gem_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_gem_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml b/lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml deleted file mode 100644 index 39eee21af5..0000000000 --- 
a/lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: gender_inclusive_sentences_german_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_gender_inclusive_sentences_german_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml b/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml index 8083b8698e..f1922e434b 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: general_knowledge_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_general_knowledge_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml b/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml index 7b80acbf1d..289969bdb8 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: geometric_shapes_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_geometric_shapes_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml b/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml index 6413fb0337..789f79cccb 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: goal_step_wikihow_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_goal_step_wikihow_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml 
b/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml index 53523c3321..6fd844f33e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: gre_reading_comprehension_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_gre_reading_comprehension_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml b/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml index c5e4f24aa7..aae1ecb429 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: hhh_alignment_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_hhh_alignment_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml b/lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml deleted file mode 100644 index ed1ed27862..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: hindi_question_answering_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_hindi_question_answering_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml b/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml index 321f751375..3733d45d8f 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: hindu_knowledge_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: 
bigbench_hindu_knowledge_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml b/lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml index 5dac090fd4..0502dca382 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: hinglish_toxicity_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_hinglish_toxicity_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml b/lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml index 2fef6d9301..d95bbf9dbb 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: human_organs_senses_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_human_organs_senses_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml b/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml index 34b3771018..9766a3a2e4 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: hyperbaton_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_hyperbaton_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml b/lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml index f716129d6d..00789ddba9 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: 
identify_math_theorems_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_identify_math_theorems_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml b/lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml index 93c4c24487..6a1ea57a50 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: identify_odd_metaphor_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_identify_odd_metaphor_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml b/lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml index 9a26fd55ce..9e71d8b50c 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: implicatures_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_implicatures_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml b/lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml index 9bb0844203..2fc417ba44 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: implicit_relations_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_implicit_relations_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml b/lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml index 720ac92ae4..0f1078dc81 100644 --- 
a/lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: intent_recognition_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_intent_recognition_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml b/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml index 89d7742d5e..1a6b0d52d5 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: international_phonetic_alphabet_nli_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_international_phonetic_alphabet_nli_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_transliterate.yaml b/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_transliterate.yaml deleted file mode 100644 index c8e866e2cc..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_transliterate.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: international_phonetic_alphabet_transliterate_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_international_phonetic_alphabet_transliterate_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml b/lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml index 6014a175f1..2477ad3bfb 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: intersect_geometry_zero_shot -include: 
../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_intersect_geometry_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml b/lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml index a19ff99e55..447095ac24 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: irony_identification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_irony_identification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml b/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml index a90a828609..97cc4aac61 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: kanji_ascii_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_kanji_ascii_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml b/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml index 910cec477c..aebb585efe 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: kannada_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_kannada_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml b/lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml index 75a673c896..1644ed24cc 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: 
key_value_maps_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_key_value_maps_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml b/lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml index 1c5f629386..90012e6a3d 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: known_unknowns_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_known_unknowns_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/language_games.yaml b/lm_eval/tasks/bigbench/multiple_choice/language_games.yaml deleted file mode 100644 index 07e2711b45..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/language_games.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: language_games_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_language_games_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml b/lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml index 9ea141fb04..5e27f25e4d 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: language_identification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_language_identification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/linguistic_mappings.yaml b/lm_eval/tasks/bigbench/multiple_choice/linguistic_mappings.yaml deleted file mode 100644 index 50800d9deb..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/linguistic_mappings.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py 
-dataset_name: linguistic_mappings_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_linguistic_mappings_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/linguistics_puzzles.yaml b/lm_eval/tasks/bigbench/multiple_choice/linguistics_puzzles.yaml deleted file mode 100644 index e269cd04e9..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/linguistics_puzzles.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: linguistics_puzzles_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_linguistics_puzzles_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml b/lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml deleted file mode 100644 index 4f4f2ca117..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: list_functions_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_list_functions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml b/lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml index da6a018fa8..ea69d370bf 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logic_grid_puzzle_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_logic_grid_puzzle_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml b/lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml index 84f55f6449..3bc8b59310 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logical_args_zero_shot -include: ../multiple_choice_template_yaml +include: 
../multiple_choice_template_a_yaml task: bigbench_logical_args_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml b/lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml index 592d2afa8b..2b41e9b256 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logical_deduction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_logical_deduction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml index 1c6411afc8..c7bbe8472e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logical_fallacy_detection_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_logical_fallacy_detection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml b/lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml index 6567189438..e03574c113 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logical_sequence_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_logical_sequence_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml b/lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml index 4ed0ad3c0d..b7bf8081e8 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml +++ 
b/lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: mathematical_induction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_mathematical_induction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml b/lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml deleted file mode 100644 index 9facf63967..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: matrixshapes_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_matrixshapes_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml b/lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml index 7c476c4eb9..e2669ee075 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: metaphor_boolean_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_metaphor_boolean_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml index 6661a54f7f..58dfee1ee1 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: metaphor_understanding_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_metaphor_understanding_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml deleted file mode 100644 index 
67109c8cbb..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: minute_mysteries_qa_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_minute_mysteries_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml b/lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml index 63d0fcda69..de7c546b1b 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: misconceptions_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_misconceptions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml b/lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml index f9c5db38f8..139266f269 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: misconceptions_russian_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_misconceptions_russian_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml b/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml index a1b091da92..d2808bfc3e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: mnist_ascii_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_mnist_ascii_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/modified_arithmetic.yaml b/lm_eval/tasks/bigbench/multiple_choice/modified_arithmetic.yaml 
deleted file mode 100644 index c8a2373588..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/modified_arithmetic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: modified_arithmetic_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_modified_arithmetic_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml b/lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml index 3829555221..bdf202d1c8 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: moral_permissibility_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_moral_permissibility_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml b/lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml index 89b93d9d80..536e40e9a9 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: movie_dialog_same_or_different_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_movie_dialog_same_or_different_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml b/lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml index 7055028ee9..beded58696 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: movie_recommendation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: 
bigbench_movie_recommendation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/mult_data_wrangling.yaml b/lm_eval/tasks/bigbench/multiple_choice/mult_data_wrangling.yaml deleted file mode 100644 index 17b67bcc6d..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/mult_data_wrangling.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: mult_data_wrangling_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_mult_data_wrangling_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml b/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml index 10ff48ea58..500cac065e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: multiemo_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_multiemo_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/natural_instructions.yaml b/lm_eval/tasks/bigbench/multiple_choice/natural_instructions.yaml deleted file mode 100644 index 4874dd155b..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/natural_instructions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: natural_instructions_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_natural_instructions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml b/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml index e69f27904b..e1466c0695 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: navigate_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_navigate_multiple_choice diff --git 
a/lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml b/lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml index 52d25bcacd..608b6e67ae 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: nonsense_words_grammar_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_nonsense_words_grammar_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml b/lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml index 3fc74aa9ce..cb2213a750 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: novel_concepts_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_novel_concepts_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml b/lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml deleted file mode 100644 index 277d843d7c..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: object_counting_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_object_counting_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml b/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml index aaa43e678e..30bbf63972 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: odd_one_out_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_odd_one_out_multiple_choice 
diff --git a/lm_eval/tasks/bigbench/multiple_choice/operators.yaml b/lm_eval/tasks/bigbench/multiple_choice/operators.yaml deleted file mode 100644 index 951db6f99e..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/operators.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: operators_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_operators_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/paragraph_segmentation.yaml b/lm_eval/tasks/bigbench/multiple_choice/paragraph_segmentation.yaml deleted file mode 100644 index 2cfc8283e8..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/paragraph_segmentation.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: paragraph_segmentation_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_paragraph_segmentation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml index 7a9b61fb16..20a880d8ff 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: parsinlu_qa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_parsinlu_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/parsinlu_reading_comprehension.yaml b/lm_eval/tasks/bigbench/multiple_choice/parsinlu_reading_comprehension.yaml deleted file mode 100644 index 5fa0eccce9..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/parsinlu_reading_comprehension.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: parsinlu_reading_comprehension_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_parsinlu_reading_comprehension_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml 
b/lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml index de024e2e7f..c7b5cbb424 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: penguins_in_a_table_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_penguins_in_a_table_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml b/lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml index b7a644f9d7..6bd1314c9c 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: periodic_elements_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_periodic_elements_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml b/lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml index 6fa92ed3a8..9a45e47914 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: persian_idioms_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_persian_idioms_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml b/lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml index c797aec6e6..e81cb20651 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: phrase_relatedness_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: 
bigbench_phrase_relatedness_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml b/lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml index 089376dd8e..fc54acaf05 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: physical_intuition_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_physical_intuition_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/physics.yaml b/lm_eval/tasks/bigbench/multiple_choice/physics.yaml index bc06f79dff..d4c4ff4baf 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/physics.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/physics.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: physics_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_physics_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml b/lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml deleted file mode 100644 index 44646f146a..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: physics_questions_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_physics_questions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml b/lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml index 85aac7f4b6..494c0949a7 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: play_dialog_same_or_different_zero_shot -include: ../multiple_choice_template_yaml 
+include: ../multiple_choice_template_a_yaml task: bigbench_play_dialog_same_or_different_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/polish_sequence_labeling.yaml b/lm_eval/tasks/bigbench/multiple_choice/polish_sequence_labeling.yaml deleted file mode 100644 index d61345feb5..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/polish_sequence_labeling.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: polish_sequence_labeling_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_polish_sequence_labeling_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml b/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml index 71a56aa805..5ca6d0f47a 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: presuppositions_as_nli_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_presuppositions_as_nli_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml b/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml deleted file mode 100644 index 263d61ebe6..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: qa_wikidata_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_qa_wikidata_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml b/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml index 3b3dd0d70e..4e2a1ef6bb 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: question_selection_zero_shot -include: 
../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_question_selection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml b/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml index 8138791fff..2013e5b9c7 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: real_or_fake_text_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_real_or_fake_text_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml b/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml index 3ab6d5e062..92ee379e71 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: reasoning_about_colored_objects_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_reasoning_about_colored_objects_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml b/lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml deleted file mode 100644 index 666aa49b06..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: repeat_copy_logic_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_repeat_copy_logic_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml b/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml deleted file mode 100644 index 49e3cb4b8d..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# 
Generated by utils.py -dataset_name: rephrase_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_rephrase_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml b/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml index 93434e2c6d..3a11b6d599 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: riddle_sense_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_riddle_sense_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml b/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml index 32c38ba378..4b7cb5e731 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: ruin_names_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_ruin_names_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml index d930e7419a..fd57656994 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: salient_translation_error_detection_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_salient_translation_error_detection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml b/lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml deleted file mode 100644 index f23190e7ac..0000000000 --- 
a/lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: scientific_press_release_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_scientific_press_release_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml b/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml deleted file mode 100644 index 00574b2f53..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: semantic_parsing_in_context_sparc_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_semantic_parsing_in_context_sparc_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml b/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml deleted file mode 100644 index a988e54c51..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: semantic_parsing_spider_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_semantic_parsing_spider_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml b/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml index 4e4a18f1ad..07282c2882 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: sentence_ambiguity_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_sentence_ambiguity_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml b/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml index 
82b86d1b47..71408e96ee 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: similarities_abstraction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_similarities_abstraction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml b/lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml deleted file mode 100644 index 7b1849d5e5..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simp_turing_concept_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simp_turing_concept_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml deleted file mode 100644 index cd1b61b9b0..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_arithmetic_json_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simple_arithmetic_json_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml deleted file mode 100644 index 4e63fce945..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_arithmetic_json_multiple_choice_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simple_arithmetic_json_multiple_choice_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml 
b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml deleted file mode 100644 index 8688512bda..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_arithmetic_json_subtasks_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simple_arithmetic_json_subtasks_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml deleted file mode 100644 index 685ec17c1a..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_arithmetic_multiple_targets_json_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simple_arithmetic_multiple_targets_json_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml index 0983381ba2..66db4664d1 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: simple_ethical_questions_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_simple_ethical_questions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml deleted file mode 100644 index 13b67888cd..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_text_editing_zero_shot -include: ../multiple_choice_template_yaml -task: 
bigbench_simple_text_editing_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml b/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml index 3e79f1ce10..7819aa4c17 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: snarks_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_snarks_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml b/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml index a4da50c90c..8ec2dd1dd2 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: social_iqa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_social_iqa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml b/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml index 1b3bd5936e..247f558a33 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: social_support_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_social_support_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml index e5a123fc93..ae2ba852ee 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: sports_understanding_zero_shot -include: ../multiple_choice_template_yaml +include: 
../multiple_choice_template_a_yaml task: bigbench_sports_understanding_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml b/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml index 30877750e5..bed6b55f29 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: strange_stories_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_strange_stories_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml b/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml index f988071bad..f1d6ae3b2e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: strategyqa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_b_yaml task: bigbench_strategyqa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml b/lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml deleted file mode 100644 index f53d677caa..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: sufficient_information_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_sufficient_information_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml b/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml index ecf7465ff2..138c2dff78 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: suicide_risk_zero_shot -include: ../multiple_choice_template_yaml +include: 
../multiple_choice_template_a_yaml task: bigbench_suicide_risk_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml index 40103274e9..46d66147c4 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: swahili_english_proverbs_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_swahili_english_proverbs_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml index d2f31d3c03..a08c437e07 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: swedish_to_german_proverbs_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_swedish_to_german_proverbs_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml b/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml index 98e3d5b369..1d519f3e72 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: symbol_interpretation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_symbol_interpretation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml b/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml index abd8834b0f..046e4eeba4 100644 --- 
a/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: temporal_sequences_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_temporal_sequences_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/tense.yaml b/lm_eval/tasks/bigbench/multiple_choice/tense.yaml deleted file mode 100644 index 6a2676f087..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/tense.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: tense_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_tense_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml b/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml index 350d4e786c..ea069173bd 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: timedial_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_timedial_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml b/lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml deleted file mode 100644 index b9a03639a2..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: topical_chat_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_topical_chat_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml b/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml index f9aa366b7a..62ebc5d610 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml @@ 
-1,4 +1,4 @@ # Generated by utils.py dataset_name: tracking_shuffled_objects_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_tracking_shuffled_objects_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml b/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml index 263793af42..5cdd779d7b 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: understanding_fables_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_understanding_fables_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml b/lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml index f7e1feb052..bfe91a2b08 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: undo_permutation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_undo_permutation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml b/lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml index 21a67c437b..d1c50a6523 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: unit_conversion_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_unit_conversion_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml b/lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml index 68614cfddf..7d87db233a 
100644 --- a/lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: unit_interpretation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_unit_interpretation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/unnatural_in_context_learning.yaml b/lm_eval/tasks/bigbench/multiple_choice/unnatural_in_context_learning.yaml deleted file mode 100644 index 45943005c7..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/unnatural_in_context_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: unnatural_in_context_learning_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_unnatural_in_context_learning_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml b/lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml index 84305bf33b..42db495738 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: vitaminc_fact_verification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_vitaminc_fact_verification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml b/lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml index 7879d1661e..8c4e15d3ad 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: what_is_the_tao_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_what_is_the_tao_multiple_choice diff --git 
a/lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml b/lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml index 3dbfb0305e..a08b9b3efe 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: which_wiki_edit_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_which_wiki_edit_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml b/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml index 98bc6e4b23..23ffc4bdd9 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: winowhy_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_winowhy_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml b/lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml deleted file mode 100644 index 71e79ae363..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: word_sorting_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_word_sorting_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml b/lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml deleted file mode 100644 index bbfeb14458..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: word_unscrambling_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_word_unscrambling_multiple_choice From 86098661f5b1b97cbc54cd1fe9987ec3671951c2 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Tue, 9 Apr 2024 11:00:43 +0000 Subject: 
[PATCH 5/7] update --- lm_eval/tasks/bigbench/generate_tasks.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lm_eval/tasks/bigbench/generate_tasks.py b/lm_eval/tasks/bigbench/generate_tasks.py index 5ab36ce458..5e7923dd1e 100644 --- a/lm_eval/tasks/bigbench/generate_tasks.py +++ b/lm_eval/tasks/bigbench/generate_tasks.py @@ -1,4 +1,5 @@ import os + import datasets import yaml @@ -178,6 +179,7 @@ "simple_arithmetic_multiple_targets_json", ] + def main() -> None: for path, task_type in zip( ["multiple_choice", "generate_until"], @@ -192,13 +194,17 @@ def main() -> None: print(f"Checking {task} for multiple choices") if task in skip_tasks: continue - data = datasets.load_dataset("hails/bigbench", task+"_zero_shot") - multiple_choice_targets = data['default'][0]["multiple_choice_targets"] + data = datasets.load_dataset("hails/bigbench", task + "_zero_shot") + multiple_choice_targets = data["default"][0][ + "multiple_choice_targets" + ] if len(multiple_choice_targets) == 0: continue else: template_file = "multiple_choice_template_b_yaml" - if set(data['default'][0]["targets"]) < set(multiple_choice_targets): + if set(data["default"][0]["targets"]) < set( + multiple_choice_targets + ): template_file = "multiple_choice_template_a_yaml" with open(f"{path}/{file_name}", "w", encoding="utf-8") as f: From 6adb5e69236ab948299b15a397f812eb62b9c42c Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Fri, 24 May 2024 11:59:18 -0400 Subject: [PATCH 6/7] Update multiple_choice_template_b_yaml --- lm_eval/tasks/bigbench/multiple_choice_template_b_yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml index 36b4eb921f..2900103e0d 100644 --- a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml +++ b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml @@ -12,4
+12,4 @@ metric_list: - metric: acc # TODO: brier score and other metrics metadata: - version: 0.0 + version: 1.0 From 3de4ccf59146fb665e2479d49f50cf498df576fd Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Fri, 24 May 2024 11:59:33 -0400 Subject: [PATCH 7/7] Update multiple_choice_template_a_yaml --- lm_eval/tasks/bigbench/multiple_choice_template_a_yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml index 10fce5c1c3..4b5f9e8929 100644 --- a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml +++ b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml @@ -12,4 +12,4 @@ metric_list: - metric: acc # TODO: brier score and other metrics metadata: - version: 0.0 + version: 1.0