From 75fc4e172ac0ba2b64c61de11c77a583cb738193 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 8 Apr 2024 16:53:47 +0000 Subject: [PATCH 1/7] edit process multiple-choice --- lm_eval/tasks/bigbench/generate_tasks.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lm_eval/tasks/bigbench/generate_tasks.py b/lm_eval/tasks/bigbench/generate_tasks.py index 169c664655..5ab36ce458 100644 --- a/lm_eval/tasks/bigbench/generate_tasks.py +++ b/lm_eval/tasks/bigbench/generate_tasks.py @@ -1,5 +1,5 @@ import os - +import datasets import yaml @@ -173,6 +173,10 @@ "word_unscrambling", ] +skip_tasks = [ + "simple_arithmetic_json_multiple_choice", + "simple_arithmetic_multiple_targets_json", +] def main() -> None: for path, task_type in zip( @@ -183,11 +187,25 @@ def main() -> None: for task in all_subtasks: file_name = f"{task}.yaml" try: + template_file = task_type + if path == "multiple_choice": + print(f"Checking {task} for multiple choices") + if task in skip_tasks: + continue + data = datasets.load_dataset("hails/bigbench", task+"_zero_shot") + multiple_choice_targets = data['default'][0]["multiple_choice_targets"] + if len(multiple_choice_targets) == 0: + continue + else: + template_file = "multiple_choice_template_b_yaml" + if set(data['default'][0]["targets"]) < set(multiple_choice_targets): + template_file = "multiple_choice_template_a_yaml" + with open(f"{path}/{file_name}", "w", encoding="utf-8") as f: f.write("# Generated by utils.py\n") yaml.dump( { - "include": f"../{task_type}", + "include": f"../{template_file}", "task": "bigbench_" + task + "_{}".format(task_type.split("_template_yaml")[0]), From 1480949847614236d2be14a81d5b87c44cdffee5 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 8 Apr 2024 16:56:07 +0000 Subject: [PATCH 2/7] split template yaml --- .../bigbench/multiple_choice_template_a_yaml | 15 +++++++++++++++ .../bigbench/multiple_choice_template_b_yaml | 15 +++++++++++++++ 2 files changed, 30 
insertions(+) create mode 100644 lm_eval/tasks/bigbench/multiple_choice_template_a_yaml create mode 100644 lm_eval/tasks/bigbench/multiple_choice_template_b_yaml diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml new file mode 100644 index 0000000000..10fce5c1c3 --- /dev/null +++ b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml @@ -0,0 +1,15 @@ +group: bigbench_multiple_choice +dataset_path: hails/bigbench +dataset_kwargs: + # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods + # subtask_name: null +output_type: multiple_choice +test_split: default +doc_to_text: inputs +doc_to_target: "{{multiple_choice_targets.index(targets[0])}}" +doc_to_choice: "{{multiple_choice_targets}}" +metric_list: + - metric: acc + # TODO: brier score and other metrics +metadata: + version: 0.0 diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml new file mode 100644 index 0000000000..36b4eb921f --- /dev/null +++ b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml @@ -0,0 +1,15 @@ +group: bigbench_multiple_choice +dataset_path: hails/bigbench +dataset_kwargs: + # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods + # subtask_name: null +output_type: multiple_choice +test_split: default +doc_to_text: inputs +doc_to_target: "{{multiple_choice_scores.index(1)}}" +doc_to_choice: "{{multiple_choice_targets}}" +metric_list: + - metric: acc + # TODO: brier score and other metrics +metadata: + version: 0.0 From f82d21a6d2eb254d40eae70d7bfe928fc13f9dc6 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 8 Apr 2024 16:56:19 +0000 Subject: [PATCH 3/7] remove --- .../tasks/bigbench/multiple_choice_template_yaml | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 
lm_eval/tasks/bigbench/multiple_choice_template_yaml diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_yaml deleted file mode 100644 index 10fce5c1c3..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice_template_yaml +++ /dev/null @@ -1,15 +0,0 @@ -group: bigbench_multiple_choice -dataset_path: hails/bigbench -dataset_kwargs: - # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods - # subtask_name: null -output_type: multiple_choice -test_split: default -doc_to_text: inputs -doc_to_target: "{{multiple_choice_targets.index(targets[0])}}" -doc_to_choice: "{{multiple_choice_targets}}" -metric_list: - - metric: acc - # TODO: brier score and other metrics -metadata: - version: 0.0 From cf8054686830fc5f7a8d807a0e1a7961ef550171 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 8 Apr 2024 16:56:55 +0000 Subject: [PATCH 4/7] modified multiple_choice tasks --- .../multiple_choice/abstract_narrative_understanding.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml | 2 +- .../tasks/bigbench/multiple_choice/analogical_similarity.yaml | 2 +- .../tasks/bigbench/multiple_choice/analytic_entailment.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml | 2 +- .../bigbench/multiple_choice/ascii_word_recognition.yaml | 4 ---- .../bigbench/multiple_choice/authorship_verification.yaml | 2 +- .../tasks/bigbench/multiple_choice/auto_categorization.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml | 2 +- .../multiple_choice/bridging_anaphora_resolution_barqa.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml | 2 +- 
lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml | 2 +- .../tasks/bigbench/multiple_choice/chess_state_tracking.yaml | 4 ---- .../bigbench/multiple_choice/chinese_remainder_theorem.yaml | 4 ---- .../bigbench/multiple_choice/cifar10_classification.yaml | 2 +- .../tasks/bigbench/multiple_choice/code_line_description.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/codenames.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/color.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml | 2 +- .../bigbench/multiple_choice/conceptual_combinations.yaml | 2 +- .../tasks/bigbench/multiple_choice/conlang_translation.yaml | 4 ---- .../contextual_parametric_knowledge_conflicts.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml | 2 +- .../tasks/bigbench/multiple_choice/cryobiology_spanish.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml | 2 +- .../tasks/bigbench/multiple_choice/dark_humor_detection.yaml | 2 +- .../tasks/bigbench/multiple_choice/date_understanding.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml | 2 +- .../bigbench/multiple_choice/discourse_marker_prediction.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml | 2 +- .../tasks/bigbench/multiple_choice/elementary_math_qa.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml | 2 +- .../bigbench/multiple_choice/emojis_emotion_prediction.yaml | 2 +- .../tasks/bigbench/multiple_choice/empirical_judgments.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml | 2 +- .../bigbench/multiple_choice/english_russian_proverbs.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml | 2 +- .../bigbench/multiple_choice/entailed_polarity_hindi.yaml | 2 +- 
.../tasks/bigbench/multiple_choice/epistemic_reasoning.yaml | 2 +- .../multiple_choice/evaluating_information_essentiality.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml | 4 ---- .../bigbench/multiple_choice/figure_of_speech_detection.yaml | 2 +- .../multiple_choice/formal_fallacies_syllogisms_negation.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/gem.yaml | 4 ---- .../multiple_choice/gender_inclusive_sentences_german.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml | 2 +- .../bigbench/multiple_choice/gre_reading_comprehension.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml | 2 +- .../bigbench/multiple_choice/hindi_question_answering.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml | 2 +- .../tasks/bigbench/multiple_choice/human_organs_senses.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml | 2 +- .../bigbench/multiple_choice/identify_math_theorems.yaml | 2 +- .../tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml | 2 +- .../tasks/bigbench/multiple_choice/implicit_relations.yaml | 2 +- .../tasks/bigbench/multiple_choice/intent_recognition.yaml | 2 +- .../multiple_choice/international_phonetic_alphabet_nli.yaml | 2 +- .../international_phonetic_alphabet_transliterate.yaml | 4 ---- .../tasks/bigbench/multiple_choice/intersect_geometry.yaml | 2 +- .../tasks/bigbench/multiple_choice/irony_identification.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/kannada.yaml | 2 +- 
lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/language_games.yaml | 4 ---- .../bigbench/multiple_choice/language_identification.yaml | 2 +- .../tasks/bigbench/multiple_choice/linguistic_mappings.yaml | 4 ---- .../tasks/bigbench/multiple_choice/linguistics_puzzles.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml | 2 +- .../bigbench/multiple_choice/logical_fallacy_detection.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml | 2 +- .../bigbench/multiple_choice/mathematical_induction.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml | 2 +- .../bigbench/multiple_choice/metaphor_understanding.yaml | 2 +- .../tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml | 2 +- .../bigbench/multiple_choice/misconceptions_russian.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml | 2 +- .../tasks/bigbench/multiple_choice/modified_arithmetic.yaml | 4 ---- .../tasks/bigbench/multiple_choice/moral_permissibility.yaml | 2 +- .../multiple_choice/movie_dialog_same_or_different.yaml | 2 +- .../tasks/bigbench/multiple_choice/movie_recommendation.yaml | 2 +- .../tasks/bigbench/multiple_choice/mult_data_wrangling.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml | 2 +- .../tasks/bigbench/multiple_choice/natural_instructions.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/navigate.yaml | 2 +- .../bigbench/multiple_choice/nonsense_words_grammar.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml | 2 +- 
lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/operators.yaml | 4 ---- .../bigbench/multiple_choice/paragraph_segmentation.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml | 2 +- .../multiple_choice/parsinlu_reading_comprehension.yaml | 4 ---- .../tasks/bigbench/multiple_choice/penguins_in_a_table.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml | 2 +- .../tasks/bigbench/multiple_choice/phrase_relatedness.yaml | 2 +- .../tasks/bigbench/multiple_choice/physical_intuition.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/physics.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml | 4 ---- .../multiple_choice/play_dialog_same_or_different.yaml | 2 +- .../bigbench/multiple_choice/polish_sequence_labeling.yaml | 4 ---- .../bigbench/multiple_choice/presuppositions_as_nli.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml | 4 ---- .../tasks/bigbench/multiple_choice/question_selection.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml | 2 +- .../multiple_choice/reasoning_about_colored_objects.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml | 2 +- .../multiple_choice/salient_translation_error_detection.yaml | 2 +- .../bigbench/multiple_choice/scientific_press_release.yaml | 4 ---- .../multiple_choice/semantic_parsing_in_context_sparc.yaml | 4 ---- .../bigbench/multiple_choice/semantic_parsing_spider.yaml | 4 ---- .../tasks/bigbench/multiple_choice/sentence_ambiguity.yaml | 2 +- .../bigbench/multiple_choice/similarities_abstraction.yaml | 2 +- 
.../tasks/bigbench/multiple_choice/simp_turing_concept.yaml | 4 ---- .../bigbench/multiple_choice/simple_arithmetic_json.yaml | 4 ---- .../simple_arithmetic_json_multiple_choice.yaml | 4 ---- .../multiple_choice/simple_arithmetic_json_subtasks.yaml | 4 ---- .../simple_arithmetic_multiple_targets_json.yaml | 4 ---- .../bigbench/multiple_choice/simple_ethical_questions.yaml | 2 +- .../tasks/bigbench/multiple_choice/simple_text_editing.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/snarks.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/social_support.yaml | 2 +- .../tasks/bigbench/multiple_choice/sports_understanding.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml | 2 +- .../bigbench/multiple_choice/sufficient_information.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml | 2 +- .../bigbench/multiple_choice/swahili_english_proverbs.yaml | 2 +- .../bigbench/multiple_choice/swedish_to_german_proverbs.yaml | 2 +- .../tasks/bigbench/multiple_choice/symbol_interpretation.yaml | 2 +- .../tasks/bigbench/multiple_choice/temporal_sequences.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/tense.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/timedial.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml | 4 ---- .../bigbench/multiple_choice/tracking_shuffled_objects.yaml | 2 +- .../tasks/bigbench/multiple_choice/understanding_fables.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml | 2 +- .../tasks/bigbench/multiple_choice/unit_interpretation.yaml | 2 +- .../multiple_choice/unnatural_in_context_learning.yaml | 4 ---- .../bigbench/multiple_choice/vitaminc_fact_verification.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml | 2 +- 
lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml | 4 ---- lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml | 4 ---- 168 files changed, 119 insertions(+), 315 deletions(-) delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/codenames.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/gem.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_transliterate.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/language_games.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/linguistic_mappings.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/linguistics_puzzles.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml delete mode 100644 
lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/modified_arithmetic.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/mult_data_wrangling.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/natural_instructions.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/operators.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/paragraph_segmentation.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/parsinlu_reading_comprehension.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/polish_sequence_labeling.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml delete mode 100644 
lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/tense.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/unnatural_in_context_learning.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml delete mode 100644 lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml diff --git a/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml index 34cefc2543..5798d5e1d6 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: abstract_narrative_understanding_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_abstract_narrative_understanding_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml b/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml index b1e2903c3a..9b83a2ad09 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: anachronisms_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_anachronisms_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml b/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml index 6e20092e9d..d20cfb20d6 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: analogical_similarity_zero_shot 
-include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_analogical_similarity_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml b/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml index 9ecf8fb5f3..ee278f54ac 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: analytic_entailment_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_analytic_entailment_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml b/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml index 9b19b92fde..877268c6d8 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: arithmetic_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_arithmetic_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml b/lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml deleted file mode 100644 index 254f115b65..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: ascii_word_recognition_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_ascii_word_recognition_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml b/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml index 4caeacd4db..3e43911cee 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml @@ -1,4 +1,4 @@ 
# Generated by utils.py dataset_name: authorship_verification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_authorship_verification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml b/lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml deleted file mode 100644 index 16e62e69ba..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: auto_categorization_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_auto_categorization_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml b/lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml deleted file mode 100644 index 72db1d8ee2..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: auto_debugging_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_auto_debugging_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml b/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml index 3c4be30443..ab248ee294 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: bbq_lite_json_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_bbq_lite_json_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml b/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml deleted file mode 100644 index 73448ad929..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py 
-dataset_name: bridging_anaphora_resolution_barqa_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_bridging_anaphora_resolution_barqa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml b/lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml deleted file mode 100644 index e8011772b9..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: causal_judgment_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_causal_judgement_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml b/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml index 1d09f2d463..ce3894c88e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: causal_judgment_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_causal_judgment_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml b/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml index c39ec27809..9f613ac4d3 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: cause_and_effect_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_cause_and_effect_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml b/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml index 0a9883d0eb..3729168542 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml @@ -1,4 +1,4 @@ # Generated by 
utils.py dataset_name: checkmate_in_one_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_checkmate_in_one_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml b/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml deleted file mode 100644 index ea29979786..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: chess_state_tracking_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_chess_state_tracking_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml b/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml deleted file mode 100644 index c24d5761fd..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: chinese_remainder_theorem_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_chinese_remainder_theorem_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml b/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml index f5918e604d..1dd79a3170 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: cifar10_classification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_cifar10_classification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml b/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml index 9360f759ce..3e579579f7 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml +++ 
b/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: code_line_description_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_code_line_description_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml b/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml deleted file mode 100644 index 5655ea1f5a..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: codenames_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_codenames_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/color.yaml b/lm_eval/tasks/bigbench/multiple_choice/color.yaml index 7350013f1b..eaf5e1e344 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/color.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/color.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: color_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_color_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml b/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml index bf8f3aca16..595887615f 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: common_morpheme_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_common_morpheme_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml b/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml index 3ee13b377b..41177eee8e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml +++ 
b/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: conceptual_combinations_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_conceptual_combinations_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml b/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml deleted file mode 100644 index e5a28097c2..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: conlang_translation_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_conlang_translation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml b/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml index 3bf9d9bf56..b63ab92299 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: contextual_parametric_knowledge_conflicts_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml b/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml index 4aca69ad45..2bcc97ad76 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: crash_blossom_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_crash_blossom_multiple_choice diff --git 
a/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml b/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml index ac7c1820d4..a675efdb29 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: crass_ai_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_crass_ai_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml b/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml index c187505d30..dcd7e2b267 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: cryobiology_spanish_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_cryobiology_spanish_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml b/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml deleted file mode 100644 index c5e0519f0f..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: cryptonite_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_cryptonite_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml b/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml index 0b8e694c07..b5e3b94e0f 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: cs_algorithms_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_cs_algorithms_multiple_choice diff --git 
a/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml index 3a77ea4476..b1851f164d 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: dark_humor_detection_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_dark_humor_detection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml index 2851f0bbbb..5c75486cf5 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: date_understanding_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_date_understanding_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml index 2827232a60..80ad2aa267 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: disambiguation_qa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_disambiguation_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml b/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml index 5a18733fb7..01089de840 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py 
dataset_name: discourse_marker_prediction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_discourse_marker_prediction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml deleted file mode 100644 index bf8494cf94..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: disfl_qa_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_disfl_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml b/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml index 48d6f32e45..33be7d1b57 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: dyck_languages_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_dyck_languages_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml index 64cb58ff24..8f9dea9701 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: elementary_math_qa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_elementary_math_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml b/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml index 0604d97d83..4fc57aa269 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: 
emoji_movie_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_emoji_movie_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml b/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml index ff648d9c8f..c117b3041e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: emojis_emotion_prediction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_emojis_emotion_prediction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml b/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml index c848740b2c..10fcfaaa41 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: empirical_judgments_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_empirical_judgments_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml index 8adc12e96e..705eaa864b 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: english_proverbs_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_english_proverbs_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml index ed26147aec..9510d14cd7 100644 --- 
a/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: english_russian_proverbs_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_english_russian_proverbs_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml index 24444e55d0..5e298a34b4 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: entailed_polarity_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_entailed_polarity_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml index 32878c8ba9..c41565dd63 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: entailed_polarity_hindi_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_entailed_polarity_hindi_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml b/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml index 2c35581af4..22fa9ed806 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: epistemic_reasoning_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: 
bigbench_epistemic_reasoning_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml b/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml index b85acd95ae..f421ea2f70 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: evaluating_information_essentiality_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_evaluating_information_essentiality_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml b/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml index 4fbed8039d..c126ae2280 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: fact_checker_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_fact_checker_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml b/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml index 68a55e4739..721e10d654 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: fantasy_reasoning_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_fantasy_reasoning_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml b/lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml deleted file mode 100644 index 39fcd9cf49..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# 
Generated by utils.py -dataset_name: few_shot_nlg_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_few_shot_nlg_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml index 68a83956eb..84a88054de 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: figure_of_speech_detection_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_figure_of_speech_detection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml b/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml index 7ff37fd7b3..38f9f9c9da 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: formal_fallacies_syllogisms_negation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_formal_fallacies_syllogisms_negation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/gem.yaml b/lm_eval/tasks/bigbench/multiple_choice/gem.yaml deleted file mode 100644 index bf81e88006..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/gem.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: gem_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_gem_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml b/lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml deleted file mode 100644 index 39eee21af5..0000000000 --- 
a/lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: gender_inclusive_sentences_german_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_gender_inclusive_sentences_german_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml b/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml index 8083b8698e..f1922e434b 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: general_knowledge_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_general_knowledge_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml b/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml index 7b80acbf1d..289969bdb8 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: geometric_shapes_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_geometric_shapes_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml b/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml index 6413fb0337..789f79cccb 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: goal_step_wikihow_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_goal_step_wikihow_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml 
b/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml index 53523c3321..6fd844f33e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: gre_reading_comprehension_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_gre_reading_comprehension_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml b/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml index c5e4f24aa7..aae1ecb429 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: hhh_alignment_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_hhh_alignment_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml b/lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml deleted file mode 100644 index ed1ed27862..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: hindi_question_answering_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_hindi_question_answering_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml b/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml index 321f751375..3733d45d8f 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: hindu_knowledge_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: 
bigbench_hindu_knowledge_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml b/lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml index 5dac090fd4..0502dca382 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: hinglish_toxicity_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_hinglish_toxicity_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml b/lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml index 2fef6d9301..d95bbf9dbb 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: human_organs_senses_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_human_organs_senses_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml b/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml index 34b3771018..9766a3a2e4 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: hyperbaton_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_hyperbaton_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml b/lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml index f716129d6d..00789ddba9 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: 
identify_math_theorems_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_identify_math_theorems_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml b/lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml index 93c4c24487..6a1ea57a50 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: identify_odd_metaphor_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_identify_odd_metaphor_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml b/lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml index 9a26fd55ce..9e71d8b50c 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: implicatures_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_implicatures_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml b/lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml index 9bb0844203..2fc417ba44 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: implicit_relations_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_implicit_relations_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml b/lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml index 720ac92ae4..0f1078dc81 100644 --- 
a/lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: intent_recognition_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_intent_recognition_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml b/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml index 89d7742d5e..1a6b0d52d5 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: international_phonetic_alphabet_nli_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_international_phonetic_alphabet_nli_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_transliterate.yaml b/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_transliterate.yaml deleted file mode 100644 index c8e866e2cc..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_transliterate.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: international_phonetic_alphabet_transliterate_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_international_phonetic_alphabet_transliterate_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml b/lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml index 6014a175f1..2477ad3bfb 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: intersect_geometry_zero_shot -include: 
../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_intersect_geometry_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml b/lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml index a19ff99e55..447095ac24 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: irony_identification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_irony_identification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml b/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml index a90a828609..97cc4aac61 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: kanji_ascii_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_kanji_ascii_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml b/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml index 910cec477c..aebb585efe 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: kannada_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_kannada_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml b/lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml index 75a673c896..1644ed24cc 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: 
key_value_maps_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_key_value_maps_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml b/lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml index 1c5f629386..90012e6a3d 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: known_unknowns_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_known_unknowns_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/language_games.yaml b/lm_eval/tasks/bigbench/multiple_choice/language_games.yaml deleted file mode 100644 index 07e2711b45..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/language_games.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: language_games_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_language_games_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml b/lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml index 9ea141fb04..5e27f25e4d 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: language_identification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_language_identification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/linguistic_mappings.yaml b/lm_eval/tasks/bigbench/multiple_choice/linguistic_mappings.yaml deleted file mode 100644 index 50800d9deb..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/linguistic_mappings.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py 
-dataset_name: linguistic_mappings_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_linguistic_mappings_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/linguistics_puzzles.yaml b/lm_eval/tasks/bigbench/multiple_choice/linguistics_puzzles.yaml deleted file mode 100644 index e269cd04e9..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/linguistics_puzzles.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: linguistics_puzzles_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_linguistics_puzzles_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml b/lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml deleted file mode 100644 index 4f4f2ca117..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/list_functions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: list_functions_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_list_functions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml b/lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml index da6a018fa8..ea69d370bf 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logic_grid_puzzle_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_logic_grid_puzzle_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml b/lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml index 84f55f6449..3bc8b59310 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logical_args_zero_shot -include: ../multiple_choice_template_yaml +include: 
../multiple_choice_template_a_yaml task: bigbench_logical_args_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml b/lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml index 592d2afa8b..2b41e9b256 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logical_deduction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_logical_deduction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml index 1c6411afc8..c7bbe8472e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logical_fallacy_detection_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_logical_fallacy_detection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml b/lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml index 6567189438..e03574c113 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: logical_sequence_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_logical_sequence_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml b/lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml index 4ed0ad3c0d..b7bf8081e8 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml +++ 
b/lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: mathematical_induction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_mathematical_induction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml b/lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml deleted file mode 100644 index 9facf63967..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/matrixshapes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: matrixshapes_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_matrixshapes_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml b/lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml index 7c476c4eb9..e2669ee075 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: metaphor_boolean_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_metaphor_boolean_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml index 6661a54f7f..58dfee1ee1 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: metaphor_understanding_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_metaphor_understanding_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml deleted file mode 100644 index 
67109c8cbb..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/minute_mysteries_qa.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: minute_mysteries_qa_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_minute_mysteries_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml b/lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml index 63d0fcda69..de7c546b1b 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: misconceptions_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_misconceptions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml b/lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml index f9c5db38f8..139266f269 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: misconceptions_russian_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_misconceptions_russian_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml b/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml index a1b091da92..d2808bfc3e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: mnist_ascii_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_mnist_ascii_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/modified_arithmetic.yaml b/lm_eval/tasks/bigbench/multiple_choice/modified_arithmetic.yaml 
deleted file mode 100644 index c8a2373588..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/modified_arithmetic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: modified_arithmetic_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_modified_arithmetic_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml b/lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml index 3829555221..bdf202d1c8 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: moral_permissibility_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_moral_permissibility_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml b/lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml index 89b93d9d80..536e40e9a9 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: movie_dialog_same_or_different_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_movie_dialog_same_or_different_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml b/lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml index 7055028ee9..beded58696 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: movie_recommendation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: 
bigbench_movie_recommendation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/mult_data_wrangling.yaml b/lm_eval/tasks/bigbench/multiple_choice/mult_data_wrangling.yaml deleted file mode 100644 index 17b67bcc6d..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/mult_data_wrangling.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: mult_data_wrangling_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_mult_data_wrangling_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml b/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml index 10ff48ea58..500cac065e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: multiemo_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_multiemo_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/natural_instructions.yaml b/lm_eval/tasks/bigbench/multiple_choice/natural_instructions.yaml deleted file mode 100644 index 4874dd155b..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/natural_instructions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: natural_instructions_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_natural_instructions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml b/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml index e69f27904b..e1466c0695 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: navigate_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_navigate_multiple_choice diff --git 
a/lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml b/lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml index 52d25bcacd..608b6e67ae 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: nonsense_words_grammar_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_nonsense_words_grammar_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml b/lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml index 3fc74aa9ce..cb2213a750 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: novel_concepts_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_novel_concepts_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml b/lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml deleted file mode 100644 index 277d843d7c..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/object_counting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: object_counting_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_object_counting_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml b/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml index aaa43e678e..30bbf63972 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: odd_one_out_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_odd_one_out_multiple_choice 
diff --git a/lm_eval/tasks/bigbench/multiple_choice/operators.yaml b/lm_eval/tasks/bigbench/multiple_choice/operators.yaml deleted file mode 100644 index 951db6f99e..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/operators.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: operators_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_operators_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/paragraph_segmentation.yaml b/lm_eval/tasks/bigbench/multiple_choice/paragraph_segmentation.yaml deleted file mode 100644 index 2cfc8283e8..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/paragraph_segmentation.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: paragraph_segmentation_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_paragraph_segmentation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml index 7a9b61fb16..20a880d8ff 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: parsinlu_qa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_parsinlu_qa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/parsinlu_reading_comprehension.yaml b/lm_eval/tasks/bigbench/multiple_choice/parsinlu_reading_comprehension.yaml deleted file mode 100644 index 5fa0eccce9..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/parsinlu_reading_comprehension.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: parsinlu_reading_comprehension_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_parsinlu_reading_comprehension_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml 
b/lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml index de024e2e7f..c7b5cbb424 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: penguins_in_a_table_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_penguins_in_a_table_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml b/lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml index b7a644f9d7..6bd1314c9c 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: periodic_elements_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_periodic_elements_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml b/lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml index 6fa92ed3a8..9a45e47914 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: persian_idioms_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_persian_idioms_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml b/lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml index c797aec6e6..e81cb20651 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: phrase_relatedness_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: 
bigbench_phrase_relatedness_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml b/lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml index 089376dd8e..fc54acaf05 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: physical_intuition_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_physical_intuition_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/physics.yaml b/lm_eval/tasks/bigbench/multiple_choice/physics.yaml index bc06f79dff..d4c4ff4baf 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/physics.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/physics.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: physics_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_physics_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml b/lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml deleted file mode 100644 index 44646f146a..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/physics_questions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: physics_questions_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_physics_questions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml b/lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml index 85aac7f4b6..494c0949a7 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: play_dialog_same_or_different_zero_shot -include: ../multiple_choice_template_yaml 
+include: ../multiple_choice_template_a_yaml task: bigbench_play_dialog_same_or_different_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/polish_sequence_labeling.yaml b/lm_eval/tasks/bigbench/multiple_choice/polish_sequence_labeling.yaml deleted file mode 100644 index d61345feb5..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/polish_sequence_labeling.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: polish_sequence_labeling_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_polish_sequence_labeling_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml b/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml index 71a56aa805..5ca6d0f47a 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: presuppositions_as_nli_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_presuppositions_as_nli_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml b/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml deleted file mode 100644 index 263d61ebe6..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: qa_wikidata_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_qa_wikidata_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml b/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml index 3b3dd0d70e..4e2a1ef6bb 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: question_selection_zero_shot -include: 
../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_question_selection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml b/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml index 8138791fff..2013e5b9c7 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: real_or_fake_text_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_real_or_fake_text_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml b/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml index 3ab6d5e062..92ee379e71 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: reasoning_about_colored_objects_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_reasoning_about_colored_objects_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml b/lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml deleted file mode 100644 index 666aa49b06..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/repeat_copy_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: repeat_copy_logic_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_repeat_copy_logic_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml b/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml deleted file mode 100644 index 49e3cb4b8d..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# 
Generated by utils.py -dataset_name: rephrase_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_rephrase_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml b/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml index 93434e2c6d..3a11b6d599 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: riddle_sense_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_riddle_sense_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml b/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml index 32c38ba378..4b7cb5e731 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: ruin_names_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_ruin_names_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml index d930e7419a..fd57656994 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: salient_translation_error_detection_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_salient_translation_error_detection_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml b/lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml deleted file mode 100644 index f23190e7ac..0000000000 --- 
a/lm_eval/tasks/bigbench/multiple_choice/scientific_press_release.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: scientific_press_release_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_scientific_press_release_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml b/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml deleted file mode 100644 index 00574b2f53..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_in_context_sparc.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: semantic_parsing_in_context_sparc_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_semantic_parsing_in_context_sparc_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml b/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml deleted file mode 100644 index a988e54c51..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/semantic_parsing_spider.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: semantic_parsing_spider_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_semantic_parsing_spider_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml b/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml index 4e4a18f1ad..07282c2882 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: sentence_ambiguity_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_sentence_ambiguity_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml b/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml index 
82b86d1b47..71408e96ee 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: similarities_abstraction_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_similarities_abstraction_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml b/lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml deleted file mode 100644 index 7b1849d5e5..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simp_turing_concept.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simp_turing_concept_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simp_turing_concept_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml deleted file mode 100644 index cd1b61b9b0..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_arithmetic_json_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simple_arithmetic_json_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml deleted file mode 100644 index 4e63fce945..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_multiple_choice.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_arithmetic_json_multiple_choice_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simple_arithmetic_json_multiple_choice_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml 
b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml deleted file mode 100644 index 8688512bda..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_json_subtasks.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_arithmetic_json_subtasks_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simple_arithmetic_json_subtasks_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml deleted file mode 100644 index 685ec17c1a..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_arithmetic_multiple_targets_json.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_arithmetic_multiple_targets_json_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_simple_arithmetic_multiple_targets_json_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml index 0983381ba2..66db4664d1 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: simple_ethical_questions_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_simple_ethical_questions_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml b/lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml deleted file mode 100644 index 13b67888cd..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/simple_text_editing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: simple_text_editing_zero_shot -include: ../multiple_choice_template_yaml -task: 
bigbench_simple_text_editing_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml b/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml index 3e79f1ce10..7819aa4c17 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: snarks_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_snarks_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml b/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml index a4da50c90c..8ec2dd1dd2 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: social_iqa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_social_iqa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml b/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml index 1b3bd5936e..247f558a33 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/social_support.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: social_support_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_social_support_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml index e5a123fc93..ae2ba852ee 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: sports_understanding_zero_shot -include: ../multiple_choice_template_yaml +include: 
../multiple_choice_template_a_yaml task: bigbench_sports_understanding_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml b/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml index 30877750e5..bed6b55f29 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: strange_stories_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_strange_stories_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml b/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml index f988071bad..f1d6ae3b2e 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: strategyqa_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_b_yaml task: bigbench_strategyqa_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml b/lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml deleted file mode 100644 index f53d677caa..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/sufficient_information.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: sufficient_information_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_sufficient_information_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml b/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml index ecf7465ff2..138c2dff78 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: suicide_risk_zero_shot -include: ../multiple_choice_template_yaml +include: 
../multiple_choice_template_a_yaml task: bigbench_suicide_risk_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml index 40103274e9..46d66147c4 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: swahili_english_proverbs_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_swahili_english_proverbs_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml index d2f31d3c03..a08c437e07 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: swedish_to_german_proverbs_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_swedish_to_german_proverbs_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml b/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml index 98e3d5b369..1d519f3e72 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: symbol_interpretation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_symbol_interpretation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml b/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml index abd8834b0f..046e4eeba4 100644 --- 
a/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: temporal_sequences_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_temporal_sequences_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/tense.yaml b/lm_eval/tasks/bigbench/multiple_choice/tense.yaml deleted file mode 100644 index 6a2676f087..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/tense.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: tense_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_tense_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml b/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml index 350d4e786c..ea069173bd 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: timedial_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_timedial_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml b/lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml deleted file mode 100644 index b9a03639a2..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/topical_chat.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: topical_chat_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_topical_chat_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml b/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml index f9aa366b7a..62ebc5d610 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml @@ 
-1,4 +1,4 @@ # Generated by utils.py dataset_name: tracking_shuffled_objects_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_tracking_shuffled_objects_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml b/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml index 263793af42..5cdd779d7b 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: understanding_fables_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_understanding_fables_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml b/lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml index f7e1feb052..bfe91a2b08 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: undo_permutation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_undo_permutation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml b/lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml index 21a67c437b..d1c50a6523 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: unit_conversion_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_unit_conversion_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml b/lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml index 68614cfddf..7d87db233a 
100644 --- a/lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: unit_interpretation_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_unit_interpretation_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/unnatural_in_context_learning.yaml b/lm_eval/tasks/bigbench/multiple_choice/unnatural_in_context_learning.yaml deleted file mode 100644 index 45943005c7..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/unnatural_in_context_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: unnatural_in_context_learning_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_unnatural_in_context_learning_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml b/lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml index 84305bf33b..42db495738 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: vitaminc_fact_verification_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_vitaminc_fact_verification_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml b/lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml index 7879d1661e..8c4e15d3ad 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: what_is_the_tao_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_what_is_the_tao_multiple_choice diff --git 
a/lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml b/lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml index 3dbfb0305e..a08b9b3efe 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: which_wiki_edit_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_which_wiki_edit_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml b/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml index 98bc6e4b23..23ffc4bdd9 100644 --- a/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml +++ b/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml @@ -1,4 +1,4 @@ # Generated by utils.py dataset_name: winowhy_zero_shot -include: ../multiple_choice_template_yaml +include: ../multiple_choice_template_a_yaml task: bigbench_winowhy_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml b/lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml deleted file mode 100644 index 71e79ae363..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/word_sorting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: word_sorting_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_word_sorting_multiple_choice diff --git a/lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml b/lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml deleted file mode 100644 index bbfeb14458..0000000000 --- a/lm_eval/tasks/bigbench/multiple_choice/word_unscrambling.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by utils.py -dataset_name: word_unscrambling_zero_shot -include: ../multiple_choice_template_yaml -task: bigbench_word_unscrambling_multiple_choice From 86098661f5b1b97cbc54cd1fe9987ec3671951c2 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Tue, 9 Apr 2024 11:00:43 +0000 Subject: 
[PATCH 5/7] update --- lm_eval/tasks/bigbench/generate_tasks.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lm_eval/tasks/bigbench/generate_tasks.py b/lm_eval/tasks/bigbench/generate_tasks.py index 5ab36ce458..5e7923dd1e 100644 --- a/lm_eval/tasks/bigbench/generate_tasks.py +++ b/lm_eval/tasks/bigbench/generate_tasks.py @@ -1,4 +1,5 @@ import os + import datasets import yaml @@ -178,6 +179,7 @@ "simple_arithmetic_multiple_targets_json", ] + def main() -> None: for path, task_type in zip( ["multiple_choice", "generate_until"], @@ -192,13 +194,17 @@ def main() -> None: print(f"Checking {task} for multiple choices") if task in skip_tasks: continue - data = datasets.load_dataset("hails/bigbench", task+"_zero_shot") - multiple_choice_targets = data['default'][0]["multiple_choice_targets"] + data = datasets.load_dataset("hails/bigbench", task + "_zero_shot") + multiple_choice_targets = data["default"][0][ + "multiple_choice_targets" + ] if len(multiple_choice_targets) == 0: continue else: template_file = "multiple_choice_template_b_yaml" - if set(data['default'][0]["targets"]) < set(multiple_choice_targets): + if set(data["default"][0]["targets"]) < set( + multiple_choice_targets + ): template_file = "multiple_choice_template_a_yaml" with open(f"{path}/{file_name}", "w", encoding="utf-8") as f: From 6adb5e69236ab948299b15a397f812eb62b9c42c Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Fri, 24 May 2024 11:59:18 -0400 Subject: [PATCH 6/7] Update multiple_choice_template_b_yaml --- lm_eval/tasks/bigbench/multiple_choice_template_b_yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml index 36b4eb921f..2900103e0d 100644 --- a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml +++ b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml @@ -12,4
+12,4 @@ metric_list: - metric: acc # TODO: brier score and other metrics metadata: - version: 0.0 + version: 1.0 From 3de4ccf59146fb665e2479d49f50cf498df576fd Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Fri, 24 May 2024 11:59:33 -0400 Subject: [PATCH 7/7] Update multiple_choice_template_a_yaml --- lm_eval/tasks/bigbench/multiple_choice_template_a_yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml index 10fce5c1c3..4b5f9e8929 100644 --- a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml +++ b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml @@ -12,4 +12,4 @@ metric_list: - metric: acc # TODO: brier score and other metrics metadata: - version: 0.0 + version: 1.0