Bigbench fix (EleutherAI#1686)

* edit process multiple-choice * split template yaml * remove * modified multiple_choice tasks * update * Update multiple_choice_template_b_yaml * Update multiple_choice_template_a_yaml --------- Co-authored-by: Hailey Schoelkopf <[email protected]>
speakleash · Aug 2, 2024 · 83f9d66 · 83f9d66
1 parent ee44bf2
commit 83f9d66
Show file tree

Hide file tree

Showing 171 changed files with 160 additions and 317 deletions.
diff --git a/lm_eval/tasks/bigbench/generate_tasks.py b/lm_eval/tasks/bigbench/generate_tasks.py
@@ -1,5 +1,6 @@
 import os
 
+import datasets
 import yaml
 
 
@@ -173,6 +174,11 @@
     "word_unscrambling",
 ]
 
+skip_tasks = [
+    "simple_arithmetic_json_multiple_choice",
+    "simple_arithmetic_multiple_targets_json",
+]
+
 
 def main() -> None:
     for path, task_type in zip(
@@ -183,11 +189,29 @@ def main() -> None:
         for task in all_subtasks:
             file_name = f"{task}.yaml"
             try:
+                template_file = task_type
+                if path == "multiple_choice":
+                    print(f"Checking {task} for multiple choices")
+                    if task in skip_tasks:
+                        continue
+                    data = datasets.load_dataset("hails/bigbench", task + "_zero_shot")
+                    multiple_choice_targets = data["default"][0][
+                        "multiple_choice_targets"
+                    ]
+                    if len(multiple_choice_targets) == 0:
+                        continue
+                    else:
+                        template_file = "multiple_choice_template_b_yaml"
+                        if set(data["default"][0]["targets"]) < set(
+                            multiple_choice_targets
+                        ):
+                            template_file = "multiple_choice_template_a_yaml"
+
                 with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
                     f.write("# Generated by utils.py\n")
                     yaml.dump(
                         {
-                            "include": f"../{task_type}",
+                            "include": f"../{template_file}",
                             "task": "bigbench_"
                             + task
                             + "_{}".format(task_type.split("_template_yaml")[0]),

diff --git a/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: abstract_narrative_understanding_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_abstract_narrative_understanding_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml b/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: anachronisms_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_anachronisms_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml b/lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: analogical_similarity_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_analogical_similarity_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml b/lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: analytic_entailment_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_analytic_entailment_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml b/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: arithmetic_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_arithmetic_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml b/lm_eval/tasks/bigbench/multiple_choice/ascii_word_recognition.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml b/lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: authorship_verification_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_authorship_verification_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml b/lm_eval/tasks/bigbench/multiple_choice/auto_categorization.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml b/lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml b/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: bbq_lite_json_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_bbq_lite_json_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml b/lm_eval/tasks/bigbench/multiple_choice/bridging_anaphora_resolution_barqa.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml b/lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml b/lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: causal_judgment_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_causal_judgment_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml b/lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: cause_and_effect_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cause_and_effect_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml b/lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: checkmate_in_one_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_checkmate_in_one_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml b/lm_eval/tasks/bigbench/multiple_choice/chess_state_tracking.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml b/lm_eval/tasks/bigbench/multiple_choice/chinese_remainder_theorem.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml b/lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: cifar10_classification_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cifar10_classification_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml b/lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: code_line_description_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_code_line_description_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml b/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/color.yaml b/lm_eval/tasks/bigbench/multiple_choice/color.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: color_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_color_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml b/lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: common_morpheme_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_common_morpheme_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml b/lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: conceptual_combinations_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_conceptual_combinations_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml b/lm_eval/tasks/bigbench/multiple_choice/conlang_translation.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml b/lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: contextual_parametric_knowledge_conflicts_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml b/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: crash_blossom_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_crash_blossom_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml b/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: crass_ai_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_crass_ai_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml b/lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: cryobiology_spanish_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cryobiology_spanish_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml b/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml b/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: cs_algorithms_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cs_algorithms_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: dark_humor_detection_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_dark_humor_detection_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml b/lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: date_understanding_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_date_understanding_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: disambiguation_qa_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_disambiguation_qa_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml b/lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: discourse_marker_prediction_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_discourse_marker_prediction_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml b/lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: dyck_languages_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_dyck_languages_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml b/lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: elementary_math_qa_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_elementary_math_qa_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml b/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: emoji_movie_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_emoji_movie_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml b/lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: emojis_emotion_prediction_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_emojis_emotion_prediction_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml b/lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: empirical_judgments_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_empirical_judgments_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: english_proverbs_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_english_proverbs_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml b/lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: english_russian_proverbs_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_english_russian_proverbs_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: entailed_polarity_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_entailed_polarity_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml b/lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: entailed_polarity_hindi_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_entailed_polarity_hindi_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml b/lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: epistemic_reasoning_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_epistemic_reasoning_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml b/lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: evaluating_information_essentiality_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_evaluating_information_essentiality_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml b/lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: fact_checker_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_fact_checker_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml b/lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: fantasy_reasoning_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_fantasy_reasoning_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml b/lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml b/lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: figure_of_speech_detection_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_figure_of_speech_detection_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml b/lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: formal_fallacies_syllogisms_negation_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_formal_fallacies_syllogisms_negation_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/gem.yaml b/lm_eval/tasks/bigbench/multiple_choice/gem.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml b/lm_eval/tasks/bigbench/multiple_choice/gender_inclusive_sentences_german.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml b/lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: general_knowledge_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_general_knowledge_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml b/lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: geometric_shapes_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_geometric_shapes_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml b/lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: goal_step_wikihow_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_goal_step_wikihow_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml b/lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: gre_reading_comprehension_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_gre_reading_comprehension_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml b/lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: hhh_alignment_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_hhh_alignment_multiple_choice
diff --git a/lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml b/lm_eval/tasks/bigbench/multiple_choice/hindi_question_answering.yaml
diff --git a/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml b/lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: hindu_knowledge_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_hindu_knowledge_multiple_choice