Skip to content

Commit

Permalink
Bigbench fix (EleutherAI#1686)
Browse files Browse the repository at this point in the history
* edit process multiple-choice

* split template yaml

* remove

* modified multiple_choice tasks

* update

* Update multiple_choice_template_b_yaml

* Update multiple_choice_template_a_yaml

---------

Co-authored-by: Hailey Schoelkopf <[email protected]>
  • Loading branch information
2 people authored and djstrong committed Aug 2, 2024
1 parent ee44bf2 commit 83f9d66
Show file tree
Hide file tree
Showing 171 changed files with 160 additions and 317 deletions.
26 changes: 25 additions & 1 deletion lm_eval/tasks/bigbench/generate_tasks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

import datasets
import yaml


Expand Down Expand Up @@ -173,6 +174,11 @@
"word_unscrambling",
]

skip_tasks = [
"simple_arithmetic_json_multiple_choice",
"simple_arithmetic_multiple_targets_json",
]


def main() -> None:
for path, task_type in zip(
Expand All @@ -183,11 +189,29 @@ def main() -> None:
for task in all_subtasks:
file_name = f"{task}.yaml"
try:
template_file = task_type
if path == "multiple_choice":
print(f"Checking {task} for multiple choices")
if task in skip_tasks:
continue
data = datasets.load_dataset("hails/bigbench", task + "_zero_shot")
multiple_choice_targets = data["default"][0][
"multiple_choice_targets"
]
if len(multiple_choice_targets) == 0:
continue
else:
template_file = "multiple_choice_template_b_yaml"
if set(data["default"][0]["targets"]) < set(
multiple_choice_targets
):
template_file = "multiple_choice_template_a_yaml"

with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
"include": f"../{task_type}",
"include": f"../{template_file}",
"task": "bigbench_"
+ task
+ "_{}".format(task_type.split("_template_yaml")[0]),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: abstract_narrative_understanding_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_abstract_narrative_understanding_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: anachronisms_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_anachronisms_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: analogical_similarity_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_analogical_similarity_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: analytic_entailment_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_analytic_entailment_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: arithmetic_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_arithmetic_multiple_choice

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: authorship_verification_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_authorship_verification_multiple_choice

This file was deleted.

4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: bbq_lite_json_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_bbq_lite_json_multiple_choice

This file was deleted.

4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/causal_judgement.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: causal_judgment_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_causal_judgment_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: cause_and_effect_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_cause_and_effect_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: checkmate_in_one_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_checkmate_in_one_multiple_choice

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: cifar10_classification_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_cifar10_classification_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: code_line_description_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_code_line_description_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/codenames.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/color.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: color_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_color_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: common_morpheme_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_common_morpheme_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: conceptual_combinations_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_conceptual_combinations_multiple_choice

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: contextual_parametric_knowledge_conflicts_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: crash_blossom_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_crash_blossom_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: crass_ai_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_crass_ai_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: cryobiology_spanish_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_cryobiology_spanish_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: cs_algorithms_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_cs_algorithms_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: dark_humor_detection_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_dark_humor_detection_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: date_understanding_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_date_understanding_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: disambiguation_qa_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_disambiguation_qa_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: discourse_marker_prediction_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_discourse_marker_prediction_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: dyck_languages_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_dyck_languages_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: elementary_math_qa_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_elementary_math_qa_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: emoji_movie_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_emoji_movie_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: emojis_emotion_prediction_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_emojis_emotion_prediction_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: empirical_judgments_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_empirical_judgments_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: english_proverbs_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_english_proverbs_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: english_russian_proverbs_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_english_russian_proverbs_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: entailed_polarity_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_entailed_polarity_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: entailed_polarity_hindi_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_entailed_polarity_hindi_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: epistemic_reasoning_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_epistemic_reasoning_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: evaluating_information_essentiality_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_evaluating_information_essentiality_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: fact_checker_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_fact_checker_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: fantasy_reasoning_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_fantasy_reasoning_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: figure_of_speech_detection_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_figure_of_speech_detection_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: formal_fallacies_syllogisms_negation_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_formal_fallacies_syllogisms_negation_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/gem.yaml

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: general_knowledge_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_general_knowledge_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: geometric_shapes_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_geometric_shapes_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: goal_step_wikihow_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_goal_step_wikihow_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: gre_reading_comprehension_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_gre_reading_comprehension_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: hhh_alignment_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_hhh_alignment_multiple_choice

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: hindu_knowledge_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_hindu_knowledge_multiple_choice
Loading

0 comments on commit 83f9d66

Please sign in to comment.