Skip to content

Commit

Permalink
Add GPQA (#42)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Nathan Habib <[email protected]>
  • Loading branch information
clefourrier and NathanHB authored Feb 22, 2024
1 parent 81fc8fd commit 831ad47
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/lighteval/tasks/lighteval_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def fewshot_docs(self) -> list[Doc]:
self._fewshot_docs = []

# If we have no available few shot split, the few shot data is the eval data!
if self.fewshot_split is None:
if self.fewshot_split in [None, [None]]:
self._fewshot_docs = self._get_docs_from_split(self.evaluation_split, few_shots=True)
else: # Normal case
self._fewshot_docs = self._get_docs_from_split(self.fewshot_split, few_shots=True)
Expand Down
20 changes: 20 additions & 0 deletions src/lighteval/tasks/tasks_prompt_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,26 @@ def ethics_virtue(line, task_name: str = None):
)


def gpqa(line, task_name: str = None):
gold_index = random.randint(0, 3)
choices = [line["Incorrect Answer 1"], line["Incorrect Answer 2"], line["Incorrect Answer 3"]]
choices.insert(gold_index, line["Correct Answer"])

instruction = "Select the correct answer to the following questions.\n\n"

query = f"Question: {line['Question']}\n"
query += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES, choices)])
query += "Answer: "

return Doc(
task_name=task_name,
query=f"{instruction}{query}",
choices=LETTER_INDICES[: len(choices)],
gold_index=gold_index,
instruction=instruction,
)


def gsm8k(line, task_name: str = None):
# Has special analysis in metric for number decomposiition
return Doc(
Expand Down
1 change: 1 addition & 0 deletions src/lighteval/tasks/tasks_table.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@
{"name":"glue:stsb","suite":["lighteval","glue"],"prompt_function":"stsb","hf_repo":"glue","hf_subset":"stsb","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"glue:wnli","suite":["lighteval","glue"],"prompt_function":"wnli","hf_repo":"glue","hf_subset":"wnli","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"goal_step_wikihow","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"goal_step_wikihow","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"gpqa","suite":["lighteval"],"prompt_function":"gpqa","hf_repo":"Idavidrein/gpqa","hf_subset":"gpqa_main","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"gre_reading_comprehension","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gre_reading_comprehension","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"gsm8k","suite":["lighteval"],"prompt_function":"gsm8k","hf_repo":"gsm8k","hf_subset":"main","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":256,"metric":["quasi_exact_match_gsm8k"],"stop_sequence":[":","Question:", "Question"],"output_regex":null,"frozen":false}
{"name":"headqa:en","suite":["lighteval","headqa"],"prompt_function":"headqa","hf_repo":"lighteval/headqa_harness","hf_subset":"en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
Expand Down

0 comments on commit 831ad47

Please sign in to comment.