-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New mechanism for evaluation contributions (#47)
* new mechanism for eval contributions * added doc following nathan comments
- Loading branch information
1 parent
831ad47
commit 92e9b50
Showing
3 changed files
with
105 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# ruff: noqa: F405, F403, F401 | ||
""" | ||
Custom evaluation tasks for lighteval. Copy this file and complete it with the info for your task. | ||
This file generally creates just a TASKS_TABLE and TASKS_GROUPS, which are then imported by LightEval. | ||
Author: | ||
""" | ||
from lighteval.tasks.lighteval_task import LightevalTaskConfig | ||
from lighteval.tasks.requests import Doc | ||
from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES | ||
|
||
|
||
## EVAL WITH NO SUBSET ## | ||
# This is how you create a simple task (like hellaswag) which has one single subset | ||
# attached to it, and one evaluation possible. | ||
# Template configuration for a task with a single subset.
# Every empty string / empty list below is a placeholder to complete for your task.
task = LightevalTaskConfig(
    # Identity of the task.
    name="myothertask",
    suite=["community"],
    # Dataset location and the splits to use.
    hf_repo="",
    hf_subset="default",
    hf_avail_splits=[],
    evaluation_splits=[],
    # Few-shot settings.
    few_shots_split="",
    few_shots_select="",
    # Prompt construction and scoring.
    # The prompt function must be defined in the file or imported from
    # src/lighteval/tasks/tasks_prompt_formatting.py
    prompt_function="prompt_fn",
    metric=[""],
)
|
||
## EVALS WITH SUBSET | ||
# This is how you create a subset task (like MMLU), which has several subsets, | ||
# each being its own evaluation task. | ||
|
||
# fmt: off | ||
SAMPLE_SUBSETS = [] # list of all the subsets to use for this eval | ||
# fmt: on | ||
|
||
|
||
class CustomSubsetTask(LightevalTaskConfig):
    """Task configuration template for one subset of a multi-subset evaluation.

    Only the task name and the dataset subset vary between instances; every
    other setting is fixed here and forwarded to ``LightevalTaskConfig``.
    Empty strings and empty lists are placeholders to complete for your task.
    """

    def __init__(self, name, hf_subset):
        """Create the configuration for a single subset.

        Args:
            name: Name under which the task is registered.
            hf_subset: Dataset subset this task evaluates.
        """
        # Settings shared by every subset of this evaluation.
        shared_settings = {
            "prompt_function": "prompt_fn",  # must be defined in the file
            "hf_repo": "",
            "metric": [""],
            "hf_avail_splits": [],
            "evaluation_splits": [],
            "few_shots_split": "",
            "few_shots_select": "",
            "suite": ["community"],
            "generation_size": -1,
            "stop_sequence": None,
            "output_regex": None,
            "frozen": False,
        }
        super().__init__(name=name, hf_subset=hf_subset, **shared_settings)
|
||
|
||
## DEFINE YOUR PROMPT FUNCTIONS | ||
# Define as many as you need for your different tasks | ||
def prompt_fn(line, task_name: str = None):
    """Turn one dataset line into a ``Doc`` object.

    This is a template: ``line`` is not read yet and every field of the
    returned ``Doc`` other than ``task_name`` is a placeholder to complete.
    Examples live in src/lighteval/tasks/tasks_prompt_formatting.py, and the
    README gives more detail on what this function should do.

    Args:
        line: A single line (row) of the dataset.
        task_name: Name of the task, stored on the returned ``Doc``.

    Returns:
        The ``Doc`` built for this line.
    """
    # NOTE(review): ``choices`` is an empty string here; presumably a real task
    # passes the list of answer options -- confirm against the Doc definition.
    placeholder_fields = {
        "query": "",
        "choices": "",
        "gold_index": 0,
        "instruction": "",
    }
    return Doc(task_name=task_name, **placeholder_fields)
|
||
|
||
## STORE YOUR EVALS | ||
# One CustomSubsetTask per entry of SAMPLE_SUBSETS, named "mytask:<subset>".
SUBSET_TASKS = [
    CustomSubsetTask(name=f"mytask:{subset}", hf_subset=subset)
    for subset in SAMPLE_SUBSETS
]
# All tasks defined in this file: the subset tasks first, then the single-subset task.
_TASKS = [*SUBSET_TASKS, task]
|
||
## MODULE LOGIC | ||
# You should not need to touch this | ||
# Convert to dict for lighteval | ||
# One dict per task configuration, in the same order as _TASKS.
TASKS_TABLE = [config.as_dict() for config in _TASKS]
|
||
if __name__ == "__main__":
    # Quick check when running this file directly: show the registered task
    # names, then how many tasks there are.
    # The names are collected into a list first: handing print() a bare
    # generator expression displays the generator object, not the names.
    print([t["name"] for t in TASKS_TABLE])
    print(len(TASKS_TABLE))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters