Commit
Merge branch 'main' into data_split_depending_on_eval_params
clefourrier committed Jul 9, 2024
2 parents 3995494 + 4651531 commit 8a1814b
Showing 44 changed files with 23,919 additions and 2,213 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -34,7 +34,7 @@ repos:

  - repo: https://github.com/charliermarsh/ruff-pre-commit
    # Ruff version.
-    rev: 'v0.1.6'
+    rev: 'v0.2.2'
    hooks:
      - id: ruff
        args: ['--fix']
108 changes: 74 additions & 34 deletions README.md

Large diffs are not rendered by default.

44 changes: 21 additions & 23 deletions community_tasks/_template.py
@@ -39,12 +39,28 @@
from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES


+# DEFINE YOUR PROMPT FUNCTIONS
+# Define as many as you need for your different tasks
+def prompt_fn(line, task_name: str = None):
+    """Defines how to go from a dataset line to a doc object.
+    Follow examples in src/lighteval/tasks/tasks_prompt_formatting.py, or get more info
+    about what this function should do in the README.
+    """
+    return Doc(
+        task_name=task_name,
+        query="",
+        choices="",
+        gold_index=0,
+        instruction="",
+    )


# EVAL WITH NO SUBSET ##
-# This is how you create a simple tasks (like hellaswag) which has one single subset
+# This is how you create a simple task (like hellaswag) which has one single subset
# attached to it, and one evaluation possible.
task = LightevalTaskConfig(
    name="myothertask",
-    prompt_function="prompt_fn", # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py
+    prompt_function=prompt_fn, # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py
    suite=["community"],
    hf_repo="",
    hf_subset="default",
@@ -73,7 +89,7 @@ def __init__(
        super().__init__(
            name=name,
            hf_subset=hf_subset,
-            prompt_function="prompt_fn", # must be defined in the file
+            prompt_function=prompt_fn, # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py
            hf_repo="",
            metric=[""],
            hf_avail_splits=[],
@@ -88,25 +104,9 @@
        )


-# DEFINE YOUR PROMPT FUNCTIONS
-# Define as many as you need for your different tasks
-def prompt_fn(line, task_name: str = None):
-    """Defines how to go from a dataset line to a doc object.
-    Follow examples in src/lighteval/tasks/tasks_prompt_formatting.py, or get more info
-    about what this function should do in the README.
-    """
-    return Doc(
-        task_name=task_name,
-        query="",
-        choices="",
-        gold_index=0,
-        instruction="",
-    )


# STORE YOUR EVALS
SUBSET_TASKS = [CustomSubsetTask(name=f"mytask:{subset}", hf_subset=subset) for subset in SAMPLE_SUBSETS]
-_TASKS = SUBSET_TASKS + [task]
+TASKS_TABLE = SUBSET_TASKS + [task]


# CUSTOM METRIC IF NEEDED
@@ -124,8 +124,6 @@ def prompt_fn(line, task_name: str = None):
# MODULE LOGIC
# You should not need to touch this
-# Convert to dict for lighteval
-TASKS_TABLE = [task.as_dict() for task in _TASKS]

if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
+    print(t.name for t in TASKS_TABLE)
    print(len(TASKS_TABLE))
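
Taken together, the template changes amount to two conventions: pass the prompt function by reference instead of by its string name, and store the LightevalTaskConfig objects directly in TASKS_TABLE instead of converting them with as_dict(). A minimal, hypothetical filled-in version is sketched below; the dataset fields (question, choices, answer), the repo name, and the metric are illustrative assumptions, not part of this commit.

# A minimal sketch of the updated template conventions. All dataset field
# names, the repo, and the metric below are assumptions for illustration.
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc


def prompt_fn(line, task_name: str = None):
    # Map one dataset row to a Doc: the query shown to the model, the
    # candidate choices, and the index of the gold answer among them.
    return Doc(
        task_name=task_name,
        query=f"Question: {line['question']}\nAnswer:",
        choices=[f" {choice}" for choice in line["choices"]],
        gold_index=line["answer"],  # assumed to already be an int index
    )


task = LightevalTaskConfig(
    name="mytask",
    prompt_function=prompt_fn,  # direct function reference, not a string name
    suite=["community"],
    hf_repo="your-org/your-dataset",  # hypothetical placeholder
    hf_subset="default",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split="train",
    few_shots_select="sequential",
    metric=["loglikelihood_acc"],
)

# TASKS_TABLE now holds the config objects themselves; the old
# [task.as_dict() for task in _TASKS] conversion step is gone.
TASKS_TABLE = [task]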
65 changes: 65 additions & 0 deletions community_tasks/aimo_evals.py
@@ -0,0 +1,65 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# ruff: noqa: F405, F403, F401
"""
Task to evaluate LLMs on the training set of the Kaggle AIMO competition: https://www.kaggle.com/competitions/ai-mathematical-olympiad-prize
"""

from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc


def aimo_prompt(line, task_name: str = None):
    return Doc(
        task_name=task_name,
        choices=[str(line["answer"])],
        gold_index=0,
        query=line["problem"],
    )


task = LightevalTaskConfig(
    name="aimo_progress_prize_1",
    prompt_function=aimo_prompt,
    suite=["community"],
    hf_subset="",
    hf_repo="lighteval/aimo_progress_prize_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split="train",
    few_shots_select="sequential",
    metric=["quasi_exact_match_math"],
    generation_size=2048,
    stop_sequence=None,
)

# STORE YOUR EVALS
TASKS_TABLE = [task]


# MODULE LOGIC
# You should not need to touch this

if __name__ == "__main__":
    print(t.name for t in TASKS_TABLE)
    print(len(TASKS_TABLE))
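
Before launching a full evaluation, the new prompt function can be spot-checked on a few training rows. A sketch, assuming the datasets library is installed, the script is run from the repository root, and the lighteval/aimo_progress_prize_1 dataset is reachable (the task_name string is purely illustrative):

# Sketch: exercise aimo_prompt on a few rows of the AIMO training split.
from datasets import load_dataset

from community_tasks.aimo_evals import aimo_prompt

ds = load_dataset("lighteval/aimo_progress_prize_1", split="train")
for line in ds.select(range(3)):
    doc = aimo_prompt(line, task_name="aimo_progress_prize_1")
    print(doc.query[:100])  # the problem statement
    print("gold:", doc.choices[doc.gold_index])  # the expected answer string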