Commit
Merge branch 'main' into data_split_depending_on_eval_params
clefourrier committed Jul 9, 2024
2 parents 3995494 + 4651531 commit 8a1814b
Showing 44 changed files with 23,919 additions and 2,213 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -34,7 +34,7 @@ repos:

  - repo: https://github.com/charliermarsh/ruff-pre-commit
    # Ruff version.
-    rev: 'v0.1.6'
+    rev: 'v0.2.2'
    hooks:
      - id: ruff
        args: ['--fix']
108 changes: 74 additions & 34 deletions README.md

Large diffs are not rendered by default.

44 changes: 21 additions & 23 deletions community_tasks/_template.py
@@ -39,12 +39,28 @@
from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES


+# DEFINE YOUR PROMPT FUNCTIONS
+# Define as many as you need for your different tasks
+def prompt_fn(line, task_name: str = None):
+    """Defines how to go from a dataset line to a doc object.
+    Follow examples in src/lighteval/tasks/tasks_prompt_formatting.py, or get more info
+    about what this function should do in the README.
+    """
+    return Doc(
+        task_name=task_name,
+        query="",
+        choices="",
+        gold_index=0,
+        instruction="",
+    )


# EVAL WITH NO SUBSET ##
-# This is how you create a simple tasks (like hellaswag) which has one single subset
+# This is how you create a simple task (like hellaswag) which has one single subset
# attached to it, and one evaluation possible.
task = LightevalTaskConfig(
    name="myothertask",
-    prompt_function="prompt_fn", # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py
+    prompt_function=prompt_fn, # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py
    suite=["community"],
    hf_repo="",
    hf_subset="default",
@@ -73,7 +89,7 @@ def __init__(
        super().__init__(
            name=name,
            hf_subset=hf_subset,
-            prompt_function="prompt_fn", # must be defined in the file
+            prompt_function=prompt_fn, # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py
            hf_repo="",
            metric=[""],
            hf_avail_splits=[],
@@ -88,25 +104,9 @@
        )


-# DEFINE YOUR PROMPT FUNCTIONS
-# Define as many as you need for your different tasks
-def prompt_fn(line, task_name: str = None):
-    """Defines how to go from a dataset line to a doc object.
-    Follow examples in src/lighteval/tasks/tasks_prompt_formatting.py, or get more info
-    about what this function should do in the README.
-    """
-    return Doc(
-        task_name=task_name,
-        query="",
-        choices="",
-        gold_index=0,
-        instruction="",
-    )


# STORE YOUR EVALS
SUBSET_TASKS = [CustomSubsetTask(name=f"mytask:{subset}", hf_subset=subset) for subset in SAMPLE_SUBSETS]
-_TASKS = SUBSET_TASKS + [task]
+TASKS_TABLE = SUBSET_TASKS + [task]


# CUSTOM METRIC IF NEEDED
@@ -124,8 +124,6 @@ def prompt_fn(line, task_name: str = None):
# MODULE LOGIC
# You should not need to touch this
-# Convert to dict for lighteval
-TASKS_TABLE = [task.as_dict() for task in _TASKS]

if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
+    print(t.name for t in TASKS_TABLE)
    print(len(TASKS_TABLE))
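
Taken together, the template changes amount to two conventions: pass the prompt function by reference instead of by its string name, and store the LightevalTaskConfig objects directly in TASKS_TABLE instead of converting them with as_dict(). A minimal, hypothetical filled-in version is sketched below; the dataset fields (question, choices, answer), the repo name, and the metric are illustrative assumptions, not part of this commit.

# A minimal sketch of the updated template conventions. All dataset field
# names, the repo, and the metric below are assumptions for illustration.
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc


def prompt_fn(line, task_name: str = None):
    # Map one dataset row to a Doc: the query shown to the model, the
    # candidate choices, and the index of the gold answer among them.
    return Doc(
        task_name=task_name,
        query=f"Question: {line['question']}\nAnswer:",
        choices=[f" {choice}" for choice in line["choices"]],
        gold_index=line["answer"],  # assumed to already be an int index
    )


task = LightevalTaskConfig(
    name="mytask",
    prompt_function=prompt_fn,  # direct function reference, not a string name
    suite=["community"],
    hf_repo="your-org/your-dataset",  # hypothetical placeholder
    hf_subset="default",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split="train",
    few_shots_select="sequential",
    metric=["loglikelihood_acc"],
)

# TASKS_TABLE now holds the config objects themselves; the old
# [task.as_dict() for task in _TASKS] conversion step is gone.
TASKS_TABLE = [task]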
65 changes: 65 additions & 0 deletions community_tasks/aimo_evals.py
@@ -0,0 +1,65 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# ruff: noqa: F405, F403, F401
"""
Task to evaluate LLMs on the training set of the Kaggle AIMO competition: https://www.kaggle.com/competitions/ai-mathematical-olympiad-prize
"""

from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc


def aimo_prompt(line, task_name: str = None):
    return Doc(
        task_name=task_name,
        choices=[str(line["answer"])],
        gold_index=0,
        query=line["problem"],
    )


task = LightevalTaskConfig(
    name="aimo_progress_prize_1",
    prompt_function=aimo_prompt,
    suite=["community"],
    hf_subset="",
    hf_repo="lighteval/aimo_progress_prize_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split="train",
    few_shots_select="sequential",
    metric=["quasi_exact_match_math"],
    generation_size=2048,
    stop_sequence=None,
)

# STORE YOUR EVALS
TASKS_TABLE = [task]


# MODULE LOGIC
# You should not need to touch this

if __name__ == "__main__":
    print(t.name for t in TASKS_TABLE)
    print(len(TASKS_TABLE))
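
Before launching a full evaluation, the new prompt function can be spot-checked on a few training rows. A sketch, assuming the datasets library is installed, the script is run from the repository root, and the lighteval/aimo_progress_prize_1 dataset is reachable (the task_name string is purely illustrative):

# Sketch: exercise aimo_prompt on a few rows of the AIMO training split.
from datasets import load_dataset

from community_tasks.aimo_evals import aimo_prompt

ds = load_dataset("lighteval/aimo_progress_prize_1", split="train")
for line in ds.select(range(3)):
    doc = aimo_prompt(line, task_name="aimo_progress_prize_1")
    print(doc.query[:100])  # the problem statement
    print("gold:", doc.choices[doc.gold_index])  # the expected answer string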