Skip to content

Commit

Permalink
Merge branch 'main' into add_swiss_legal_evals
Browse files: browse the repository at this point in the history
  • Loading branch information
JoelNiklaus authored Dec 17, 2024
2 parents 3746849 + 1b9e2c3 commit ddaadbf
Show file tree
Hide file tree
Showing 21 changed files with 164 additions and 178 deletions.
7 changes: 0 additions & 7 deletions community_tasks/_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,3 @@ def __init__(
sample_level_fn=lambda x: x, # how to compute score for one sample
corpus_level_fn=np.mean, # aggregation
)

# MODULE LOGIC
# You should not need to touch this
# Convert to dict for lighteval
if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
8 changes: 0 additions & 8 deletions community_tasks/aimo_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,3 @@ def aimo_prompt(line, task_name: str = None):

# STORE YOUR EVALS
TASKS_TABLE = [task]


# MODULE LOGIC
# You should not need to touch this

if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
4 changes: 0 additions & 4 deletions community_tasks/arabic_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,3 @@ def __init__(
+ [toxigen_ar_task]
+ [sciq_ar_task]
)

if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
8 changes: 0 additions & 8 deletions community_tasks/german_rag_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,11 +221,3 @@ def prompt_fn_context_question_match(line, task_name: str = None):

# STORE YOUR EVALS
TASKS_TABLE = [task1, task2, task3, task4]


# MODULE LOGIC
# You should not need to touch this

if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
5 changes: 0 additions & 5 deletions community_tasks/oz_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,3 @@ def prompt_fn_oz_eval_task(line, task_name: str = None):

# STORE YOUR EVALS
TASKS_TABLE = [oz_eval_task]


if __name__ == "__main__":
print(t["name"] for t in TASKS_TABLE)
print(len(TASKS_TABLE))
4 changes: 0 additions & 4 deletions community_tasks/serbian_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,3 @@ def create_task_config(
mmlu_world_religions,
mmlu_all,
]

if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
11 changes: 0 additions & 11 deletions docs/source/adding-a-custom-task.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -167,17 +167,6 @@ TASKS_TABLE = SUBSET_TASKS
# TASKS_TABLE = [task]
```

Finally, you need to add a module logic to convert your task to a dict for lighteval.

```python
# MODULE LOGIC
# You should not need to touch this
# Convert to dict for lighteval
if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
```

Once your file is created you can then run the evaluation with the following command:

```bash
Expand Down
4 changes: 0 additions & 4 deletions examples/nanotron/custom_evaluation_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,3 @@ def __init__(
"all": ",".join(t[1] for t in _TASKS_STRINGS),
"early-signal": EARLY_SIGNAL_TASKS,
}

if __name__ == "__main__":
print(t["name"] for t in TASKS_TABLE)
print(len(TASKS_TABLE))
38 changes: 19 additions & 19 deletions src/lighteval/main_accelerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@
TOKEN = os.getenv("HF_TOKEN")
CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")

HELP_PANNEL_NAME_1 = "Common Paramaters"
HELP_PANNEL_NAME_2 = "Logging Parameters"
HELP_PANNEL_NAME_3 = "Debug Paramaters"
HELP_PANNEL_NAME_4 = "Modeling Paramaters"
HELP_PANEL_NAME_1 = "Common Parameters"
HELP_PANEL_NAME_2 = "Logging Parameters"
HELP_PANEL_NAME_3 = "Debug Parameters"
HELP_PANEL_NAME_4 = "Modeling Parameters"


def accelerate( # noqa C901
Expand All @@ -50,51 +50,51 @@ def accelerate( # noqa C901
tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
# === Common parameters ===
use_chat_template: Annotated[
bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
] = False,
system_prompt: Annotated[
Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
] = None,
dataset_loading_processes: Annotated[
int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
custom_tasks: Annotated[
Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
cache_dir: Annotated[
Optional[str], Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
Optional[str], Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
num_fewshot_seeds: Annotated[
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANNEL_NAME_1)
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
# === saving ===
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
push_to_tensorboard: Annotated[
bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANNEL_NAME_2)
bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
public_run: Annotated[
bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANNEL_NAME_2)
bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
results_org: Annotated[
Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANNEL_NAME_2)
Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2)
] = None,
save_details: Annotated[
bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANNEL_NAME_2)
bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
# === debug ===
max_samples: Annotated[
Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
] = None,
override_batch_size: Annotated[
int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANNEL_NAME_3)
int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANEL_NAME_3)
] = -1,
job_id: Annotated[
int, Option(help="Optional job id for future refenrence.", rich_help_panel=HELP_PANNEL_NAME_3)
int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)
] = 0,
):
"""
Expand Down
18 changes: 9 additions & 9 deletions src/lighteval/main_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,28 @@

CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")

HELP_PANNEL_NAME_1 = "Common Paramaters"
HELP_PANNEL_NAME_2 = "Logging Parameters"
HELP_PANNEL_NAME_3 = "Debug Paramaters"
HELP_PANNEL_NAME_4 = "Modeling Paramaters"
HELP_PANEL_NAME_1 = "Common Parameters"
HELP_PANEL_NAME_2 = "Logging Parameters"
HELP_PANEL_NAME_3 = "Debug Parameters"
HELP_PANEL_NAME_4 = "Modeling Parameters"


def baseline(
tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
cache_dir: Annotated[
str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
] = CACHE_DIR,
custom_tasks: Annotated[
Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
dataset_loading_processes: Annotated[
int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
max_samples: Annotated[
Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
] = None,
):
"""
Expand Down
Loading

0 comments on commit ddaadbf

Please sign in to comment.