From 4651531e4716911f9934b09d6b813fdbe18e7149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com> Date: Tue, 9 Jul 2024 15:29:00 +0200 Subject: [PATCH] Now only uses functions for prompt definition (#213) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add function prompt assigment * add json casting * fix ruff setting + fmt * replaced json tasks by python tasks, step 1 * wip * simplification part 1 * fix extended tasks + typo * fix * fix nanotron example * small fix * now use function, not string, to pass prompts in examples * moved everyone to function calling * LightevalTask now only takes functions * removed templated type which messed up the test suite * last fix + doc udpate * Update src/lighteval/tasks/registry.py Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --------- Co-authored-by: Hynek Kydlíček Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- README.md | 12 +- community_tasks/_template.py | 36 +- community_tasks/aimo_evals.py | 21 +- community_tasks/arabic_evals.py | 232 +- community_tasks/german_rag_evals.py | 140 +- examples/nanotron/custom_evaluation_tasks.py | 341 +-- examples/nanotron/custom_task.py | 107 +- src/lighteval/logging/evaluation_tracker.py | 2 + src/lighteval/tasks/default_tasks.py | 2471 +++++++++-------- src/lighteval/tasks/extended/ifeval/main.py | 26 +- src/lighteval/tasks/extended/mt_bench/main.py | 42 +- .../tasks/extended/tiny_benchmarks/main.py | 13 +- src/lighteval/tasks/lighteval_task.py | 36 +- 14 files changed, 1730 insertions(+), 1751 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 441ff70a..0551f915 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. - rev: 'v0.1.6' + rev: 'v0.2.2' hooks: - id: ruff args: ['--fix'] diff --git a/README.md b/README.md index 8c6f1063..10364fe4 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ accelerate launch --multi_gpu --num_processes= run_evals_accelerate.py --output_dir output_dir ``` -You can find the template of the expected model configuration in [examples/model_configs/base_model.yaml_](./examples/model_configs/base_model.yaml). +You can find the template of the expected model configuration in [examples/model_configs/base_model.yaml_](./examples/model_configs/base_model.yaml). ### Evaluating a large model with pipeline parallelism @@ -197,7 +197,7 @@ There are two types of configuration files that can be provided for running on t 1. [endpoint_model.yaml](./examples/model_configs/endpoint_model.yaml): This configuration allows you to launch the model using [HuggingFace's Inference Endpoints](https://huggingface.co/inference-endpoints/dedicated). You can specify in the configuration file all the relevant parameters, and then `lighteval` will automatically deploy the endpoint, run the evaluation, and finally delete the endpoint (unless you specify an endpoint that was already launched, in which case the endpoint won't be deleted afterwards). -2. [tgi_model.yaml](./examples/model_configs/tgi_model.yaml): This configuration lets you specify the URL of a model running in a TGI container, such as one deployed on HuggingFace's serverless inference. +2. 
[tgi_model.yaml](./examples/model_configs/tgi_model.yaml): This configuration lets you specify the URL of a model running in a TGI container, such as one deployed on HuggingFace's serverless inference. Templates for these configurations can be found in [examples/model_configs](./examples/model_configs/). @@ -266,7 +266,7 @@ However, we are very grateful to the Harness and HELM teams for their continued - [logging](https://github.com/huggingface/lighteval/tree/main/src/lighteval/logging): Our loggers, to display experiment information and push it to the hub after a run - [metrics](https://github.com/huggingface/lighteval/tree/main/src/lighteval/metrics): All the available metrics you can use. They are described in metrics, and divided between sample metrics (applied at the sample level, such as prediction accuracy) and corpus metrics (applied over the whole corpus). You'll also find available normalisation functions. - [models](https://github.com/huggingface/lighteval/tree/main/src/lighteval/models): Possible models to use. We cover transformers (base_model), with adapter or delta weights, as well as TGI models locally deployed (it's likely the code here is out of date though), and brrr/nanotron models. - - [tasks](https://github.com/huggingface/lighteval/tree/main/src/lighteval/tasks): Available tasks. The complete list is in `tasks_table.jsonl`, and you'll find all the prompts in `tasks_prompt_formatting.py`. Popular tasks requiring custom logic are exceptionally added in the [extended tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/extended). + - [tasks](https://github.com/huggingface/lighteval/tree/main/src/lighteval/tasks): Available tasks. The complete list is in `default_tasks.py`, and you'll find all the prompts in `tasks_prompt_formatting.py`. Popular tasks requiring custom logic are exceptionally added in the [extended tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/extended). - [examples/tasks](https://github.com/huggingface/lighteval/tree/main/examples/tasks) contains a list of available tasks you can launch. We advise using tasks in the `recommended_set`, as it's possible that some of the other tasks need double checking. - [tests](https://github.com/huggingface/lighteval/tree/main/tests) contains our test suite, which we run at each PR to prevent regressions in metrics/prompts/tasks, for a subset of important tasks. @@ -285,10 +285,10 @@ A popular community evaluation can move to become an extended or core evaluation #### Core evaluations Prompt function: **find a suitable prompt function** in `src.lighteval.tasks.task_prompt_formatting.py`, or code your own. This function must output a `Doc` object, which should contain the `query`, your prompt, and either `gold`, the gold output, or `choices` and `gold_index`, the list of choices and index or indices of correct answers. If your query contains an instruction that should not be repeated in a few shot setup, add it to an `instruction` field. -Summary: create a **line summary** of your evaluation, in `src/lighteval/tasks/tasks_table.jsonl`. This summary should contain the following fields: +Summary: create a `LightevalTaskConfig` summary of your evaluation, in `src/lighteval/tasks/default_tasks.py`. This summary should contain the following fields: - `name` (str), your evaluation name - `suite` (list), the suite(s) to which your evaluation should belong. 
This field allows us to compare different task implementations and is used as a task selection to differentiate the versions to launch. At the moment, you'll find the keywords ["helm", "bigbench", "original", "lighteval", "community", "custom"]; for core evals, please choose `lighteval`. -- `prompt_function` (str), the name of the prompt function you defined in the step above +- `prompt_function` (Callable), the prompt function you defined in the step above - `hf_repo` (str), the path to your evaluation dataset on the hub - `hf_subset` (str), the specific subset you want to use for your evaluation (note: when the dataset has no subset, fill this field with `"default"`, not with `None` or `""`) - `hf_avail_splits` (list), all the splits available for your dataset (train, valid or validation, test, other...) @@ -310,7 +310,7 @@ Summary: create a **line summary** of your evaluation, in `src/lighteval/tasks/t Make sure you can launch your model with your new task using `--tasks lighteval|yournewtask|2|0`. #### Community evaluations -Copy the `community_tasks/_template.yml` to `community_tasks/yourevalname.py` and edit it to add your custom tasks (the parameters you can use are explained above). It contains an interesting mechanism if the dataset you are adding contains a lot of subsets. +Copy the `community_tasks/_template.py` to `community_tasks/yourevalname.py` and edit it to add your custom tasks (the parameters you can use are explained above). It contains an interesting mechanism if the dataset you are adding contains a lot of subsets. Make sure you can launch your model with your new task using `--tasks community|yournewtask|2|0 --custom_tasks community_tasks/yourevalname.py`. diff --git a/community_tasks/_template.py b/community_tasks/_template.py index fe0d8e1d..5025f741 100644 --- a/community_tasks/_template.py +++ b/community_tasks/_template.py @@ -39,12 +39,28 @@ from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES +# DEFINE YOUR PROMPT FUNCTIONS +# Define as many as you need for your different tasks +def prompt_fn(line, task_name: str = None): + """Defines how to go from a dataset line to a doc object. + Follow examples in src/lighteval/tasks/tasks_prompt_formatting.py, or get more info + about what this function should do in the README. + """ + return Doc( + task_name=task_name, + query="", + choices="", + gold_index=0, + instruction="", + ) + + # EVAL WITH NO SUBSET ## # This is how you create a simple task (like hellaswag) which has one single subset # attached to it, and one evaluation possible. task = LightevalTaskConfig( name="myothertask", - prompt_function="prompt_fn", # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py + prompt_function=prompt_fn, # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py suite=["community"], hf_repo="", hf_subset="default", @@ -73,7 +89,7 @@ def __init__( super().__init__( name=name, hf_subset=hf_subset, - prompt_function="prompt_fn", # must be defined in the file + prompt_function=prompt_fn, # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py hf_repo="", metric=[""], hf_avail_splits=[], @@ -88,22 +104,6 @@ def __init__( ) -# DEFINE YOUR PROMPT FUNCTIONS -# Define as many as you need for your different tasks -def prompt_fn(line, task_name: str = None): - """Defines how to go from a dataset line to a doc object. 
- Follow examples in src/lighteval/tasks/tasks_prompt_formatting.py, or get more info - about what this function should do in the README. - """ - return Doc( - task_name=task_name, - query="", - choices="", - gold_index=0, - instruction="", - ) - - # STORE YOUR EVALS SUBSET_TASKS = [CustomSubsetTask(name=f"mytask:{subset}", hf_subset=subset) for subset in SAMPLE_SUBSETS] TASKS_TABLE = SUBSET_TASKS + [task] diff --git a/community_tasks/aimo_evals.py b/community_tasks/aimo_evals.py index 5262a013..950becd5 100644 --- a/community_tasks/aimo_evals.py +++ b/community_tasks/aimo_evals.py @@ -29,9 +29,18 @@ from lighteval.tasks.requests import Doc +def aimo_prompt(line, task_name: str = None): + return Doc( + task_name=task_name, + choices=[str(line["answer"])], + gold_index=0, + query=line["problem"], + ) + + task = LightevalTaskConfig( name="aimo_progress_prize_1", - prompt_function="aimo_prompt", + prompt_function=aimo_prompt, suite=["community"], hf_subset="", hf_repo="lighteval/aimo_progress_prize_1", @@ -44,16 +53,6 @@ stop_sequence=None, ) - -def aimo_prompt(line, task_name: str = None): - return Doc( - task_name=task_name, - choices=[str(line["answer"])], - gold_index=0, - query=line["problem"], - ) - - # STORE YOUR EVALS TASKS_TABLE = [task] diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index 495c95d9..b0aa15aa 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -53,6 +53,28 @@ # fmt: on +def mmlu_arabic(line, task_name: str = None): + topic = line["subject"] + instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n" + choices = [line["A"], line["B"], line["C"], line["D"]] + # Answers are provided with roman letters - we look for the correct index in LETTER_INDICES, + # it will then be applied to arabic letters + gold_ix = LETTER_INDICES.index(line["answer"]) + + query = f"{instruction}{line['question']}\n" + query += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES_AR[:4], choices)]) + query += "الإجابة:" + + return Doc( + task_name=task_name, + query=query, + choices=LETTER_INDICES_AR[:4], + gold_index=gold_ix, + instruction=instruction, + target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix], + ) + + class CustomArabicMMLUTask(LightevalTaskConfig): def __init__( self, @@ -62,7 +84,7 @@ def __init__( super().__init__( name=name, hf_subset=hf_subset, - prompt_function="mmlu_arabic", + prompt_function=mmlu_arabic, hf_repo="OALL/Arabic_MMLU", metric=["loglikelihood_acc_norm"], hf_avail_splits=["test", "dev"], @@ -83,29 +105,6 @@ def __init__( CustomArabicMMLUTask(name=f"arabic_mmlu:{subset}", hf_subset=subset) for subset in ARABIC_MMLU_SUBSETS ] - -def mmlu_arabic(line, task_name: str = None): - topic = line["subject"] - instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n" - choices = [line["A"], line["B"], line["C"], line["D"]] - # Answers are provided with roman letters - we look for the correct index in LETTER_INDICES, - # it will then be applied to arabic letters - gold_ix = LETTER_INDICES.index(line["answer"]) - - query = f"{instruction}{line['question']}\n" - query += "".join([f"{key}. 
{choice}\n" for key, choice in zip(LETTER_INDICES_AR[:4], choices)]) - query += "الإجابة:" - - return Doc( - task_name=task_name, - query=query, - choices=LETTER_INDICES_AR[:4], - gold_index=gold_ix, - instruction=instruction, - target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix], - ) - - # ACVA ## # fmt: off ACVA_SUBSETS = [ @@ -121,6 +120,18 @@ def mmlu_arabic(line, task_name: str = None): # fmt: on +def acva(line, task_name: str = None): + question = line["question"] + answer = line["answer"] + + return Doc( + task_name=task_name, + query=f"السؤال: {question}\nالإجابة:", + choices=["صح", "خطأ"], + gold_index=["صح", "خطأ"].index(answer), + ) + + class CustomACVATask(LightevalTaskConfig): def __init__( self, @@ -130,7 +141,7 @@ def __init__( super().__init__( name=name, hf_subset=hf_subset, - prompt_function="acva", + prompt_function=acva, hf_repo="OALL/ACVA", metric=["loglikelihood_acc_norm"], hf_avail_splits=["test", "validation"], @@ -150,22 +161,33 @@ def __init__( ACVA_TASKS = [CustomACVATask(name=f"acva:{subset}", hf_subset=subset) for subset in ACVA_SUBSETS] -def acva(line, task_name: str = None): +def arabic_exams(line, task_name: str = None): + topic = line["subject"] question = line["question"] + choices = [line["A"], line["B"], line["C"], line["D"]] + choices_formatted = [f" {LETTER_INDICES_AR[i]}) {choice}\n" for i, choice in enumerate(choices)] answer = line["answer"] + answer_index = LETTER_INDICES.index(answer) + + instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n" + query = f"{instruction}السؤال: {question}\n" + query += "\n".join(choices_formatted) + query += "\nالإجابة:" return Doc( task_name=task_name, - query=f"السؤال: {question}\nالإجابة:", - choices=["صح", "خطأ"], - gold_index=["صح", "خطأ"].index(answer), + query=query, + choices=LETTER_INDICES_AR[:4], + gold_index=answer_index, + instruction=instruction, + target_for_fewshot_sorting=choices[answer_index], ) # ARABIC EXAMS ## arabic_exams_task = LightevalTaskConfig( name="arabic_exams", - prompt_function="arabic_exams", + prompt_function=arabic_exams, suite=["community"], hf_repo="OALL/Arabic_EXAMS", hf_subset="default", @@ -179,39 +201,39 @@ def acva(line, task_name: str = None): ) -def arabic_exams(line, task_name: str = None): - topic = line["subject"] - question = line["question"] - choices = [line["A"], line["B"], line["C"], line["D"]] - choices_formatted = [f" {LETTER_INDICES_AR[i]}) {choice}\n" for i, choice in enumerate(choices)] - answer = line["answer"] - answer_index = LETTER_INDICES.index(answer) +# ALGHAFA NATIVE ## +# fmt: off +ALGHAFA_SUBSETS = [ + "mcq_exams_test_ar", "meta_ar_dialects", "meta_ar_msa", "multiple_choice_facts_truefalse_balanced_task", "multiple_choice_grounded_statement_soqal_task", + "multiple_choice_grounded_statement_xglue_mlqa_task", "multiple_choice_rating_sentiment_no_neutral_task", "multiple_choice_rating_sentiment_task", + "multiple_choice_sentiment_task" +] +# fmt: on - instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. 
\n\n" + +def alghafa_prompt(line, task_name: str = None): + question = line["query"] + answer_index = int(line["label"]) + # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' + choices_keys = [key for key in line.keys() if key not in ["query", "label", "__few_shots"]] + choices = [line[key] for key in choices_keys] + + instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" query = f"{instruction}السؤال: {question}\n" - query += "\n".join(choices_formatted) - query += "\nالإجابة:" + for index, choice in enumerate(choices): + query += f"{index}) {choice}\n" + query += "الإجابة:" return Doc( task_name=task_name, query=query, - choices=LETTER_INDICES_AR[:4], + choices=choices, gold_index=answer_index, instruction=instruction, target_for_fewshot_sorting=choices[answer_index], ) -# ALGHAFA NATIVE ## -# fmt: off -ALGHAFA_SUBSETS = [ - "mcq_exams_test_ar", "meta_ar_dialects", "meta_ar_msa", "multiple_choice_facts_truefalse_balanced_task", "multiple_choice_grounded_statement_soqal_task", - "multiple_choice_grounded_statement_xglue_mlqa_task", "multiple_choice_rating_sentiment_no_neutral_task", "multiple_choice_rating_sentiment_task", - "multiple_choice_sentiment_task" -] -# fmt: on - - class CustomAlGhafaNativeTask(LightevalTaskConfig): def __init__( self, @@ -221,7 +243,7 @@ def __init__( super().__init__( name=name, hf_subset=hf_subset, - prompt_function="alghafa_prompt", + prompt_function=alghafa_prompt, hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Native", metric=["loglikelihood_acc_norm"], hf_avail_splits=["test", "validation"], @@ -239,35 +261,11 @@ def __init__( ALGHAFA_TASKS = [CustomAlGhafaNativeTask(name=f"alghafa:{subset}", hf_subset=subset) for subset in ALGHAFA_SUBSETS] - -def alghafa_prompt(line, task_name: str = None): - question = line["query"] - answer_index = int(line["label"]) - # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in line.keys() if key not in ["query", "label", "__few_shots"]] - choices = [line[key] for key in choices_keys] - - instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" - query = f"{instruction}السؤال: {question}\n" - for index, choice in enumerate(choices): - query += f"{index}) {choice}\n" - query += "الإجابة:" - - return Doc( - task_name=task_name, - query=query, - choices=choices, - gold_index=answer_index, - instruction=instruction, - target_for_fewshot_sorting=choices[answer_index], - ) - - # ALGHAFA TRANSLATED ## # race_ar race_ar_task = LightevalTaskConfig( name="race_ar", - prompt_function="alghafa_prompt", + prompt_function=alghafa_prompt, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", hf_subset="race_ar", @@ -284,7 +282,7 @@ def alghafa_prompt(line, task_name: str = None): # piqa_ar piqa_ar_task = LightevalTaskConfig( name="piqa_ar", - prompt_function="alghafa_prompt", + prompt_function=alghafa_prompt, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", hf_subset="piqa_ar", @@ -301,7 +299,7 @@ def alghafa_prompt(line, task_name: str = None): # arc_easy_ar arc_easy_ar_task = LightevalTaskConfig( name="arc_easy_ar", - prompt_function="alghafa_prompt", + prompt_function=alghafa_prompt, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", hf_subset="arc_easy_ar", @@ -318,7 +316,7 @@ def alghafa_prompt(line, task_name: str = None): # arc_challenge_okapi_ar arc_challenge_okapi_ar_task = LightevalTaskConfig( 
name="arc_challenge_okapi_ar", - prompt_function="alghafa_prompt", + prompt_function=alghafa_prompt, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", hf_subset="arc_challenge_okapi_ar", @@ -335,7 +333,7 @@ def alghafa_prompt(line, task_name: str = None): # mmlu_okapi_ar mmlu_okapi_ar_task = LightevalTaskConfig( name="mmlu_okapi_ar", - prompt_function="alghafa_prompt", + prompt_function=alghafa_prompt, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", hf_subset="mmlu_okapi_ar", @@ -352,7 +350,7 @@ def alghafa_prompt(line, task_name: str = None): # openbook_qa_ext_ar openbook_qa_ext_ar_task = LightevalTaskConfig( name="openbook_qa_ext_ar", - prompt_function="alghafa_prompt", + prompt_function=alghafa_prompt, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", hf_subset="openbook_qa_ext_ar", @@ -367,20 +365,6 @@ def alghafa_prompt(line, task_name: str = None): # boolq_ar -boolq_ar_task = LightevalTaskConfig( - name="boolq_ar", - prompt_function="boolq_prompt_arabic", - suite=["community"], - hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", - hf_subset="boolq_ar", - hf_avail_splits=["test", "validation"], - evaluation_splits=["test"], - few_shots_split="validation", - few_shots_select="sequential", - metric=["loglikelihood_acc_norm"], - trust_dataset=True, - version=0, -) def boolq_prompt_arabic(line, task_name: str = None): @@ -406,13 +390,12 @@ def boolq_prompt_arabic(line, task_name: str = None): ) -# copa_ext_ar -copa_ext_ar_task = LightevalTaskConfig( - name="copa_ext_ar", - prompt_function="copa_prompt_arabic", +boolq_ar_task = LightevalTaskConfig( + name="boolq_ar", + prompt_function=boolq_prompt_arabic, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", - hf_subset="copa_ext_ar", + hf_subset="boolq_ar", hf_avail_splits=["test", "validation"], evaluation_splits=["test"], few_shots_split="validation", @@ -423,6 +406,7 @@ def boolq_prompt_arabic(line, task_name: str = None): ) +# copa_ext_ar def copa_prompt_arabic(line, task_name: str = None): premise = line["premise"] choices = [line["choice1"], line["choice2"]] @@ -442,13 +426,12 @@ def copa_prompt_arabic(line, task_name: str = None): ) -# hellaswag_okapi_ar -hellaswag_okapi_ar_task = LightevalTaskConfig( - name="hellaswag_okapi_ar", - prompt_function="hellaswag_prompt_arabic", +copa_ext_ar_task = LightevalTaskConfig( + name="copa_ext_ar", + prompt_function=copa_prompt_arabic, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", - hf_subset="hellaswag_okapi_ar", + hf_subset="copa_ext_ar", hf_avail_splits=["test", "validation"], evaluation_splits=["test"], few_shots_split="validation", @@ -459,6 +442,7 @@ def copa_prompt_arabic(line, task_name: str = None): ) +# hellaswag_okapi_ar def hellaswag_prompt_arabic(line, task_name: str = None): ctx = re.sub(r"\[.*?\]", "", line["ctx"]) # Remove latin words within brackets endings = [ @@ -487,13 +471,12 @@ def hellaswag_prompt_arabic(line, task_name: str = None): ) -# toxigen_ar -toxigen_ar_task = LightevalTaskConfig( - name="toxigen_ar", - prompt_function="toxigen_prompt_arabic", +hellaswag_okapi_ar_task = LightevalTaskConfig( + name="hellaswag_okapi_ar", + prompt_function=hellaswag_prompt_arabic, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", - hf_subset="toxigen_ar", + hf_subset="hellaswag_okapi_ar", hf_avail_splits=["test", "validation"], evaluation_splits=["test"], few_shots_split="validation", @@ -504,6 +487,7 @@ def 
hellaswag_prompt_arabic(line, task_name: str = None): ) +# toxigen_ar def toxigen_prompt_arabic(line, task_name: str = None): text = line["text"] label = 1 if ((line["toxicity_ai"] + line["toxicity_human"]) > 5.5) else 0 @@ -525,13 +509,12 @@ def toxigen_prompt_arabic(line, task_name: str = None): ) -# sciq_ar -sciq_ar_task = LightevalTaskConfig( - name="sciq_ar", - prompt_function="sciq_prompt_arabic", +toxigen_ar_task = LightevalTaskConfig( + name="toxigen_ar", + prompt_function=toxigen_prompt_arabic, suite=["community"], hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", - hf_subset="sciq_ar", + hf_subset="toxigen_ar", hf_avail_splits=["test", "validation"], evaluation_splits=["test"], few_shots_split="validation", @@ -542,6 +525,7 @@ def toxigen_prompt_arabic(line, task_name: str = None): ) +# sciq_ar def sciq_prompt_arabic(line, task_name: str = None): support = line["support"] question = line["question"] @@ -577,6 +561,22 @@ def sciq_prompt_arabic(line, task_name: str = None): ) +sciq_ar_task = LightevalTaskConfig( + name="sciq_ar", + prompt_function=sciq_prompt_arabic, + suite=["community"], + hf_repo="OALL/AlGhafa-Arabic-LLM-Benchmark-Translated", + hf_subset="sciq_ar", + hf_avail_splits=["test", "validation"], + evaluation_splits=["test"], + few_shots_split="validation", + few_shots_select="sequential", + metric=["loglikelihood_acc_norm"], + trust_dataset=True, + version=0, +) + + TASKS_TABLE = ( ARABIC_MMLU_TASKS + ACVA_TASKS diff --git a/community_tasks/german_rag_evals.py b/community_tasks/german_rag_evals.py index 0d2c76c0..82cdf2da 100644 --- a/community_tasks/german_rag_evals.py +++ b/community_tasks/german_rag_evals.py @@ -34,76 +34,6 @@ from lighteval.tasks.requests import Doc -# Task 1: Choose question by context. -# Given is a context and 4 questions. -# The task is to decide which question can be answered by the context. -task1 = LightevalTaskConfig( - name="german_rag_eval:choose_question_by_context", - prompt_function="prompt_fn_choose_question_by_context", - suite=["community"], - hf_repo="deutsche-telekom/Ger-RAG-eval", - hf_subset="task1", - hf_avail_splits=["test"], - evaluation_splits=["test"], - few_shots_split="test", - few_shots_select="sequential", - metric=["loglikelihood_acc"], - version=1, -) - -# Task 2: Choose context by question. -# Given is a question and 4 contexts. -# The task is to decide which context can answer the question. -task2 = LightevalTaskConfig( - name="german_rag_eval:choose_context_by_question", - prompt_function="prompt_fn_choose_context_by_question", - suite=["community"], - hf_repo="deutsche-telekom/Ger-RAG-eval", - hf_subset="task2", - hf_avail_splits=["test"], - evaluation_splits=["test"], - few_shots_split="test", - few_shots_select="sequential", - metric=["loglikelihood_acc"], - version=1, -) - - -# Task 3: Question-answer match. -# Given is a question and an answer. -# The task is to decide whether the answer actualy answers the question. -task3 = LightevalTaskConfig( - name="german_rag_eval:question_answer_match", - prompt_function="prompt_fn_question_answer_match", - suite=["community"], - hf_repo="deutsche-telekom/Ger-RAG-eval", - hf_subset="task3", - hf_avail_splits=["test"], - evaluation_splits=["test"], - few_shots_split="test", - few_shots_select="sequential", - metric=["loglikelihood_acc"], - version=1, -) - -# Task 4: Context-question match. -# Given is a context and a question. -# The task is to decide whether the question can be answered by the context or not. 
-task4 = LightevalTaskConfig( - name="german_rag_eval:context_question_match", - prompt_function="prompt_fn_context_question_match", - suite=["community"], - hf_repo="deutsche-telekom/Ger-RAG-eval", - hf_subset="task4", - hf_avail_splits=["test"], - evaluation_splits=["test"], - few_shots_split="test", - few_shots_select="sequential", - metric=["loglikelihood_acc"], - version=1, -) - - def prompt_fn_choose_question_by_context(line, task_name: str = None): instruction = "Welche der folgenden Fragen (A oder B oder C oder D) lässt sich anhand des Kontext beantworten?\n\n" query_template = """\ @@ -218,6 +148,76 @@ def prompt_fn_context_question_match(line, task_name: str = None): ) +# Task 1: Choose question by context. +# Given is a context and 4 questions. +# The task is to decide which question can be answered by the context. +task1 = LightevalTaskConfig( + name="german_rag_eval:choose_question_by_context", + prompt_function=prompt_fn_choose_question_by_context, + suite=["community"], + hf_repo="deutsche-telekom/Ger-RAG-eval", + hf_subset="task1", + hf_avail_splits=["test"], + evaluation_splits=["test"], + few_shots_split="test", + few_shots_select="sequential", + metric=["loglikelihood_acc"], + version=1, +) + +# Task 2: Choose context by question. +# Given is a question and 4 contexts. +# The task is to decide which context can answer the question. +task2 = LightevalTaskConfig( + name="german_rag_eval:choose_context_by_question", + prompt_function=prompt_fn_choose_context_by_question, + suite=["community"], + hf_repo="deutsche-telekom/Ger-RAG-eval", + hf_subset="task2", + hf_avail_splits=["test"], + evaluation_splits=["test"], + few_shots_split="test", + few_shots_select="sequential", + metric=["loglikelihood_acc"], + version=1, +) + + +# Task 3: Question-answer match. +# Given is a question and an answer. +# The task is to decide whether the answer actualy answers the question. +task3 = LightevalTaskConfig( + name="german_rag_eval:question_answer_match", + prompt_function=prompt_fn_question_answer_match, + suite=["community"], + hf_repo="deutsche-telekom/Ger-RAG-eval", + hf_subset="task3", + hf_avail_splits=["test"], + evaluation_splits=["test"], + few_shots_split="test", + few_shots_select="sequential", + metric=["loglikelihood_acc"], + version=1, +) + +# Task 4: Context-question match. +# Given is a context and a question. +# The task is to decide whether the question can be answered by the context or not. 
+task4 = LightevalTaskConfig( + name="german_rag_eval:context_question_match", + prompt_function=prompt_fn_context_question_match, + suite=["community"], + hf_repo="deutsche-telekom/Ger-RAG-eval", + hf_subset="task4", + hf_avail_splits=["test"], + evaluation_splits=["test"], + few_shots_split="test", + few_shots_select="sequential", + metric=["loglikelihood_acc"], + version=1, +) + + # STORE YOUR EVALS TASKS_TABLE = [task1, task2, task3, task4] diff --git a/examples/nanotron/custom_evaluation_tasks.py b/examples/nanotron/custom_evaluation_tasks.py index 62aa8dc4..3128335b 100644 --- a/examples/nanotron/custom_evaluation_tasks.py +++ b/examples/nanotron/custom_evaluation_tasks.py @@ -30,6 +30,7 @@ from dataclasses import asdict from typing import Dict, List, Tuple +import lighteval.tasks.tasks_prompt_formatting as prompt from lighteval.metrics import Metrics from lighteval.tasks.lighteval_task import LightevalTaskConfig from lighteval.tasks.requests import Doc @@ -39,11 +40,52 @@ _TASKS_STRINGS: List[Tuple[LightevalTaskConfig, str]] = [] _TASKS: List[LightevalTaskConfig] = [] + # COMMON_SENSE_REASONING_TASKS ## +def commonsense_qa_prompt(line, task_name: str = None): + return Doc( + task_name=task_name, + query=line["question"], + choices=[f" {c}" for c in line["choices"]["text"]], + gold_index=LETTER_INDICES.index(line["answerKey"].strip()), + instruction="", + ) + + +def siqa_prompt(line, task_name: str = None): + return Doc( + task_name=task_name, + query=line["context"] + " " + line["question"], + choices=[f" {c}" for c in [line["answerA"], line["answerB"], line["answerC"]]], + gold_index=int(line["label"]) - 1, + instruction="", + ) + + +def hellaswag_prompt(line, task_name: str = None): + def preprocess(text): + """Comes from AiHarness""" + # text = text.strip() + # NOTE: Brackets are artifacts of the WikiHow dataset portion of HellaSwag. + text = text.replace(" [title]", ". 
") + text = re.sub("\\[.*?\\]", "", text) + text = text.replace(" ", " ") + return text + + ctx = f"{line['ctx_a']} {line['ctx_b'].capitalize()} " + return Doc( + task_name=task_name, + query=preprocess(line["activity_label"] + ": " + ctx), + choices=[" " + preprocess(ending) for ending in line["endings"]], + gold_index=int(line["label"]) if line["label"] != "" else -1, # -1 for test + # "metric": "choices_loglikelihood", + ) + + COMMON_SENSE_REASONING_TASKS = [ LightevalTaskConfig( name="hellaswag", - prompt_function="hellaswag_prompt", + prompt_function=hellaswag_prompt, hf_repo="hellaswag", hf_subset="default", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], @@ -52,7 +94,7 @@ ), LightevalTaskConfig( name="winogrande", - prompt_function="winogrande", + prompt_function=prompt.winogrande, hf_repo="winogrande", hf_subset="winogrande_xl", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], @@ -61,7 +103,7 @@ ), LightevalTaskConfig( name="piqa", - prompt_function="piqa_harness", + prompt_function=prompt.piqa_harness, hf_repo="piqa", hf_subset="plain_text", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], @@ -70,7 +112,7 @@ ), LightevalTaskConfig( name="siqa", - prompt_function="siqa_prompt", + prompt_function=siqa_prompt, hf_repo="lighteval/siqa", hf_subset="default", hf_avail_splits=["train", "validation"], @@ -80,7 +122,7 @@ ), LightevalTaskConfig( name="openbookqa", - prompt_function="openbookqa", + prompt_function=prompt.openbookqa, hf_repo="openbookqa", hf_subset="main", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], @@ -89,7 +131,7 @@ ), LightevalTaskConfig( name="arc:easy", - prompt_function="arc", + prompt_function=prompt.arc, hf_repo="ai2_arc", hf_subset="ARC-Easy", evaluation_splits=["test"], @@ -100,7 +142,7 @@ ), LightevalTaskConfig( name="arc:challenge", - prompt_function="arc", + prompt_function=prompt.arc, hf_repo="ai2_arc", hf_subset="ARC-Challenge", evaluation_splits=["test"], @@ -111,7 +153,7 @@ ), LightevalTaskConfig( name="commonsense_qa", - prompt_function="commonsense_qa_prompt", + prompt_function=commonsense_qa_prompt, hf_repo="commonsense_qa", hf_subset="default", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], @@ -121,57 +163,27 @@ ] -def commonsense_qa_prompt(line, task_name: str = None): - return Doc( - task_name=task_name, - query=line["question"], - choices=[f" {c}" for c in line["choices"]["text"]], - gold_index=LETTER_INDICES.index(line["answerKey"].strip()), - instruction="", - ) +# 0 short for common sense +COMMON_SENSE_REASONING_STRING = [(t, f"custom|{t.name}|0|1") for t in COMMON_SENSE_REASONING_TASKS] +_TASKS_STRINGS.extend(COMMON_SENSE_REASONING_STRING) +_TASKS += COMMON_SENSE_REASONING_TASKS -def siqa_prompt(line, task_name: str = None): +# WORLD_KNOWLEDGE_TASKS ## +def natural_questions_prompt(line, task_name: str = None): return Doc( task_name=task_name, - query=line["context"] + " " + line["question"], - choices=[f" {c}" for c in [line["answerA"], line["answerB"], line["answerC"]]], - gold_index=int(line["label"]) - 1, + query=line["question"] + "?\nAnswer: ", + choices=[line["short_answers"]], + gold_index=0, instruction="", ) -def hellaswag_prompt(line, task_name: str = None): - def preprocess(text): - """Comes from AiHarness""" - # text = text.strip() - # NOTE: Brackets are artifacts of the WikiHow dataset portion of HellaSwag. - text = text.replace(" [title]", ". 
") - text = re.sub("\\[.*?\\]", "", text) - text = text.replace(" ", " ") - return text - - ctx = f"{line['ctx_a']} {line['ctx_b'].capitalize()} " - return Doc( - task_name=task_name, - query=preprocess(line["activity_label"] + ": " + ctx), - choices=[" " + preprocess(ending) for ending in line["endings"]], - gold_index=int(line["label"]) if line["label"] != "" else -1, # -1 for test - # "metric": "choices_loglikelihood", - ) - - -# 0 short for common sense -COMMON_SENSE_REASONING_STRING = [(t, f"custom|{t.name}|0|1") for t in COMMON_SENSE_REASONING_TASKS] -_TASKS_STRINGS.extend(COMMON_SENSE_REASONING_STRING) -_TASKS += COMMON_SENSE_REASONING_TASKS - -# WORLD_KNOWLEDGE_TASKS ## - WORLD_KNOWLEDGE_TASKS = [ LightevalTaskConfig( name="trivia_qa", - prompt_function="triviaqa", + prompt_function=prompt.triviaqa, hf_repo="trivia_qa", hf_subset="rc.nocontext", metric=[Metrics.quasi_exact_match], @@ -181,7 +193,7 @@ def preprocess(text): ), LightevalTaskConfig( name="natural_questions", - prompt_function="natural_questions_prompt", + prompt_function=natural_questions_prompt, hf_repo="lighteval/natural_questions_clean", hf_subset="default", metric=[Metrics.quasi_exact_match], @@ -192,27 +204,26 @@ def preprocess(text): ] -def natural_questions_prompt(line, task_name: str = None): - return Doc( - task_name=task_name, - query=line["question"] + "?\nAnswer: ", - choices=[line["short_answers"]], - gold_index=0, - instruction="", - ) - - WORLD_KNOWLEDGE_STRING = [(t, f"custom|{t.name}|5|1") for t in WORLD_KNOWLEDGE_TASKS] # WORLD_KNOWLEDGE_STRING = {t: f'custom|{t.name}|0|1' for t in WORLD_KNOWLEDGE_TASKS} _TASKS_STRINGS.extend(WORLD_KNOWLEDGE_STRING) _TASKS += WORLD_KNOWLEDGE_TASKS + # Reading comprehension ## +def boolq_prompt(line, task_name: str = None): + return Doc( + task_name=task_name, + query=f"{line['passage']}\nQuestion: {line['question'].capitalize()}?\nAnswer:", + choices=[" No", " Yes"], # Only gold + gold_index=int(line["label"]), + ) + READING_COMP_TASKS = [ LightevalTaskConfig( name="super_glue:boolq", - prompt_function="boolq_prompt", + prompt_function=boolq_prompt, hf_repo="super_glue", hf_subset="boolq", metric=["target_perplexity"], @@ -221,7 +232,7 @@ def natural_questions_prompt(line, task_name: str = None): ), LightevalTaskConfig( name="quac", - prompt_function="quac", + prompt_function=prompt.quac, hf_repo="lighteval/quac_helm", hf_subset="deault", metric=[Metrics.quasi_exact_match], @@ -232,15 +243,6 @@ def natural_questions_prompt(line, task_name: str = None): ] -def boolq_prompt(line, task_name: str = None): - return Doc( - task_name=task_name, - query=f"{line['passage']}\nQuestion: {line['question'].capitalize()}?\nAnswer:", - choices=[" No", " Yes"], # Only gold - gold_index=int(line["label"]), - ) - - READING_COMP_STRING = [(t, f"custom|{t.name}|0|1") for t in READING_COMP_TASKS] _TASKS_STRINGS.extend(READING_COMP_STRING) _TASKS += READING_COMP_TASKS @@ -253,7 +255,7 @@ class CustomMathEvaluationTask(LightevalTaskConfig): def __init__( self, name, - prompt_function="math", + prompt_function=prompt.math, hf_repo="lighteval/MATH", hf_subset=None, metric=[Metrics.quasi_exact_match_math], @@ -298,7 +300,7 @@ def __init__( ] GSM8K = LightevalTaskConfig( name="gsm8k", - prompt_function="gsm8k", + prompt_function=prompt.gsm8k, hf_repo="gsm8k", hf_subset="main", hf_avail_splits=["train", "test"], @@ -317,11 +319,46 @@ def __init__( # MMLU ## +def mmlu_harness(line, task_name: str = None): + topic = line["subject"] + prompt = f"The following are multiple choice questions (with 
answers) about {topic.replace('_', ' ')}.\n\n" + prompt += line["question"] + "\n" + prompt += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES, line["choices"])]) + prompt += "Answer:" + + gold_ix = LETTER_INDICES.index(line["answer"]) if isinstance(line["answer"], str) else line["answer"] + "__few_shots" in line and line["__few_shots"] is True # We are adding few shots + + return Doc( + task_name=task_name, + query=prompt, + choices=[" A", " B", " C", " D"], + target_for_fewshot_sorting=[" A", " B", " C", " D"][gold_ix], + gold_index=gold_ix, + instruction=f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n", + ) + + +def mmlu_prompt(line, task_name: str = None): + """MMLU prompt without letters""" + topic = line["subject"] + prompt = f"The following are questions about {topic.replace('_', ' ')}.\nQuestion: " + prompt += line["question"] + "\nAnswer:" + + return Doc( + task_name=task_name, + query=prompt, + choices=[f" {c}" for c in line["choices"]], + gold_index=line["answer"], + instruction=f"The following are questions about {topic.replace('_', ' ')}.\n", + ) + + class CustomMMLUEvaluationTask(LightevalTaskConfig): def __init__( self, name, - prompt_function="mmlu_prompt", + prompt_function=mmlu_prompt, hf_repo="lighteval/mmlu", hf_subset=None, # metric=[Metrics.loglikelihood_acc_single_token], @@ -419,54 +456,27 @@ def __init__( ] -def mmlu_harness(line, task_name: str = None): - topic = line["subject"] - prompt = f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n" - prompt += line["question"] + "\n" - prompt += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES, line["choices"])]) - prompt += "Answer:" - - gold_ix = LETTER_INDICES.index(line["answer"]) if isinstance(line["answer"], str) else line["answer"] - "__few_shots" in line and line["__few_shots"] is True # We are adding few shots - - return Doc( - task_name=task_name, - query=prompt, - choices=[" A", " B", " C", " D"], - target_for_fewshot_sorting=[" A", " B", " C", " D"][gold_ix], - gold_index=gold_ix, - instruction=f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n", - ) - - -def mmlu_prompt(line, task_name: str = None): - """MMLU prompt without letters""" - topic = line["subject"] - prompt = f"The following are questions about {topic.replace('_', ' ')}.\nQuestion: " - prompt += line["question"] + "\nAnswer:" - - return Doc( - task_name=task_name, - query=prompt, - choices=[f" {c}" for c in line["choices"]], - gold_index=line["answer"], - instruction=f"The following are questions about {topic.replace('_', ' ')}.\n", - ) - - # MMLU_STRING = {t: f'custom|{t.name}|5|1' for t in MMLU_TASKS} MMLU_STRING = [(t, f"custom|{t.name}|0|1") for t in MMLU_TASKS] _TASKS_STRINGS.extend(MMLU_STRING) _TASKS += MMLU_TASKS + # BBH ## +def bbh_prompt(line, task_name: str = None): + return Doc( + task_name=task_name, + query=line["input"] + "\nAnswer: ", + choices=[line["target"]], + gold_index=0, + ) class CustomBBHEvaluationTask(LightevalTaskConfig): def __init__( self, name, - prompt_function="bbh_prompt", + prompt_function=bbh_prompt, hf_repo="lighteval/big_bench_hard", hf_subset=None, metric=[Metrics.exact_match], @@ -539,27 +549,69 @@ def __init__( ] -def bbh_prompt(line, task_name: str = None): +# BBH_STRING = {t: f'custom|{t.name}|3|1' for t in BBH_TASKS} +BBH_STRING = [(t, f"custom|{t.name}|0|1") for t in BBH_TASKS] +_TASKS_STRINGS.extend(BBH_STRING) +_TASKS += 
BBH_TASKS + + +# AGI eval ## + + +def agi_eval_math_prompt(line, task_name: str = None): return Doc( task_name=task_name, - query=line["input"] + "\nAnswer: ", - choices=[line["target"]], + query=line["question"], + choices=[line["answer"]], gold_index=0, + instruction="", ) -# BBH_STRING = {t: f'custom|{t.name}|3|1' for t in BBH_TASKS} -BBH_STRING = [(t, f"custom|{t.name}|0|1") for t in BBH_TASKS] -_TASKS_STRINGS.extend(BBH_STRING) -_TASKS += BBH_TASKS +def agi_eval_prompt(line, task_name: str = None): + cleaned_options = [o.replace("(", "").replace(")", " ") for o in line["options"]] + prompt = "The following are multiple choice questions (with answers).\n\n" + prompt += line["question"] + "\n" + "\n".join(cleaned_options) + "\n" + prompt += "Answer: " + + choices = LETTER_INDICES[: len(line["options"])] + + output = Doc( + query=prompt, + instruction="The following are multiple choice questions (with answers).\n\n", + ) + + if line["label"]: + output.choices = choices + output.gold_index = LETTER_INDICES.index(line["label"].strip()) + else: + output.choices = [line["answer"]] + output.gold_index = 0 + + return output + + +def agi_eval_prompt_no_letters(line, task_name: str = None): + cleaned_options = [ + " " + o.replace("(A)", "").replace("(B)", "").replace("(C)", "").replace("(D)", "").replace("(E)", "") + for o in line["options"] + ] + + output = Doc( + query=line["question"], + choices=cleaned_options, + gold_index=LETTER_INDICES.index(line["label"].strip()), + instruction="", + ) + + return output -# AGI eval ## class CustomAGIEvalEvaluationTask(LightevalTaskConfig): def __init__( self, name, - prompt_function="agi_eval_prompt_no_letters", + prompt_function=agi_eval_prompt_no_letters, hf_repo="lighteval/agi_eval_en", hf_subset=None, # metric=[Metrics.loglikelihood_acc_single_token], @@ -603,7 +655,7 @@ def __init__( CustomAGIEvalEvaluationTask( name="agi_eval:math", hf_subset="math", - prompt_function="agi_eval_math_prompt", + prompt_function=agi_eval_math_prompt, metric=[Metrics.exact_match, Metrics.quasi_exact_match], generation_size=40, ), @@ -612,55 +664,6 @@ def __init__( ] -def agi_eval_math_prompt(line, task_name: str = None): - return Doc( - task_name=task_name, - query=line["question"], - choices=[line["answer"]], - gold_index=0, - instruction="", - ) - - -def agi_eval_prompt(line, task_name: str = None): - cleaned_options = [o.replace("(", "").replace(")", " ") for o in line["options"]] - prompt = "The following are multiple choice questions (with answers).\n\n" - prompt += line["question"] + "\n" + "\n".join(cleaned_options) + "\n" - prompt += "Answer: " - - choices = LETTER_INDICES[: len(line["options"])] - - output = Doc( - query=prompt, - instruction="The following are multiple choice questions (with answers).\n\n", - ) - - if line["label"]: - output.choices = choices - output.gold_index = LETTER_INDICES.index(line["label"].strip()) - else: - output.choices = [line["answer"]] - output.gold_index = 0 - - return output - - -def agi_eval_prompt_no_letters(line, task_name: str = None): - cleaned_options = [ - " " + o.replace("(A)", "").replace("(B)", "").replace("(C)", "").replace("(D)", "").replace("(E)", "") - for o in line["options"] - ] - - output = Doc( - query=line["question"], - choices=cleaned_options, - gold_index=LETTER_INDICES.index(line["label"].strip()), - instruction="", - ) - - return output - - # AGIEVAL_STRING = {t: f'custom|{t.name}|5|1' for t in AGIEVAL_TASKS} AGIEVAL_STRING = [(t, f"custom|{t.name}|0|1") for t in AGIEVAL_TASKS] 
_TASKS_STRINGS.extend(AGIEVAL_STRING) @@ -670,7 +673,7 @@ def agi_eval_prompt_no_letters(line, task_name: str = None): # HUMAN EVAL ## # human_eval = LightevalTaskConfig( # name="human_eval", -# prompt_function="human_eval", +# prompt_function=prompt.human_eval", # hf_repo="lighteval/human_eval", # metric=["human_eval_pass_at_1"], # ), diff --git a/examples/nanotron/custom_task.py b/examples/nanotron/custom_task.py index 77f43c65..ccbae7b9 100644 --- a/examples/nanotron/custom_task.py +++ b/examples/nanotron/custom_task.py @@ -20,81 +20,84 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -TASKS_TABLE = [ - { - "name": "mmlu:anatomy", - "suite": ["custom"], - "prompt_function": "mmlu_anatomy", - "hf_repo": "lighteval/mmlu", - "hf_subset": "anatomy", - "hf_avail_splits": ["auxiliary_train", "test", "validation", "dev"], - "evaluation_splits": ["test"], - "few_shots_split": "dev", - "few_shots_select": "sequential", - "generation_size": 5, - "metric": ["loglikelihood_acc_single_token"], - "stop_sequence": ["\n"], - "output_regex": None, - "frozen": False, - }, - { - "name": "mmlu:anatomy_signs", - "suite": ["custom"], - "prompt_function": "mmlu_anatomy_signs", - "hf_repo": "lighteval/mmlu", - "hf_subset": "anatomy", - "hf_avail_splits": ["auxiliary_train", "test", "validation", "dev"], - "evaluation_splits": ["test"], - "few_shots_split": "dev", - "few_shots_select": "sequential", - "generation_size": 5, - "metric": ["loglikelihood_acc_single_token"], - "stop_sequence": ["\n"], - "output_regex": None, - "frozen": False, - }, -] - - -def mmlu_anatomy_signs(line): - return mmlu_signs(line, "anatomy") +from lighteval.tasks.lighteval_task import LightevalTaskConfig -def mmlu_anatomy(line): - return mmlu_numbers(line, "anatomy") - - -def mmlu_numbers(line, topic): +def mmlu_signs(line, topic): prompt = f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n" prompt += line["question"] + "\n" - prompt += "".join([f"{key}. {choice}\n" for key, choice in zip(["1", "2", "3", "4"], line["choices"])]) + prompt += "".join([f"{key}. {choice}\n" for key, choice in zip(["+", "*", "=", "#"], line["choices"])]) prompt += "Answer:" - gold_ix = ["1", "2", "3", "4"].index(line["answer"]) if isinstance(line["answer"], str) else line["answer"] + gold_ix = ["+", "*", "=", "#"].index(line["answer"]) if isinstance(line["answer"], str) else line["answer"] is_few_shots = line.get("__few_shots", False) # We are adding few shots return { "query": prompt, - "choices": [" 1", " 2", " 3", " 4"] if is_few_shots else ["1", "2", "3", "4"], - "target_for_fewshot_sorting": [" 1", " 2", " 3", " 4"][gold_ix], + "choices": [" +", " *", " =", " #"] if is_few_shots else ["+", "*", "=", "#"], + "target_for_fewshot_sorting": [" +", " *", " =", " #"][gold_ix], "gold_index": gold_ix, "instruction": f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n", } -def mmlu_signs(line, topic): +def mmlu_anatomy_signs(line): + return mmlu_signs(line, "anatomy") + + +def mmlu_numbers(line, topic): prompt = f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n" prompt += line["question"] + "\n" - prompt += "".join([f"{key}. {choice}\n" for key, choice in zip(["+", "*", "=", "#"], line["choices"])]) + prompt += "".join([f"{key}. 
{choice}\n" for key, choice in zip(["1", "2", "3", "4"], line["choices"])]) prompt += "Answer:" - gold_ix = ["+", "*", "=", "#"].index(line["answer"]) if isinstance(line["answer"], str) else line["answer"] + gold_ix = ["1", "2", "3", "4"].index(line["answer"]) if isinstance(line["answer"], str) else line["answer"] is_few_shots = line.get("__few_shots", False) # We are adding few shots return { "query": prompt, - "choices": [" +", " *", " =", " #"] if is_few_shots else ["+", "*", "=", "#"], - "target_for_fewshot_sorting": [" +", " *", " =", " #"][gold_ix], + "choices": [" 1", " 2", " 3", " 4"] if is_few_shots else ["1", "2", "3", "4"], + "target_for_fewshot_sorting": [" 1", " 2", " 3", " 4"][gold_ix], "gold_index": gold_ix, "instruction": f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n", } + + +def mmlu_anatomy(line): + return mmlu_numbers(line, "anatomy") + + +TASKS_TABLE = [ + LightevalTaskConfig( + name="mmlu:anatomy", + suite=["custom"], + prompt_function=mmlu_anatomy, + hf_repo="lighteval/mmlu", + hf_subset="anatomy", + hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], + evaluation_splits=["test"], + few_shots_split="dev", + few_shots_select="sequential", + generation_size=5, + metric=["loglikelihood_acc_single_token"], + stop_sequence=["\n"], + output_regex=None, + frozen=False, + ), + LightevalTaskConfig( + name="mmlu:anatomy_signs", + suite=["custom"], + prompt_function=mmlu_anatomy_signs, + hf_repo="lighteval/mmlu", + hf_subset="anatomy", + hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], + evaluation_splits=["test"], + few_shots_split="dev", + few_shots_select="sequential", + generation_size=5, + metric=["loglikelihood_acc_single_token"], + stop_sequence=["\n"], + output_regex=None, + frozen=False, + ), +] diff --git a/src/lighteval/logging/evaluation_tracker.py b/src/lighteval/logging/evaluation_tracker.py index b1dbe616..453d57e0 100644 --- a/src/lighteval/logging/evaluation_tracker.py +++ b/src/lighteval/logging/evaluation_tracker.py @@ -57,6 +57,8 @@ class EnhancedJSONEncoder(json.JSONEncoder): def default(self, o): if is_dataclass(o): return asdict(o) + if callable(o): + return o.__name__ return super().default(o) diff --git a/src/lighteval/tasks/default_tasks.py b/src/lighteval/tasks/default_tasks.py index dbfdfe09..468f12d5 100644 --- a/src/lighteval/tasks/default_tasks.py +++ b/src/lighteval/tasks/default_tasks.py @@ -19,13 +19,14 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+import lighteval.tasks.tasks_prompt_formatting as prompt from lighteval.tasks.lighteval_task import LightevalTaskConfig abstract_narrative_understanding_bigbench = LightevalTaskConfig( name="abstract_narrative_understanding", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="abstract_narrative_understanding", hf_avail_splits=["default", "train", "validation"], @@ -43,7 +44,7 @@ agieval_aqua_rat_lighteval = LightevalTaskConfig( name="agieval:aqua-rat", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-aqua-rat", hf_subset="default", hf_avail_splits=["test"], @@ -61,7 +62,7 @@ agieval_gaokao_biology_lighteval = LightevalTaskConfig( name="agieval:gaokao-biology", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-gaokao-biology", hf_subset="default", hf_avail_splits=["test"], @@ -79,7 +80,7 @@ agieval_gaokao_chemistry_lighteval = LightevalTaskConfig( name="agieval:gaokao-chemistry", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-gaokao-chemistry", hf_subset="default", hf_avail_splits=["test"], @@ -97,7 +98,7 @@ agieval_gaokao_chinese_lighteval = LightevalTaskConfig( name="agieval:gaokao-chinese", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-gaokao-chinese", hf_subset="default", hf_avail_splits=["test"], @@ -115,7 +116,7 @@ agieval_gaokao_english_lighteval = LightevalTaskConfig( name="agieval:gaokao-english", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-gaokao-english", hf_subset="default", hf_avail_splits=["test"], @@ -133,7 +134,7 @@ agieval_gaokao_geography_lighteval = LightevalTaskConfig( name="agieval:gaokao-geography", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-gaokao-geography", hf_subset="default", hf_avail_splits=["test"], @@ -151,7 +152,7 @@ agieval_gaokao_history_lighteval = LightevalTaskConfig( name="agieval:gaokao-history", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-gaokao-history", hf_subset="default", hf_avail_splits=["test"], @@ -169,7 +170,7 @@ agieval_gaokao_mathqa_lighteval = LightevalTaskConfig( name="agieval:gaokao-mathqa", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-gaokao-mathqa", hf_subset="default", hf_avail_splits=["test"], @@ -187,7 +188,7 @@ agieval_gaokao_physics_lighteval = LightevalTaskConfig( name="agieval:gaokao-physics", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-gaokao-physics", hf_subset="default", hf_avail_splits=["test"], @@ -205,7 +206,7 @@ agieval_logiqa_en_lighteval = LightevalTaskConfig( name="agieval:logiqa-en", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-logiqa-en", hf_subset="default", hf_avail_splits=["test"], @@ -223,7 +224,7 @@ agieval_logiqa_zh_lighteval = LightevalTaskConfig( name="agieval:logiqa-zh", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-logiqa-zh", hf_subset="default", hf_avail_splits=["test"], @@ -241,7 +242,7 @@ agieval_lsat_ar_lighteval = 
LightevalTaskConfig( name="agieval:lsat-ar", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-lsat-ar", hf_subset="default", hf_avail_splits=["test"], @@ -259,7 +260,7 @@ agieval_lsat_lr_lighteval = LightevalTaskConfig( name="agieval:lsat-lr", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-lsat-lr", hf_subset="default", hf_avail_splits=["test"], @@ -277,7 +278,7 @@ agieval_lsat_rc_lighteval = LightevalTaskConfig( name="agieval:lsat-rc", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-lsat-rc", hf_subset="default", hf_avail_splits=["test"], @@ -295,7 +296,7 @@ agieval_sat_en_lighteval = LightevalTaskConfig( name="agieval:sat-en", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-sat-en", hf_subset="default", hf_avail_splits=["test"], @@ -313,7 +314,7 @@ agieval_sat_en_without_passage_lighteval = LightevalTaskConfig( name="agieval:sat-en-without-passage", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-sat-en-without-passage", hf_subset="default", hf_avail_splits=["test"], @@ -331,7 +332,7 @@ agieval_sat_math_lighteval = LightevalTaskConfig( name="agieval:sat-math", suite=["lighteval"], - prompt_function="agieval", + prompt_function=prompt.agieval, hf_repo="dmayhem93/agieval-sat-math", hf_subset="default", hf_avail_splits=["test"], @@ -349,7 +350,7 @@ anachronisms_bigbench = LightevalTaskConfig( name="anachronisms", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="anachronisms", hf_avail_splits=["default", "train", "validation"], @@ -367,7 +368,7 @@ analogical_similarity_bigbench = LightevalTaskConfig( name="analogical_similarity", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="analogical_similarity", hf_avail_splits=["default", "train", "validation"], @@ -385,7 +386,7 @@ analytic_entailment_bigbench = LightevalTaskConfig( name="analytic_entailment", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="analytic_entailment", hf_avail_splits=["default", "train", "validation"], @@ -403,7 +404,7 @@ anli_lighteval = LightevalTaskConfig( name="anli", suite=["lighteval", "anli"], - prompt_function="anli", + prompt_function=prompt.anli, hf_repo="anli", hf_subset="plain_text", hf_avail_splits=[ @@ -431,7 +432,7 @@ anli_r1_lighteval = LightevalTaskConfig( name="anli:r1", suite=["lighteval", "anli"], - prompt_function="anli", + prompt_function=prompt.anli, hf_repo="anli", hf_subset="plain_text", hf_avail_splits=["train_r1", "dev_r1", "test_r1"], @@ -449,7 +450,7 @@ anli_r2_lighteval = LightevalTaskConfig( name="anli:r2", suite=["lighteval", "anli"], - prompt_function="anli", + prompt_function=prompt.anli, hf_repo="anli", hf_subset="plain_text", hf_avail_splits=["train_r2", "dev_r2", "test_r2"], @@ -467,7 +468,7 @@ anli_r3_lighteval = LightevalTaskConfig( name="anli:r3", suite=["lighteval", "anli"], - prompt_function="anli", + prompt_function=prompt.anli, hf_repo="anli", hf_subset="plain_text", hf_avail_splits=["train_r3", "dev_r3", "test_r3"], @@ -485,7 +486,7 @@ arc_c_letters_original = LightevalTaskConfig( name="arc:c:letters", suite=["original", 
"arc"], - prompt_function="arc_with_options_letters_predict", + prompt_function=prompt.arc_with_options_letters_predict, hf_repo="ai2_arc", hf_subset="ARC-Challenge", hf_avail_splits=["train", "validation", "test"], @@ -503,7 +504,7 @@ arc_c_options_original = LightevalTaskConfig( name="arc:c:options", suite=["original", "arc"], - prompt_function="arc_with_options", + prompt_function=prompt.arc_with_options, hf_repo="ai2_arc", hf_subset="ARC-Challenge", hf_avail_splits=["train", "validation", "test"], @@ -521,7 +522,7 @@ arc_c_simple_original = LightevalTaskConfig( name="arc:c:simple", suite=["original", "arc"], - prompt_function="arc", + prompt_function=prompt.arc, hf_repo="ai2_arc", hf_subset="ARC-Challenge", hf_avail_splits=["train", "validation", "test"], @@ -539,7 +540,7 @@ arc_challenge_leaderboard = LightevalTaskConfig( name="arc:challenge", suite=["leaderboard", "arc"], - prompt_function="arc", + prompt_function=prompt.arc, hf_repo="ai2_arc", hf_subset="ARC-Challenge", hf_avail_splits=["train", "test"], @@ -557,7 +558,7 @@ arc_easy_lighteval = LightevalTaskConfig( name="arc:easy", suite=["lighteval", "arc"], - prompt_function="arc", + prompt_function=prompt.arc, hf_repo="ai2_arc", hf_subset="ARC-Easy", hf_avail_splits=["train", "validation", "test"], @@ -575,7 +576,7 @@ arithmetic_1dc_lighteval = LightevalTaskConfig( name="arithmetic:1dc", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_1dc", hf_avail_splits=["validation"], @@ -593,7 +594,7 @@ arithmetic_2da_lighteval = LightevalTaskConfig( name="arithmetic:2da", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_2da", hf_avail_splits=["validation"], @@ -611,7 +612,7 @@ arithmetic_2dm_lighteval = LightevalTaskConfig( name="arithmetic:2dm", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_2dm", hf_avail_splits=["validation"], @@ -629,7 +630,7 @@ arithmetic_2ds_lighteval = LightevalTaskConfig( name="arithmetic:2ds", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_2ds", hf_avail_splits=["validation"], @@ -647,7 +648,7 @@ arithmetic_3da_lighteval = LightevalTaskConfig( name="arithmetic:3da", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_3da", hf_avail_splits=["validation"], @@ -665,7 +666,7 @@ arithmetic_3ds_lighteval = LightevalTaskConfig( name="arithmetic:3ds", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_3ds", hf_avail_splits=["validation"], @@ -683,7 +684,7 @@ arithmetic_4da_lighteval = LightevalTaskConfig( name="arithmetic:4da", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_4da", hf_avail_splits=["validation"], @@ -701,7 +702,7 @@ arithmetic_4ds_lighteval = LightevalTaskConfig( name="arithmetic:4ds", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_4ds", 
hf_avail_splits=["validation"], @@ -719,7 +720,7 @@ arithmetic_5da_lighteval = LightevalTaskConfig( name="arithmetic:5da", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_5da", hf_avail_splits=["validation"], @@ -737,7 +738,7 @@ arithmetic_5ds_lighteval = LightevalTaskConfig( name="arithmetic:5ds", suite=["lighteval", "arithmetic"], - prompt_function="arithmetic", + prompt_function=prompt.arithmetic, hf_repo="EleutherAI/arithmetic", hf_subset="arithmetic_5ds", hf_avail_splits=["validation"], @@ -755,7 +756,7 @@ arithmetic_bb_bigbench = LightevalTaskConfig( name="arithmetic_bb", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="arithmetic", hf_avail_splits=["default", "train", "validation"], @@ -773,7 +774,7 @@ ascii_word_recognition_bigbench = LightevalTaskConfig( name="ascii_word_recognition", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="ascii_word_recognition", hf_avail_splits=["default", "train", "validation"], @@ -791,7 +792,7 @@ asdiv_lighteval = LightevalTaskConfig( name="asdiv", suite=["lighteval"], - prompt_function="asdiv", + prompt_function=prompt.asdiv, hf_repo="EleutherAI/asdiv", hf_subset="asdiv", hf_avail_splits=["validation"], @@ -809,7 +810,7 @@ authorship_verification_bigbench = LightevalTaskConfig( name="authorship_verification", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="authorship_verification", hf_avail_splits=["default", "train", "validation"], @@ -827,7 +828,7 @@ auto_categorization_bigbench = LightevalTaskConfig( name="auto_categorization", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="auto_categorization", hf_avail_splits=["default", "train", "validation"], @@ -845,7 +846,7 @@ auto_debugging_bigbench_lite = LightevalTaskConfig( name="auto_debugging", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_and_after_query", + prompt_function=prompt.bigbench_linefeed_before_and_after_query, hf_repo="bigbench", hf_subset="auto_debugging", hf_avail_splits=["default", "train", "validation"], @@ -862,7 +863,7 @@ babi_qa_helm = LightevalTaskConfig( name="babi_qa", suite=["helm"], - prompt_function="babi_qa", + prompt_function=prompt.babi_qa, hf_repo="facebook/babi_qa", hf_subset="en-valid-qa1", hf_avail_splits=["train", "test", "validation"], @@ -880,7 +881,7 @@ bigbench_causal_judgment_lighteval = LightevalTaskConfig( name="bigbench:causal_judgment", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="causal_judgement", hf_avail_splits=["train"], @@ -898,7 +899,7 @@ bigbench_date_understanding_lighteval = LightevalTaskConfig( name="bigbench:date_understanding", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="date_understanding", hf_avail_splits=["train"], @@ -916,7 +917,7 @@ bigbench_disambiguation_qa_lighteval = LightevalTaskConfig( name="bigbench:disambiguation_qa", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="disambiguation_qa", 
hf_avail_splits=["train"], @@ -934,7 +935,7 @@ bigbench_geometric_shapes_lighteval = LightevalTaskConfig( name="bigbench:geometric_shapes", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="geometric_shapes", hf_avail_splits=["train"], @@ -952,7 +953,7 @@ bigbench_logical_deduction_five_objects_lighteval = LightevalTaskConfig( name="bigbench:logical_deduction_five_objects", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="logical_deduction_five_objects", hf_avail_splits=["train"], @@ -970,7 +971,7 @@ bigbench_logical_deduction_seven_objects_lighteval = LightevalTaskConfig( name="bigbench:logical_deduction_seven_objects", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="logical_deduction_seven_objects", hf_avail_splits=["train"], @@ -988,7 +989,7 @@ bigbench_logical_deduction_three_objects_lighteval = LightevalTaskConfig( name="bigbench:logical_deduction_three_objects", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="logical_deduction_three_objects", hf_avail_splits=["train"], @@ -1006,7 +1007,7 @@ bigbench_movie_recommendation_lighteval = LightevalTaskConfig( name="bigbench:movie_recommendation", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="movie_recommendation", hf_avail_splits=["train"], @@ -1024,7 +1025,7 @@ bigbench_navigate_lighteval = LightevalTaskConfig( name="bigbench:navigate", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="navigate", hf_avail_splits=["train"], @@ -1042,7 +1043,7 @@ bigbench_reasoning_about_colored_objects_lighteval = LightevalTaskConfig( name="bigbench:reasoning_about_colored_objects", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="reasoning_about_colored_objects", hf_avail_splits=["train"], @@ -1060,7 +1061,7 @@ bigbench_ruin_names_lighteval = LightevalTaskConfig( name="bigbench:ruin_names", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="ruin_names", hf_avail_splits=["train"], @@ -1078,7 +1079,7 @@ bigbench_salient_translation_error_detection_lighteval = LightevalTaskConfig( name="bigbench:salient_translation_error_detection", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="salient_translation_error_detection", hf_avail_splits=["train"], @@ -1096,7 +1097,7 @@ bigbench_snarks_lighteval = LightevalTaskConfig( name="bigbench:snarks", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="snarks", hf_avail_splits=["train"], @@ -1114,7 +1115,7 @@ bigbench_sports_understanding_lighteval = LightevalTaskConfig( name="bigbench:sports_understanding", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="sports_understanding", hf_avail_splits=["train"], @@ -1132,7 +1133,7 @@ bigbench_temporal_sequences_lighteval = LightevalTaskConfig( name="bigbench:temporal_sequences", 
suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="temporal_sequences", hf_avail_splits=["train"], @@ -1150,7 +1151,7 @@ bigbench_tracking_shuffled_objects_five_objects_lighteval = LightevalTaskConfig( name="bigbench:tracking_shuffled_objects_five_objects", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="tracking_shuffled_objects_five_objects", hf_avail_splits=["train"], @@ -1168,7 +1169,7 @@ bigbench_tracking_shuffled_objects_seven_objects_lighteval = LightevalTaskConfig( name="bigbench:tracking_shuffled_objects_seven_objects", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="tracking_shuffled_objects_seven_objects", hf_avail_splits=["train"], @@ -1186,7 +1187,7 @@ bigbench_tracking_shuffled_objects_three_objects_lighteval = LightevalTaskConfig( name="bigbench:tracking_shuffled_objects_three_objects", suite=["lighteval"], - prompt_function="bbh_lighteval", + prompt_function=prompt.bbh_lighteval, hf_repo="lighteval/bbh", hf_subset="tracking_shuffled_objects_three_objects", hf_avail_splits=["train"], @@ -1204,7 +1205,7 @@ bigbench_causal_judgment_harness = LightevalTaskConfig( name="bigbench:causal_judgment", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="causal_judgement", hf_avail_splits=["train"], @@ -1223,7 +1224,7 @@ bigbench_date_understanding_harness = LightevalTaskConfig( name="bigbench:date_understanding", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="date_understanding", hf_avail_splits=["train"], @@ -1242,7 +1243,7 @@ bigbench_disambiguation_qa_harness = LightevalTaskConfig( name="bigbench:disambiguation_qa", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="disambiguation_qa", hf_avail_splits=["train"], @@ -1261,7 +1262,7 @@ bigbench_geometric_shapes_harness = LightevalTaskConfig( name="bigbench:geometric_shapes", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="geometric_shapes", hf_avail_splits=["train"], @@ -1280,7 +1281,7 @@ bigbench_logical_deduction_five_objects_harness = LightevalTaskConfig( name="bigbench:logical_deduction_five_objects", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="logical_deduction_five_objects", hf_avail_splits=["train"], @@ -1299,7 +1300,7 @@ bigbench_logical_deduction_seven_objects_harness = LightevalTaskConfig( name="bigbench:logical_deduction_seven_objects", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="logical_deduction_seven_objects", hf_avail_splits=["train"], @@ -1318,7 +1319,7 @@ bigbench_logical_deduction_three_objects_harness = LightevalTaskConfig( name="bigbench:logical_deduction_three_objects", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="logical_deduction_three_objects", hf_avail_splits=["train"], @@ -1337,7 +1338,7 @@ bigbench_movie_recommendation_harness = LightevalTaskConfig( name="bigbench:movie_recommendation", suite=["harness"], - 
prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="movie_recommendation", hf_avail_splits=["train"], @@ -1356,7 +1357,7 @@ bigbench_navigate_harness = LightevalTaskConfig( name="bigbench:navigate", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="navigate", hf_avail_splits=["train"], @@ -1375,7 +1376,7 @@ bigbench_reasoning_about_colored_objects_harness = LightevalTaskConfig( name="bigbench:reasoning_about_colored_objects", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="reasoning_about_colored_objects", hf_avail_splits=["train"], @@ -1394,7 +1395,7 @@ bigbench_ruin_names_harness = LightevalTaskConfig( name="bigbench:ruin_names", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="ruin_names", hf_avail_splits=["train"], @@ -1413,7 +1414,7 @@ bigbench_salient_translation_error_detection_harness = LightevalTaskConfig( name="bigbench:salient_translation_error_detection", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="salient_translation_error_detection", hf_avail_splits=["train"], @@ -1432,7 +1433,7 @@ bigbench_snarks_harness = LightevalTaskConfig( name="bigbench:snarks", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="snarks", hf_avail_splits=["train"], @@ -1451,7 +1452,7 @@ bigbench_sports_understanding_harness = LightevalTaskConfig( name="bigbench:sports_understanding", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="sports_understanding", hf_avail_splits=["train"], @@ -1470,7 +1471,7 @@ bigbench_temporal_sequences_harness = LightevalTaskConfig( name="bigbench:temporal_sequences", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="temporal_sequences", hf_avail_splits=["train"], @@ -1489,7 +1490,7 @@ bigbench_tracking_shuffled_objects_five_objects_harness = LightevalTaskConfig( name="bigbench:tracking_shuffled_objects_five_objects", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="tracking_shuffled_objects_five_objects", hf_avail_splits=["train"], @@ -1508,7 +1509,7 @@ bigbench_tracking_shuffled_objects_seven_objects_harness = LightevalTaskConfig( name="bigbench:tracking_shuffled_objects_seven_objects", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="tracking_shuffled_objects_seven_objects", hf_avail_splits=["train"], @@ -1527,7 +1528,7 @@ bigbench_tracking_shuffled_objects_three_objects_harness = LightevalTaskConfig( name="bigbench:tracking_shuffled_objects_three_objects", suite=["harness"], - prompt_function="bbh_harness", + prompt_function=prompt.bbh_harness, hf_repo="lighteval/bbh", hf_subset="tracking_shuffled_objects_three_objects", hf_avail_splits=["train"], @@ -1546,7 +1547,7 @@ bbh_boolean_expressions_harness = LightevalTaskConfig( name="bbh:boolean_expressions", suite=["harness"], - prompt_function="bbh_boolean_expressions", + prompt_function=prompt.bbh_boolean_expressions, hf_repo="lukaemon/bbh", hf_subset="boolean_expressions", 
hf_avail_splits=["test"], @@ -1570,7 +1571,7 @@ bbh_causal_judgment_harness = LightevalTaskConfig( name="bbh:causal_judgment", suite=["harness"], - prompt_function="bbh_causal_judgment", + prompt_function=prompt.bbh_causal_judgment, hf_repo="lukaemon/bbh", hf_subset="causal_judgement", hf_avail_splits=["test"], @@ -1594,7 +1595,7 @@ bbh_date_understanding_harness = LightevalTaskConfig( name="bbh:date_understanding", suite=["harness"], - prompt_function="bbh_date_understanding", + prompt_function=prompt.bbh_date_understanding, hf_repo="lukaemon/bbh", hf_subset="date_understanding", hf_avail_splits=["test"], @@ -1618,7 +1619,7 @@ bbh_disambiguation_qa_harness = LightevalTaskConfig( name="bbh:disambiguation_qa", suite=["harness"], - prompt_function="bbh_disambiguation_qa", + prompt_function=prompt.bbh_disambiguation_qa, hf_repo="lukaemon/bbh", hf_subset="disambiguation_qa", hf_avail_splits=["test"], @@ -1642,7 +1643,7 @@ bbh_dyck_languages_harness = LightevalTaskConfig( name="bbh:dyck_languages", suite=["harness"], - prompt_function="bbh_dyck_languages", + prompt_function=prompt.bbh_dyck_languages, hf_repo="lukaemon/bbh", hf_subset="dyck_languages", hf_avail_splits=["test"], @@ -1666,7 +1667,7 @@ bbh_formal_fallacies_harness = LightevalTaskConfig( name="bbh:formal_fallacies", suite=["harness"], - prompt_function="bbh_formal_fallacies", + prompt_function=prompt.bbh_formal_fallacies, hf_repo="lukaemon/bbh", hf_subset="formal_fallacies", hf_avail_splits=["test"], @@ -1690,7 +1691,7 @@ bbh_geometric_shapes_harness = LightevalTaskConfig( name="bbh:geometric_shapes", suite=["harness"], - prompt_function="bbh_geometric_shapes", + prompt_function=prompt.bbh_geometric_shapes, hf_repo="lukaemon/bbh", hf_subset="geometric_shapes", hf_avail_splits=["test"], @@ -1714,7 +1715,7 @@ bbh_hyperbaton_harness = LightevalTaskConfig( name="bbh:hyperbaton", suite=["harness"], - prompt_function="bbh_hyperbaton", + prompt_function=prompt.bbh_hyperbaton, hf_repo="lukaemon/bbh", hf_subset="hyperbaton", hf_avail_splits=["test"], @@ -1738,7 +1739,7 @@ bbh_logical_deduction_five_objects_harness = LightevalTaskConfig( name="bbh:logical_deduction_five_objects", suite=["harness"], - prompt_function="bbh_logical_deduction_five_objects", + prompt_function=prompt.bbh_logical_deduction_five_objects, hf_repo="lukaemon/bbh", hf_subset="logical_deduction_five_objects", hf_avail_splits=["test"], @@ -1762,7 +1763,7 @@ bbh_logical_deduction_seven_objects_harness = LightevalTaskConfig( name="bbh:logical_deduction_seven_objects", suite=["harness"], - prompt_function="bbh_logical_deduction_seven_objects", + prompt_function=prompt.bbh_logical_deduction_seven_objects, hf_repo="lukaemon/bbh", hf_subset="logical_deduction_seven_objects", hf_avail_splits=["test"], @@ -1786,7 +1787,7 @@ bbh_logical_deduction_three_objects_harness = LightevalTaskConfig( name="bbh:logical_deduction_three_objects", suite=["harness"], - prompt_function="bbh_logical_deduction_three_objects", + prompt_function=prompt.bbh_logical_deduction_three_objects, hf_repo="lukaemon/bbh", hf_subset="logical_deduction_three_objects", hf_avail_splits=["test"], @@ -1810,7 +1811,7 @@ bbh_movie_recommendation_harness = LightevalTaskConfig( name="bbh:movie_recommendation", suite=["harness"], - prompt_function="bbh_movie_recommendation", + prompt_function=prompt.bbh_movie_recommendation, hf_repo="lukaemon/bbh", hf_subset="movie_recommendation", hf_avail_splits=["test"], @@ -1834,7 +1835,7 @@ bbh_multistep_arithmetic_two_harness = LightevalTaskConfig( 
name="bbh:multistep_arithmetic_two", suite=["harness"], - prompt_function="bbh_multistep_arithmetic_two", + prompt_function=prompt.bbh_multistep_arithmetic_two, hf_repo="lukaemon/bbh", hf_subset="multistep_arithmetic_two", hf_avail_splits=["test"], @@ -1858,7 +1859,7 @@ bbh_navigate_harness = LightevalTaskConfig( name="bbh:navigate", suite=["harness"], - prompt_function="bbh_navigate", + prompt_function=prompt.bbh_navigate, hf_repo="lukaemon/bbh", hf_subset="navigate", hf_avail_splits=["test"], @@ -1882,7 +1883,7 @@ bbh_object_counting_harness = LightevalTaskConfig( name="bbh:object_counting", suite=["harness"], - prompt_function="bbh_object_counting", + prompt_function=prompt.bbh_object_counting, hf_repo="lukaemon/bbh", hf_subset="object_counting", hf_avail_splits=["test"], @@ -1906,7 +1907,7 @@ bbh_penguins_in_a_table_harness = LightevalTaskConfig( name="bbh:penguins_in_a_table", suite=["harness"], - prompt_function="bbh_penguins_in_a_table", + prompt_function=prompt.bbh_penguins_in_a_table, hf_repo="lukaemon/bbh", hf_subset="penguins_in_a_table", hf_avail_splits=["test"], @@ -1930,7 +1931,7 @@ bbh_reasoning_about_colored_objects_harness = LightevalTaskConfig( name="bbh:reasoning_about_colored_objects", suite=["harness"], - prompt_function="bbh_reasoning_about_colored_objects", + prompt_function=prompt.bbh_reasoning_about_colored_objects, hf_repo="lukaemon/bbh", hf_subset="reasoning_about_colored_objects", hf_avail_splits=["test"], @@ -1954,7 +1955,7 @@ bbh_ruin_names_harness = LightevalTaskConfig( name="bbh:ruin_names", suite=["harness"], - prompt_function="bbh_ruin_names", + prompt_function=prompt.bbh_ruin_names, hf_repo="lukaemon/bbh", hf_subset="ruin_names", hf_avail_splits=["test"], @@ -1978,7 +1979,7 @@ bbh_salient_translation_error_detection_harness = LightevalTaskConfig( name="bbh:salient_translation_error_detection", suite=["harness"], - prompt_function="bbh_salient_translation_error_detection", + prompt_function=prompt.bbh_salient_translation_error_detection, hf_repo="lukaemon/bbh", hf_subset="salient_translation_error_detection", hf_avail_splits=["test"], @@ -2002,7 +2003,7 @@ bbh_snarks_harness = LightevalTaskConfig( name="bbh:snarks", suite=["harness"], - prompt_function="bbh_snarks", + prompt_function=prompt.bbh_snarks, hf_repo="lukaemon/bbh", hf_subset="snarks", hf_avail_splits=["test"], @@ -2026,7 +2027,7 @@ bbh_sports_understanding_harness = LightevalTaskConfig( name="bbh:sports_understanding", suite=["harness"], - prompt_function="bbh_sports_understanding", + prompt_function=prompt.bbh_sports_understanding, hf_repo="lukaemon/bbh", hf_subset="sports_understanding", hf_avail_splits=["test"], @@ -2050,7 +2051,7 @@ bbh_temporal_sequences_harness = LightevalTaskConfig( name="bbh:temporal_sequences", suite=["harness"], - prompt_function="bbh_temporal_sequences", + prompt_function=prompt.bbh_temporal_sequences, hf_repo="lukaemon/bbh", hf_subset="temporal_sequences", hf_avail_splits=["test"], @@ -2074,7 +2075,7 @@ bbh_tracking_shuffled_objects_five_objects_harness = LightevalTaskConfig( name="bbh:tracking_shuffled_objects_five_objects", suite=["harness"], - prompt_function="bbh_tracking_shuffled_objects_five_objects", + prompt_function=prompt.bbh_tracking_shuffled_objects_five_objects, hf_repo="lukaemon/bbh", hf_subset="tracking_shuffled_objects_five_objects", hf_avail_splits=["test"], @@ -2098,7 +2099,7 @@ bbh_tracking_shuffled_objects_seven_objects_harness = LightevalTaskConfig( name="bbh:tracking_shuffled_objects_seven_objects", suite=["harness"], - 
prompt_function="bbh_tracking_shuffled_objects_seven_objects", + prompt_function=prompt.bbh_tracking_shuffled_objects_seven_objects, hf_repo="lukaemon/bbh", hf_subset="tracking_shuffled_objects_seven_objects", hf_avail_splits=["test"], @@ -2122,7 +2123,7 @@ bbh_tracking_shuffled_objects_three_objects_harness = LightevalTaskConfig( name="bbh:tracking_shuffled_objects_three_objects", suite=["harness"], - prompt_function="bbh_tracking_shuffled_objects_three_objects", + prompt_function=prompt.bbh_tracking_shuffled_objects_three_objects, hf_repo="lukaemon/bbh", hf_subset="tracking_shuffled_objects_three_objects", hf_avail_splits=["test"], @@ -2146,7 +2147,7 @@ bbh_web_of_lies_harness = LightevalTaskConfig( name="bbh:web_of_lies", suite=["harness"], - prompt_function="bbh_web_of_lies", + prompt_function=prompt.bbh_web_of_lies, hf_repo="lukaemon/bbh", hf_subset="web_of_lies", hf_avail_splits=["test"], @@ -2170,7 +2171,7 @@ bbh_word_sorting_harness = LightevalTaskConfig( name="bbh:word_sorting", suite=["harness"], - prompt_function="bbh_word_sorting", + prompt_function=prompt.bbh_word_sorting, hf_repo="lukaemon/bbh", hf_subset="word_sorting", hf_avail_splits=["test"], @@ -2194,7 +2195,7 @@ bbq_helm = LightevalTaskConfig( name="bbq", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="all", hf_avail_splits=["train", "test"], @@ -2218,7 +2219,7 @@ bbq_Age_helm = LightevalTaskConfig( name="bbq:Age", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Age", hf_avail_splits=["train", "test"], @@ -2242,7 +2243,7 @@ bbq_Disability_status_helm = LightevalTaskConfig( name="bbq:Disability_status", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Disability_status", hf_avail_splits=["train", "test"], @@ -2266,7 +2267,7 @@ bbq_Gender_identity_helm = LightevalTaskConfig( name="bbq:Gender_identity", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Gender_identity", hf_avail_splits=["train", "test"], @@ -2290,7 +2291,7 @@ bbq_Nationality_helm = LightevalTaskConfig( name="bbq=Nationality", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Nationality", hf_avail_splits=["train", "test"], @@ -2314,7 +2315,7 @@ bbq_Physical_appearance_helm = LightevalTaskConfig( name="bbq:Physical_appearance", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Physical_appearance", hf_avail_splits=["train", "test"], @@ -2338,7 +2339,7 @@ bbq_Race_ethnicity_helm = LightevalTaskConfig( name="bbq:Race_ethnicity", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Race_ethnicity", hf_avail_splits=["train", "test"], @@ -2362,7 +2363,7 @@ bbq_Race_x_SES_helm = LightevalTaskConfig( name="bbq:Race_x_SES", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Race_x_SES", hf_avail_splits=["train", "test"], @@ -2386,7 +2387,7 @@ bbq_Race_x_gender_helm = LightevalTaskConfig( name="bbq:Race_x_gender", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Race_x_gender", hf_avail_splits=["train", "test"], @@ -2410,7 +2411,7 @@ bbq_Religion_helm = LightevalTaskConfig( name="bbq:Religion", suite=["helm"], - 
prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Religion", hf_avail_splits=["train", "test"], @@ -2434,7 +2435,7 @@ bbq_SES_helm = LightevalTaskConfig( name="bbq:SES", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="SES", hf_avail_splits=["train", "test"], @@ -2458,7 +2459,7 @@ bbq_Sexual_orientation_helm = LightevalTaskConfig( name="bbq:Sexual_orientation", suite=["helm"], - prompt_function="bbq", + prompt_function=prompt.bbq, hf_repo="lighteval/bbq_helm", hf_subset="Sexual_orientation", hf_avail_splits=["train", "test"], @@ -2482,7 +2483,7 @@ bbq_lite_json_bigbench_lite = LightevalTaskConfig( name="bbq_lite_json", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_whitespace_after_query", + prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query, hf_repo="bigbench", hf_subset="bbq_lite_json", hf_avail_splits=["default", "train", "validation"], @@ -2500,7 +2501,7 @@ bigbench_auto_debugging_helm = LightevalTaskConfig( name="bigbench:auto_debugging", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="auto_debugging", hf_avail_splits=["train", "test", "validation"], @@ -2518,7 +2519,7 @@ bigbench_bbq_lite_json_age_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:age_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-age_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2536,7 +2537,7 @@ bigbench_bbq_lite_json_age_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:age_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-age_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2554,7 +2555,7 @@ bigbench_bbq_lite_json_disability_status_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:disability_status_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-disability_status_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2572,7 +2573,7 @@ bigbench_bbq_lite_json_disability_status_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:disability_status_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-disability_status_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2590,7 +2591,7 @@ bigbench_bbq_lite_json_gender_identity_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:gender_identity_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-gender_identity_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2608,7 +2609,7 @@ bigbench_bbq_lite_json_gender_identity_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:gender_identity_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, 
hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-gender_identity_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2626,7 +2627,7 @@ bigbench_bbq_lite_json_nationality_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:nationality_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-nationality_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2644,7 +2645,7 @@ bigbench_bbq_lite_json_nationality_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:nationality_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-nationality_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2662,7 +2663,7 @@ bigbench_bbq_lite_json_physical_appearance_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:physical_appearance_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-physical_appearance_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2680,7 +2681,7 @@ bigbench_bbq_lite_json_physical_appearance_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:physical_appearance_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-physical_appearance_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2698,7 +2699,7 @@ bigbench_bbq_lite_json_race_ethnicity_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:race_ethnicity_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-race_ethnicity_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2716,7 +2717,7 @@ bigbench_bbq_lite_json_race_ethnicity_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:race_ethnicity_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-race_ethnicity_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2734,7 +2735,7 @@ bigbench_bbq_lite_json_religion_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:religion_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-religion_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2752,7 +2753,7 @@ bigbench_bbq_lite_json_religion_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:religion_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-religion_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2770,7 +2771,7 @@ bigbench_bbq_lite_json_ses_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:ses_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", 
hf_subset="bbq_lite_json-ses_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2788,7 +2789,7 @@ bigbench_bbq_lite_json_ses_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:ses_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-ses_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2806,7 +2807,7 @@ bigbench_bbq_lite_json_sexual_orientation_ambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:sexual_orientation_ambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-sexual_orientation_ambig", hf_avail_splits=["train", "test", "validation"], @@ -2824,7 +2825,7 @@ bigbench_bbq_lite_json_sexual_orientation_disambig_helm = LightevalTaskConfig( name="bigbench:bbq_lite_json:sexual_orientation_disambig", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="bbq_lite_json-sexual_orientation_disambig", hf_avail_splits=["train", "test", "validation"], @@ -2842,7 +2843,7 @@ bigbench_code_line_description_helm = LightevalTaskConfig( name="bigbench:code_line_description", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="code_line_description", hf_avail_splits=["train", "test", "validation"], @@ -2860,7 +2861,7 @@ bigbench_conceptual_combinations_contradictions_helm = LightevalTaskConfig( name="bigbench:conceptual_combinations:contradictions", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conceptual_combinations-contradictions", hf_avail_splits=["train", "test", "validation"], @@ -2878,7 +2879,7 @@ bigbench_conceptual_combinations_emergent_properties_helm = LightevalTaskConfig( name="bigbench:conceptual_combinations:emergent_properties", suite=["helm"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conceptual_combinations-emergent_properties", hf_avail_splits=["train", "test", "validation"], @@ -2896,7 +2897,7 @@ bigbench_conceptual_combinations_fanciful_fictional_combinations_helm = LightevalTaskConfig( name="bigbench:conceptual_combinations:fanciful_fictional_combinations", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conceptual_combinations-fanciful_fictional_combinations", hf_avail_splits=["train", "test", "validation"], @@ -2914,7 +2915,7 @@ bigbench_conceptual_combinations_homonyms_helm = LightevalTaskConfig( name="bigbench:conceptual_combinations:homonyms", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conceptual_combinations-homonyms", hf_avail_splits=["train", "test", "validation"], @@ -2932,7 +2933,7 @@ bigbench_conceptual_combinations_invented_words_helm = LightevalTaskConfig( name="bigbench:conceptual_combinations:invented_words", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", 
hf_subset="conceptual_combinations-invented_words", hf_avail_splits=["train", "test", "validation"], @@ -2950,7 +2951,7 @@ bigbench_conlang_translation_adna_from_helm = LightevalTaskConfig( name="bigbench:conlang_translation:adna_from", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-adna_from", hf_avail_splits=["train", "test", "validation"], @@ -2968,7 +2969,7 @@ bigbench_conlang_translation_adna_to_helm = LightevalTaskConfig( name="bigbench:conlang_translation:adna_to", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-adna_to", hf_avail_splits=["train", "test", "validation"], @@ -2986,7 +2987,7 @@ bigbench_conlang_translation_atikampe_from_helm = LightevalTaskConfig( name="bigbench:conlang_translation:atikampe_from", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-atikampe_from", hf_avail_splits=["train", "test", "validation"], @@ -3004,7 +3005,7 @@ bigbench_conlang_translation_atikampe_to_helm = LightevalTaskConfig( name="bigbench:conlang_translation:atikampe_to", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-atikampe_to", hf_avail_splits=["train", "test", "validation"], @@ -3022,7 +3023,7 @@ bigbench_conlang_translation_gornam_from_helm = LightevalTaskConfig( name="bigbench:conlang_translation:gornam_from", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-gornam_from", hf_avail_splits=["train", "test", "validation"], @@ -3040,7 +3041,7 @@ bigbench_conlang_translation_gornam_to_helm = LightevalTaskConfig( name="bigbench:conlang_translation:gornam_to", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-gornam_to", hf_avail_splits=["train", "test", "validation"], @@ -3058,7 +3059,7 @@ bigbench_conlang_translation_holuan_from_helm = LightevalTaskConfig( name="bigbench:conlang_translation:holuan_from", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-holuan_from", hf_avail_splits=["train", "test", "validation"], @@ -3076,7 +3077,7 @@ bigbench_conlang_translation_holuan_to_helm = LightevalTaskConfig( name="bigbench:conlang_translation:holuan_to", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-holuan_to", hf_avail_splits=["train", "test", "validation"], @@ -3094,7 +3095,7 @@ bigbench_conlang_translation_mkafala_from_helm = LightevalTaskConfig( name="bigbench:conlang_translation:mkafala_from", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-mkafala_from", hf_avail_splits=["train", "test", "validation"], @@ -3112,7 +3113,7 @@ 
bigbench_conlang_translation_mkafala_to_helm = LightevalTaskConfig( name="bigbench:conlang_translation:mkafala_to", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-mkafala_to", hf_avail_splits=["train", "test", "validation"], @@ -3130,7 +3131,7 @@ bigbench_conlang_translation_postpositive_english_from_helm = LightevalTaskConfig( name="bigbench:conlang_translation:postpositive_english_from", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-postpositive_english_from", hf_avail_splits=["train", "test", "validation"], @@ -3148,7 +3149,7 @@ bigbench_conlang_translation_postpositive_english_to_helm = LightevalTaskConfig( name="bigbench:conlang_translation:postpositive_english_to", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-postpositive_english_to", hf_avail_splits=["train", "test", "validation"], @@ -3166,7 +3167,7 @@ bigbench_conlang_translation_unapuri_from_helm = LightevalTaskConfig( name="bigbench:conlang_translation:unapuri_from", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-unapuri_from", hf_avail_splits=["train", "test", "validation"], @@ -3184,7 +3185,7 @@ bigbench_conlang_translation_unapuri_to_helm = LightevalTaskConfig( name="bigbench:conlang_translation:unapuri_to", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-unapuri_to", hf_avail_splits=["train", "test", "validation"], @@ -3202,7 +3203,7 @@ bigbench_conlang_translation_vaomi_from_helm = LightevalTaskConfig( name="bigbench:conlang_translation:vaomi_from", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-vaomi_from", hf_avail_splits=["train", "test", "validation"], @@ -3220,7 +3221,7 @@ bigbench_conlang_translation_vaomi_to_helm = LightevalTaskConfig( name="bigbench:conlang_translation:vaomi_to", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="conlang_translation-vaomi_to", hf_avail_splits=["train", "test", "validation"], @@ -3238,7 +3239,7 @@ bigbench_emoji_movie_helm = LightevalTaskConfig( name="bigbench:emoji_movie", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="emoji_movie", hf_avail_splits=["train", "test", "validation"], @@ -3256,7 +3257,7 @@ bigbench_formal_fallacies_syllogisms_negation_helm = LightevalTaskConfig( name="bigbench:formal_fallacies_syllogisms_negation", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="formal_fallacies_syllogisms_negation", hf_avail_splits=["train", "test", "validation"], @@ -3274,7 +3275,7 @@ bigbench_hindu_knowledge_helm = LightevalTaskConfig( name="bigbench:hindu_knowledge", suite=["helm", 
"bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="hindu_knowledge", hf_avail_splits=["train", "test", "validation"], @@ -3292,7 +3293,7 @@ bigbench_known_unknowns_helm = LightevalTaskConfig( name="bigbench:known_unknowns", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="known_unknowns", hf_avail_splits=["train", "test", "validation"], @@ -3310,7 +3311,7 @@ bigbench_language_identification_helm = LightevalTaskConfig( name="bigbench:language_identification", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="language_identification", hf_avail_splits=["train", "test", "validation"], @@ -3328,7 +3329,7 @@ bigbench_linguistics_puzzles_helm = LightevalTaskConfig( name="bigbench:linguistics_puzzles", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="linguistics_puzzles", hf_avail_splits=["train", "test", "validation"], @@ -3346,7 +3347,7 @@ bigbench_logic_grid_puzzle_helm = LightevalTaskConfig( name="bigbench:logic_grid_puzzle", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="logic_grid_puzzle", hf_avail_splits=["train", "test", "validation"], @@ -3364,7 +3365,7 @@ bigbench_logical_deduction_five_objects_helm = LightevalTaskConfig( name="bigbench:logical_deduction-five_objects", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="logical_deduction-five_objects", hf_avail_splits=["train", "test", "validation"], @@ -3382,7 +3383,7 @@ bigbench_logical_deduction_seven_objects_helm = LightevalTaskConfig( name="bigbench:logical_deduction-seven_objects", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="logical_deduction-seven_objects", hf_avail_splits=["train", "test", "validation"], @@ -3400,7 +3401,7 @@ bigbench_logical_deduction_three_objects_helm = LightevalTaskConfig( name="bigbench:logical_deduction-three_objects", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="logical_deduction-three_objects", hf_avail_splits=["train", "test", "validation"], @@ -3418,7 +3419,7 @@ bigbench_misconceptions_russian_helm = LightevalTaskConfig( name="bigbench:misconceptions_russian", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="misconceptions_russian", hf_avail_splits=["train", "test", "validation"], @@ -3436,7 +3437,7 @@ bigbench_novel_concepts_helm = LightevalTaskConfig( name="bigbench:novel_concepts", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="novel_concepts", hf_avail_splits=["train", "test", "validation"], @@ -3454,7 +3455,7 @@ bigbench_operators_helm = LightevalTaskConfig( name="bigbench:operators", suite=["helm", "bigbench_scenario"], - 
prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="operators", hf_avail_splits=["train", "test", "validation"], @@ -3472,7 +3473,7 @@ bigbench_parsinlu_reading_comprehension_helm = LightevalTaskConfig( name="bigbench:parsinlu_reading_comprehension", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="parsinlu_reading_comprehension", hf_avail_splits=["train", "test", "validation"], @@ -3490,7 +3491,7 @@ bigbench_play_dialog_same_or_different_helm = LightevalTaskConfig( name="bigbench:play_dialog_same_or_different", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="play_dialog_same_or_different", hf_avail_splits=["train", "test", "validation"], @@ -3508,7 +3509,7 @@ bigbench_repeat_copy_logic_helm = LightevalTaskConfig( name="bigbench:repeat_copy_logic", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="repeat_copy_logic", hf_avail_splits=["train", "test", "validation"], @@ -3526,7 +3527,7 @@ bigbench_strange_stories_boolean_helm = LightevalTaskConfig( name="bigbench:strange_stories-boolean", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="strange_stories-boolean", hf_avail_splits=["train", "test", "validation"], @@ -3544,7 +3545,7 @@ bigbench_strange_stories_multiple_choice_helm = LightevalTaskConfig( name="bigbench:strange_stories-multiple_choice", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="strange_stories-multiple_choice", hf_avail_splits=["train", "test", "validation"], @@ -3562,7 +3563,7 @@ bigbench_strategyqa_helm = LightevalTaskConfig( name="bigbench:strategyqa", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="strategyqa", hf_avail_splits=["train", "test", "validation"], @@ -3580,7 +3581,7 @@ bigbench_symbol_interpretation_adversarial_helm = LightevalTaskConfig( name="bigbench:symbol_interpretation-adversarial", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="symbol_interpretation-adversarial", hf_avail_splits=["train", "test", "validation"], @@ -3598,7 +3599,7 @@ bigbench_symbol_interpretation_emoji_agnostic_helm = LightevalTaskConfig( name="bigbench:symbol_interpretation-emoji_agnostic", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="symbol_interpretation-emoji_agnostic", hf_avail_splits=["train", "test", "validation"], @@ -3616,7 +3617,7 @@ bigbench_symbol_interpretation_name_agnostic_helm = LightevalTaskConfig( name="bigbench:symbol_interpretation-name_agnostic", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="symbol_interpretation-name_agnostic", hf_avail_splits=["train", "test", "validation"], @@ -3634,7 +3635,7 @@ 
bigbench_symbol_interpretation_plain_helm = LightevalTaskConfig( name="bigbench:symbol_interpretation-plain", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="symbol_interpretation-plain", hf_avail_splits=["train", "test", "validation"], @@ -3652,7 +3653,7 @@ bigbench_symbol_interpretation_tricky_helm = LightevalTaskConfig( name="bigbench:symbol_interpretation-tricky", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="symbol_interpretation-tricky", hf_avail_splits=["train", "test", "validation"], @@ -3670,7 +3671,7 @@ bigbench_vitaminc_fact_verification_helm = LightevalTaskConfig( name="bigbench:vitaminc_fact_verification", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="vitaminc_fact_verification", hf_avail_splits=["train", "test", "validation"], @@ -3688,7 +3689,7 @@ bigbench_winowhy_helm = LightevalTaskConfig( name="bigbench:winowhy", suite=["helm", "bigbench_scenario"], - prompt_function="bigbench_helm", + prompt_function=prompt.bigbench_helm, hf_repo="lighteval/bigbench_helm", hf_subset="winowhy", hf_avail_splits=["train", "test", "validation"], @@ -3706,7 +3707,7 @@ blimp_adjunct_island_lighteval = LightevalTaskConfig( name="blimp:adjunct_island", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="adjunct_island", hf_avail_splits=["train"], @@ -3724,7 +3725,7 @@ blimp_adjunct_island_helm = LightevalTaskConfig( name="blimp:adjunct_island", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="adjunct_island", hf_avail_splits=["train"], @@ -3742,7 +3743,7 @@ blimp_anaphor_gender_agreement_lighteval = LightevalTaskConfig( name="blimp:anaphor_gender_agreement", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="anaphor_gender_agreement", hf_avail_splits=["train"], @@ -3760,7 +3761,7 @@ blimp_anaphor_gender_agreement_helm = LightevalTaskConfig( name="blimp:anaphor_gender_agreement", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="anaphor_gender_agreement", hf_avail_splits=["train"], @@ -3778,7 +3779,7 @@ blimp_anaphor_number_agreement_lighteval = LightevalTaskConfig( name="blimp:anaphor_number_agreement", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="anaphor_number_agreement", hf_avail_splits=["train"], @@ -3796,7 +3797,7 @@ blimp_anaphor_number_agreement_helm = LightevalTaskConfig( name="blimp:anaphor_number_agreement", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="anaphor_number_agreement", hf_avail_splits=["train"], @@ -3814,7 +3815,7 @@ blimp_animate_subject_passive_lighteval = LightevalTaskConfig( name="blimp:animate_subject_passive", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="animate_subject_passive", hf_avail_splits=["train"], @@ -3832,7 +3833,7 @@ blimp_animate_subject_passive_helm = LightevalTaskConfig( name="blimp:animate_subject_passive", suite=["helm", "blimp"], - 
prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="animate_subject_passive", hf_avail_splits=["train"], @@ -3850,7 +3851,7 @@ blimp_animate_subject_trans_lighteval = LightevalTaskConfig( name="blimp:animate_subject_trans", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="animate_subject_trans", hf_avail_splits=["train"], @@ -3868,7 +3869,7 @@ blimp_animate_subject_trans_helm = LightevalTaskConfig( name="blimp:animate_subject_trans", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="animate_subject_trans", hf_avail_splits=["train"], @@ -3886,7 +3887,7 @@ blimp_causative_lighteval = LightevalTaskConfig( name="blimp:causative", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="causative", hf_avail_splits=["train"], @@ -3904,7 +3905,7 @@ blimp_causative_helm = LightevalTaskConfig( name="blimp:causative", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="causative", hf_avail_splits=["train"], @@ -3922,7 +3923,7 @@ blimp_complex_NP_island_lighteval = LightevalTaskConfig( name="blimp:complex_NP_island", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="complex_NP_island", hf_avail_splits=["train"], @@ -3940,7 +3941,7 @@ blimp_complex_NP_island_helm = LightevalTaskConfig( name="blimp:complex_NP_island", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="complex_NP_island", hf_avail_splits=["train"], @@ -3958,7 +3959,7 @@ blimp_coordinate_structure_constraint_complex_left_branch_lighteval = LightevalTaskConfig( name="blimp:coordinate_structure_constraint_complex_left_branch", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="coordinate_structure_constraint_complex_left_branch", hf_avail_splits=["train"], @@ -3976,7 +3977,7 @@ blimp_coordinate_structure_constraint_complex_left_branch_helm = LightevalTaskConfig( name="blimp:coordinate_structure_constraint_complex_left_branch", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="coordinate_structure_constraint_complex_left_branch", hf_avail_splits=["train"], @@ -3994,7 +3995,7 @@ blimp_coordinate_structure_constraint_object_extraction_lighteval = LightevalTaskConfig( name="blimp:coordinate_structure_constraint_object_extraction", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="coordinate_structure_constraint_object_extraction", hf_avail_splits=["train"], @@ -4012,7 +4013,7 @@ blimp_coordinate_structure_constraint_object_extraction_helm = LightevalTaskConfig( name="blimp:coordinate_structure_constraint_object_extraction", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="coordinate_structure_constraint_object_extraction", hf_avail_splits=["train"], @@ -4030,7 +4031,7 @@ blimp_determiner_noun_agreement_1_lighteval = LightevalTaskConfig( name="blimp:determiner_noun_agreement_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="determiner_noun_agreement_1", 
hf_avail_splits=["train"], @@ -4048,7 +4049,7 @@ blimp_determiner_noun_agreement_1_helm = LightevalTaskConfig( name="blimp:determiner_noun_agreement_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="determiner_noun_agreement_1", hf_avail_splits=["train"], @@ -4066,7 +4067,7 @@ blimp_determiner_noun_agreement_2_lighteval = LightevalTaskConfig( name="blimp:determiner_noun_agreement_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="determiner_noun_agreement_2", hf_avail_splits=["train"], @@ -4084,7 +4085,7 @@ blimp_determiner_noun_agreement_2_helm = LightevalTaskConfig( name="blimp:determiner_noun_agreement_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="determiner_noun_agreement_2", hf_avail_splits=["train"], @@ -4102,7 +4103,7 @@ blimp_determiner_noun_agreement_irregular_1_lighteval = LightevalTaskConfig( name="blimp:determiner_noun_agreement_irregular_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="determiner_noun_agreement_irregular_1", hf_avail_splits=["train"], @@ -4120,7 +4121,7 @@ blimp_determiner_noun_agreement_irregular_1_helm = LightevalTaskConfig( name="blimp:determiner_noun_agreement_irregular_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="determiner_noun_agreement_irregular_1", hf_avail_splits=["train"], @@ -4138,7 +4139,7 @@ blimp_determiner_noun_agreement_irregular_2_lighteval = LightevalTaskConfig( name="blimp:determiner_noun_agreement_irregular_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="determiner_noun_agreement_irregular_2", hf_avail_splits=["train"], @@ -4156,7 +4157,7 @@ blimp_determiner_noun_agreement_irregular_2_helm = LightevalTaskConfig( name="blimp:determiner_noun_agreement_irregular_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="determiner_noun_agreement_irregular_2", hf_avail_splits=["train"], @@ -4174,7 +4175,7 @@ blimp_determiner_noun_agreement_with_adj_2_lighteval = LightevalTaskConfig( name="blimp:determiner_noun_agreement_with_adj_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="determiner_noun_agreement_with_adj_2", hf_avail_splits=["train"], @@ -4192,7 +4193,7 @@ blimp_determiner_noun_agreement_with_adj_2_helm = LightevalTaskConfig( name="blimp:determiner_noun_agreement_with_adj_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="determiner_noun_agreement_with_adj_2", hf_avail_splits=["train"], @@ -4210,7 +4211,7 @@ blimp_determiner_noun_agreement_with_adj_irregular_1_lighteval = LightevalTaskConfig( name="blimp:determiner_noun_agreement_with_adj_irregular_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="determiner_noun_agreement_with_adj_irregular_1", hf_avail_splits=["train"], @@ -4228,7 +4229,7 @@ blimp_determiner_noun_agreement_with_adj_irregular_1_helm = LightevalTaskConfig( name="blimp:determiner_noun_agreement_with_adj_irregular_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + 
prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="determiner_noun_agreement_with_adj_irregular_1", hf_avail_splits=["train"], @@ -4246,7 +4247,7 @@ blimp_determiner_noun_agreement_with_adj_irregular_2_lighteval = LightevalTaskConfig( name="blimp:determiner_noun_agreement_with_adj_irregular_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="determiner_noun_agreement_with_adj_irregular_2", hf_avail_splits=["train"], @@ -4264,7 +4265,7 @@ blimp_determiner_noun_agreement_with_adj_irregular_2_helm = LightevalTaskConfig( name="blimp:determiner_noun_agreement_with_adj_irregular_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="determiner_noun_agreement_with_adj_irregular_2", hf_avail_splits=["train"], @@ -4282,7 +4283,7 @@ blimp_determiner_noun_agreement_with_adjective_1_lighteval = LightevalTaskConfig( name="blimp:determiner_noun_agreement_with_adjective_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="determiner_noun_agreement_with_adjective_1", hf_avail_splits=["train"], @@ -4300,7 +4301,7 @@ blimp_determiner_noun_agreement_with_adjective_1_helm = LightevalTaskConfig( name="blimp:determiner_noun_agreement_with_adjective_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="determiner_noun_agreement_with_adjective_1", hf_avail_splits=["train"], @@ -4318,7 +4319,7 @@ blimp_distractor_agreement_relational_noun_lighteval = LightevalTaskConfig( name="blimp:distractor_agreement_relational_noun", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="distractor_agreement_relational_noun", hf_avail_splits=["train"], @@ -4336,7 +4337,7 @@ blimp_distractor_agreement_relational_noun_helm = LightevalTaskConfig( name="blimp:distractor_agreement_relational_noun", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="distractor_agreement_relational_noun", hf_avail_splits=["train"], @@ -4354,7 +4355,7 @@ blimp_distractor_agreement_relative_clause_lighteval = LightevalTaskConfig( name="blimp:distractor_agreement_relative_clause", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="distractor_agreement_relative_clause", hf_avail_splits=["train"], @@ -4372,7 +4373,7 @@ blimp_distractor_agreement_relative_clause_helm = LightevalTaskConfig( name="blimp:distractor_agreement_relative_clause", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="distractor_agreement_relative_clause", hf_avail_splits=["train"], @@ -4390,7 +4391,7 @@ blimp_drop_argument_lighteval = LightevalTaskConfig( name="blimp:drop_argument", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="drop_argument", hf_avail_splits=["train"], @@ -4408,7 +4409,7 @@ blimp_drop_argument_helm = LightevalTaskConfig( name="blimp:drop_argument", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="drop_argument", hf_avail_splits=["train"], @@ -4426,7 +4427,7 @@ blimp_ellipsis_n_bar_1_lighteval = LightevalTaskConfig( name="blimp:ellipsis_n_bar_1", suite=["lighteval", 
"blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="ellipsis_n_bar_1", hf_avail_splits=["train"], @@ -4444,7 +4445,7 @@ blimp_ellipsis_n_bar_1_helm = LightevalTaskConfig( name="blimp:ellipsis_n_bar_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="ellipsis_n_bar_1", hf_avail_splits=["train"], @@ -4462,7 +4463,7 @@ blimp_ellipsis_n_bar_2_lighteval = LightevalTaskConfig( name="blimp:ellipsis_n_bar_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="ellipsis_n_bar_2", hf_avail_splits=["train"], @@ -4480,7 +4481,7 @@ blimp_ellipsis_n_bar_2_helm = LightevalTaskConfig( name="blimp:ellipsis_n_bar_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="ellipsis_n_bar_2", hf_avail_splits=["train"], @@ -4498,7 +4499,7 @@ blimp_existential_there_object_raising_lighteval = LightevalTaskConfig( name="blimp:existential_there_object_raising", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="existential_there_object_raising", hf_avail_splits=["train"], @@ -4516,7 +4517,7 @@ blimp_existential_there_object_raising_helm = LightevalTaskConfig( name="blimp:existential_there_object_raising", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="existential_there_object_raising", hf_avail_splits=["train"], @@ -4534,7 +4535,7 @@ blimp_existential_there_quantifiers_1_lighteval = LightevalTaskConfig( name="blimp:existential_there_quantifiers_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="existential_there_quantifiers_1", hf_avail_splits=["train"], @@ -4552,7 +4553,7 @@ blimp_existential_there_quantifiers_1_helm = LightevalTaskConfig( name="blimp:existential_there_quantifiers_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="existential_there_quantifiers_1", hf_avail_splits=["train"], @@ -4570,7 +4571,7 @@ blimp_existential_there_quantifiers_2_lighteval = LightevalTaskConfig( name="blimp:existential_there_quantifiers_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="existential_there_quantifiers_2", hf_avail_splits=["train"], @@ -4588,7 +4589,7 @@ blimp_existential_there_quantifiers_2_helm = LightevalTaskConfig( name="blimp:existential_there_quantifiers_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="existential_there_quantifiers_2", hf_avail_splits=["train"], @@ -4606,7 +4607,7 @@ blimp_existential_there_subject_raising_lighteval = LightevalTaskConfig( name="blimp:existential_there_subject_raising", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="existential_there_subject_raising", hf_avail_splits=["train"], @@ -4624,7 +4625,7 @@ blimp_existential_there_subject_raising_helm = LightevalTaskConfig( name="blimp:existential_there_subject_raising", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="existential_there_subject_raising", hf_avail_splits=["train"], @@ -4642,7 +4643,7 @@ 
blimp_expletive_it_object_raising_lighteval = LightevalTaskConfig( name="blimp:expletive_it_object_raising", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="expletive_it_object_raising", hf_avail_splits=["train"], @@ -4660,7 +4661,7 @@ blimp_expletive_it_object_raising_helm = LightevalTaskConfig( name="blimp:expletive_it_object_raising", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="expletive_it_object_raising", hf_avail_splits=["train"], @@ -4678,7 +4679,7 @@ blimp_inchoative_lighteval = LightevalTaskConfig( name="blimp:inchoative", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="inchoative", hf_avail_splits=["train"], @@ -4696,7 +4697,7 @@ blimp_inchoative_helm = LightevalTaskConfig( name="blimp:inchoative", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="inchoative", hf_avail_splits=["train"], @@ -4714,7 +4715,7 @@ blimp_intransitive_lighteval = LightevalTaskConfig( name="blimp:intransitive", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="intransitive", hf_avail_splits=["train"], @@ -4732,7 +4733,7 @@ blimp_intransitive_helm = LightevalTaskConfig( name="blimp:intransitive", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="intransitive", hf_avail_splits=["train"], @@ -4750,7 +4751,7 @@ blimp_irregular_past_participle_adjectives_lighteval = LightevalTaskConfig( name="blimp:irregular_past_participle_adjectives", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="irregular_past_participle_adjectives", hf_avail_splits=["train"], @@ -4768,7 +4769,7 @@ blimp_irregular_past_participle_adjectives_helm = LightevalTaskConfig( name="blimp:irregular_past_participle_adjectives", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="irregular_past_participle_adjectives", hf_avail_splits=["train"], @@ -4786,7 +4787,7 @@ blimp_irregular_past_participle_verbs_lighteval = LightevalTaskConfig( name="blimp:irregular_past_participle_verbs", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="irregular_past_participle_verbs", hf_avail_splits=["train"], @@ -4804,7 +4805,7 @@ blimp_irregular_past_participle_verbs_helm = LightevalTaskConfig( name="blimp:irregular_past_participle_verbs", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="irregular_past_participle_verbs", hf_avail_splits=["train"], @@ -4822,7 +4823,7 @@ blimp_irregular_plural_subject_verb_agreement_1_lighteval = LightevalTaskConfig( name="blimp:irregular_plural_subject_verb_agreement_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="irregular_plural_subject_verb_agreement_1", hf_avail_splits=["train"], @@ -4840,7 +4841,7 @@ blimp_irregular_plural_subject_verb_agreement_1_helm = LightevalTaskConfig( name="blimp:irregular_plural_subject_verb_agreement_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", 
hf_subset="irregular_plural_subject_verb_agreement_1", hf_avail_splits=["train"], @@ -4858,7 +4859,7 @@ blimp_irregular_plural_subject_verb_agreement_2_lighteval = LightevalTaskConfig( name="blimp:irregular_plural_subject_verb_agreement_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="irregular_plural_subject_verb_agreement_2", hf_avail_splits=["train"], @@ -4876,7 +4877,7 @@ blimp_irregular_plural_subject_verb_agreement_2_helm = LightevalTaskConfig( name="blimp:irregular_plural_subject_verb_agreement_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="irregular_plural_subject_verb_agreement_2", hf_avail_splits=["train"], @@ -4894,7 +4895,7 @@ blimp_left_branch_island_echo_question_lighteval = LightevalTaskConfig( name="blimp:left_branch_island_echo_question", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="left_branch_island_echo_question", hf_avail_splits=["train"], @@ -4912,7 +4913,7 @@ blimp_left_branch_island_echo_question_helm = LightevalTaskConfig( name="blimp:left_branch_island_echo_question", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="left_branch_island_echo_question", hf_avail_splits=["train"], @@ -4930,7 +4931,7 @@ blimp_left_branch_island_simple_question_lighteval = LightevalTaskConfig( name="blimp:left_branch_island_simple_question", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="left_branch_island_simple_question", hf_avail_splits=["train"], @@ -4948,7 +4949,7 @@ blimp_left_branch_island_simple_question_helm = LightevalTaskConfig( name="blimp:left_branch_island_simple_question", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="left_branch_island_simple_question", hf_avail_splits=["train"], @@ -4966,7 +4967,7 @@ blimp_matrix_question_npi_licensor_present_lighteval = LightevalTaskConfig( name="blimp:matrix_question_npi_licensor_present", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="matrix_question_npi_licensor_present", hf_avail_splits=["train"], @@ -4984,7 +4985,7 @@ blimp_matrix_question_npi_licensor_present_helm = LightevalTaskConfig( name="blimp:matrix_question_npi_licensor_present", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="matrix_question_npi_licensor_present", hf_avail_splits=["train"], @@ -5002,7 +5003,7 @@ blimp_npi_present_1_lighteval = LightevalTaskConfig( name="blimp:npi_present_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="npi_present_1", hf_avail_splits=["train"], @@ -5020,7 +5021,7 @@ blimp_npi_present_1_helm = LightevalTaskConfig( name="blimp:npi_present_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="npi_present_1", hf_avail_splits=["train"], @@ -5038,7 +5039,7 @@ blimp_npi_present_2_lighteval = LightevalTaskConfig( name="blimp:npi_present_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="npi_present_2", hf_avail_splits=["train"], @@ -5056,7 +5057,7 @@ 
blimp_npi_present_2_helm = LightevalTaskConfig( name="blimp:npi_present_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="npi_present_2", hf_avail_splits=["train"], @@ -5074,7 +5075,7 @@ blimp_only_npi_licensor_present_lighteval = LightevalTaskConfig( name="blimp:only_npi_licensor_present", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="only_npi_licensor_present", hf_avail_splits=["train"], @@ -5092,7 +5093,7 @@ blimp_only_npi_licensor_present_helm = LightevalTaskConfig( name="blimp:only_npi_licensor_present", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="only_npi_licensor_present", hf_avail_splits=["train"], @@ -5110,7 +5111,7 @@ blimp_only_npi_scope_lighteval = LightevalTaskConfig( name="blimp:only_npi_scope", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="only_npi_scope", hf_avail_splits=["train"], @@ -5128,7 +5129,7 @@ blimp_only_npi_scope_helm = LightevalTaskConfig( name="blimp:only_npi_scope", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="only_npi_scope", hf_avail_splits=["train"], @@ -5146,7 +5147,7 @@ blimp_passive_1_lighteval = LightevalTaskConfig( name="blimp:passive_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="passive_1", hf_avail_splits=["train"], @@ -5164,7 +5165,7 @@ blimp_passive_1_helm = LightevalTaskConfig( name="blimp:passive_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="passive_1", hf_avail_splits=["train"], @@ -5182,7 +5183,7 @@ blimp_passive_2_lighteval = LightevalTaskConfig( name="blimp:passive_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="passive_2", hf_avail_splits=["train"], @@ -5200,7 +5201,7 @@ blimp_passive_2_helm = LightevalTaskConfig( name="blimp:passive_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="passive_2", hf_avail_splits=["train"], @@ -5218,7 +5219,7 @@ blimp_principle_A_c_command_lighteval = LightevalTaskConfig( name="blimp:principle_A_c_command", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="principle_A_c_command", hf_avail_splits=["train"], @@ -5236,7 +5237,7 @@ blimp_principle_A_c_command_helm = LightevalTaskConfig( name="blimp:principle_A_c_command", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="principle_A_c_command", hf_avail_splits=["train"], @@ -5254,7 +5255,7 @@ blimp_principle_A_case_1_lighteval = LightevalTaskConfig( name="blimp:principle_A_case_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="principle_A_case_1", hf_avail_splits=["train"], @@ -5272,7 +5273,7 @@ blimp_principle_A_case_1_helm = LightevalTaskConfig( name="blimp:principle_A_case_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="principle_A_case_1", hf_avail_splits=["train"], @@ -5290,7 +5291,7 @@ 
blimp_principle_A_case_2_lighteval = LightevalTaskConfig( name="blimp:principle_A_case_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="principle_A_case_2", hf_avail_splits=["train"], @@ -5308,7 +5309,7 @@ blimp_principle_A_case_2_helm = LightevalTaskConfig( name="blimp:principle_A_case_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="principle_A_case_2", hf_avail_splits=["train"], @@ -5326,7 +5327,7 @@ blimp_principle_A_domain_1_lighteval = LightevalTaskConfig( name="blimp:principle_A_domain_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="principle_A_domain_1", hf_avail_splits=["train"], @@ -5344,7 +5345,7 @@ blimp_principle_A_domain_1_helm = LightevalTaskConfig( name="blimp:principle_A_domain_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="principle_A_domain_1", hf_avail_splits=["train"], @@ -5362,7 +5363,7 @@ blimp_principle_A_domain_2_lighteval = LightevalTaskConfig( name="blimp:principle_A_domain_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="principle_A_domain_2", hf_avail_splits=["train"], @@ -5380,7 +5381,7 @@ blimp_principle_A_domain_2_helm = LightevalTaskConfig( name="blimp:principle_A_domain_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="principle_A_domain_2", hf_avail_splits=["train"], @@ -5398,7 +5399,7 @@ blimp_principle_A_domain_3_lighteval = LightevalTaskConfig( name="blimp:principle_A_domain_3", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="principle_A_domain_3", hf_avail_splits=["train"], @@ -5416,7 +5417,7 @@ blimp_principle_A_domain_3_helm = LightevalTaskConfig( name="blimp:principle_A_domain_3", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="principle_A_domain_3", hf_avail_splits=["train"], @@ -5434,7 +5435,7 @@ blimp_principle_A_reconstruction_lighteval = LightevalTaskConfig( name="blimp:principle_A_reconstruction", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="principle_A_reconstruction", hf_avail_splits=["train"], @@ -5452,7 +5453,7 @@ blimp_principle_A_reconstruction_helm = LightevalTaskConfig( name="blimp:principle_A_reconstruction", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="principle_A_reconstruction", hf_avail_splits=["train"], @@ -5470,7 +5471,7 @@ blimp_regular_plural_subject_verb_agreement_1_lighteval = LightevalTaskConfig( name="blimp:regular_plural_subject_verb_agreement_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="regular_plural_subject_verb_agreement_1", hf_avail_splits=["train"], @@ -5488,7 +5489,7 @@ blimp_regular_plural_subject_verb_agreement_1_helm = LightevalTaskConfig( name="blimp:regular_plural_subject_verb_agreement_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="regular_plural_subject_verb_agreement_1", hf_avail_splits=["train"], @@ -5506,7 +5507,7 @@ 
blimp_regular_plural_subject_verb_agreement_2_lighteval = LightevalTaskConfig( name="blimp:regular_plural_subject_verb_agreement_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="regular_plural_subject_verb_agreement_2", hf_avail_splits=["train"], @@ -5524,7 +5525,7 @@ blimp_regular_plural_subject_verb_agreement_2_helm = LightevalTaskConfig( name="blimp:regular_plural_subject_verb_agreement_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="regular_plural_subject_verb_agreement_2", hf_avail_splits=["train"], @@ -5542,7 +5543,7 @@ blimp_sentential_negation_npi_licensor_present_lighteval = LightevalTaskConfig( name="blimp:sentential_negation_npi_licensor_present", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="sentential_negation_npi_licensor_present", hf_avail_splits=["train"], @@ -5560,7 +5561,7 @@ blimp_sentential_negation_npi_licensor_present_helm = LightevalTaskConfig( name="blimp:sentential_negation_npi_licensor_present", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="sentential_negation_npi_licensor_present", hf_avail_splits=["train"], @@ -5578,7 +5579,7 @@ blimp_sentential_negation_npi_scope_lighteval = LightevalTaskConfig( name="blimp:sentential_negation_npi_scope", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="sentential_negation_npi_scope", hf_avail_splits=["train"], @@ -5596,7 +5597,7 @@ blimp_sentential_negation_npi_scope_helm = LightevalTaskConfig( name="blimp:sentential_negation_npi_scope", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="sentential_negation_npi_scope", hf_avail_splits=["train"], @@ -5614,7 +5615,7 @@ blimp_sentential_subject_island_lighteval = LightevalTaskConfig( name="blimp:sentential_subject_island", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="sentential_subject_island", hf_avail_splits=["train"], @@ -5632,7 +5633,7 @@ blimp_sentential_subject_island_helm = LightevalTaskConfig( name="blimp:sentential_subject_island", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="sentential_subject_island", hf_avail_splits=["train"], @@ -5650,7 +5651,7 @@ blimp_superlative_quantifiers_1_lighteval = LightevalTaskConfig( name="blimp:superlative_quantifiers_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="superlative_quantifiers_1", hf_avail_splits=["train"], @@ -5668,7 +5669,7 @@ blimp_superlative_quantifiers_1_helm = LightevalTaskConfig( name="blimp:superlative_quantifiers_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="superlative_quantifiers_1", hf_avail_splits=["train"], @@ -5686,7 +5687,7 @@ blimp_superlative_quantifiers_2_lighteval = LightevalTaskConfig( name="blimp:superlative_quantifiers_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="superlative_quantifiers_2", hf_avail_splits=["train"], @@ -5704,7 +5705,7 @@ blimp_superlative_quantifiers_2_helm = LightevalTaskConfig( 
name="blimp:superlative_quantifiers_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="superlative_quantifiers_2", hf_avail_splits=["train"], @@ -5722,7 +5723,7 @@ blimp_tough_vs_raising_1_lighteval = LightevalTaskConfig( name="blimp:tough_vs_raising_1", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="tough_vs_raising_1", hf_avail_splits=["train"], @@ -5740,7 +5741,7 @@ blimp_tough_vs_raising_1_helm = LightevalTaskConfig( name="blimp:tough_vs_raising_1", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="tough_vs_raising_1", hf_avail_splits=["train"], @@ -5758,7 +5759,7 @@ blimp_tough_vs_raising_2_lighteval = LightevalTaskConfig( name="blimp:tough_vs_raising_2", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="tough_vs_raising_2", hf_avail_splits=["train"], @@ -5776,7 +5777,7 @@ blimp_tough_vs_raising_2_helm = LightevalTaskConfig( name="blimp:tough_vs_raising_2", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="tough_vs_raising_2", hf_avail_splits=["train"], @@ -5794,7 +5795,7 @@ blimp_transitive_lighteval = LightevalTaskConfig( name="blimp:transitive", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="transitive", hf_avail_splits=["train"], @@ -5812,7 +5813,7 @@ blimp_transitive_helm = LightevalTaskConfig( name="blimp:transitive", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="transitive", hf_avail_splits=["train"], @@ -5830,7 +5831,7 @@ blimp_wh_island_lighteval = LightevalTaskConfig( name="blimp:wh_island", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="wh_island", hf_avail_splits=["train"], @@ -5848,7 +5849,7 @@ blimp_wh_island_helm = LightevalTaskConfig( name="blimp:wh_island", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="wh_island", hf_avail_splits=["train"], @@ -5866,7 +5867,7 @@ blimp_wh_questions_object_gap_lighteval = LightevalTaskConfig( name="blimp:wh_questions_object_gap", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="wh_questions_object_gap", hf_avail_splits=["train"], @@ -5884,7 +5885,7 @@ blimp_wh_questions_object_gap_helm = LightevalTaskConfig( name="blimp:wh_questions_object_gap", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="wh_questions_object_gap", hf_avail_splits=["train"], @@ -5902,7 +5903,7 @@ blimp_wh_questions_subject_gap_lighteval = LightevalTaskConfig( name="blimp:wh_questions_subject_gap", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="wh_questions_subject_gap", hf_avail_splits=["train"], @@ -5920,7 +5921,7 @@ blimp_wh_questions_subject_gap_helm = LightevalTaskConfig( name="blimp:wh_questions_subject_gap", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="wh_questions_subject_gap", hf_avail_splits=["train"], @@ -5938,7 +5939,7 @@ 
blimp_wh_questions_subject_gap_long_distance_lighteval = LightevalTaskConfig( name="blimp:wh_questions_subject_gap_long_distance", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="wh_questions_subject_gap_long_distance", hf_avail_splits=["train"], @@ -5956,7 +5957,7 @@ blimp_wh_questions_subject_gap_long_distance_helm = LightevalTaskConfig( name="blimp:wh_questions_subject_gap_long_distance", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="wh_questions_subject_gap_long_distance", hf_avail_splits=["train"], @@ -5974,7 +5975,7 @@ blimp_wh_vs_that_no_gap_lighteval = LightevalTaskConfig( name="blimp:wh_vs_that_no_gap", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="wh_vs_that_no_gap", hf_avail_splits=["train"], @@ -5992,7 +5993,7 @@ blimp_wh_vs_that_no_gap_helm = LightevalTaskConfig( name="blimp:wh_vs_that_no_gap", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="wh_vs_that_no_gap", hf_avail_splits=["train"], @@ -6010,7 +6011,7 @@ blimp_wh_vs_that_no_gap_long_distance_lighteval = LightevalTaskConfig( name="blimp:wh_vs_that_no_gap_long_distance", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="wh_vs_that_no_gap_long_distance", hf_avail_splits=["train"], @@ -6028,7 +6029,7 @@ blimp_wh_vs_that_no_gap_long_distance_helm = LightevalTaskConfig( name="blimp:wh_vs_that_no_gap_long_distance", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="wh_vs_that_no_gap_long_distance", hf_avail_splits=["train"], @@ -6046,7 +6047,7 @@ blimp_wh_vs_that_with_gap_lighteval = LightevalTaskConfig( name="blimp:wh_vs_that_with_gap", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="wh_vs_that_with_gap", hf_avail_splits=["train"], @@ -6064,7 +6065,7 @@ blimp_wh_vs_that_with_gap_helm = LightevalTaskConfig( name="blimp:wh_vs_that_with_gap", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="wh_vs_that_with_gap", hf_avail_splits=["train"], @@ -6082,7 +6083,7 @@ blimp_wh_vs_that_with_gap_long_distance_lighteval = LightevalTaskConfig( name="blimp:wh_vs_that_with_gap_long_distance", suite=["lighteval", "blimp"], - prompt_function="blimp", + prompt_function=prompt.blimp, hf_repo="blimp", hf_subset="wh_vs_that_with_gap_long_distance", hf_avail_splits=["train"], @@ -6100,7 +6101,7 @@ blimp_wh_vs_that_with_gap_long_distance_helm = LightevalTaskConfig( name="blimp:wh_vs_that_with_gap_long_distance", suite=["helm", "blimp"], - prompt_function="blimp_helm", + prompt_function=prompt.blimp_helm, hf_repo="blimp", hf_subset="wh_vs_that_with_gap_long_distance", hf_avail_splits=["train"], @@ -6118,7 +6119,7 @@ bold_helm = LightevalTaskConfig( name="bold", suite=["helm"], - prompt_function="bold", + prompt_function=prompt.bold, hf_repo="lighteval/bold_helm", hf_subset="all", hf_avail_splits=["train", "test"], @@ -6136,7 +6137,7 @@ bold_gender_helm = LightevalTaskConfig( name="bold:gender", suite=["helm"], - prompt_function="bold", + prompt_function=prompt.bold, hf_repo="lighteval/bold_helm", hf_subset="gender", hf_avail_splits=["train", "test"], @@ -6154,7 +6155,7 @@ 
bold_political_ideology_helm = LightevalTaskConfig( name="bold:political_ideology", suite=["helm"], - prompt_function="bold", + prompt_function=prompt.bold, hf_repo="lighteval/bold_helm", hf_subset="political_ideology", hf_avail_splits=["train", "test"], @@ -6172,7 +6173,7 @@ bold_profession_helm = LightevalTaskConfig( name="bold:profession", suite=["helm"], - prompt_function="bold", + prompt_function=prompt.bold, hf_repo="lighteval/bold_helm", hf_subset="profession", hf_avail_splits=["train", "test"], @@ -6190,7 +6191,7 @@ bold_race_helm = LightevalTaskConfig( name="bold:race", suite=["helm"], - prompt_function="bold", + prompt_function=prompt.bold, hf_repo="lighteval/bold_helm", hf_subset="race", hf_avail_splits=["train", "test"], @@ -6208,7 +6209,7 @@ bold_religious_ideology_helm = LightevalTaskConfig( name="bold:religious_ideology", suite=["helm"], - prompt_function="bold", + prompt_function=prompt.bold, hf_repo="lighteval/bold_helm", hf_subset="religious_ideology", hf_avail_splits=["train", "test"], @@ -6226,7 +6227,7 @@ boolq_helm = LightevalTaskConfig( name="boolq", suite=["helm", "helm_general"], - prompt_function="boolq_helm", + prompt_function=prompt.boolq_helm, hf_repo="lighteval/boolq_helm", hf_subset="default", hf_avail_splits=["train", "validation"], @@ -6244,7 +6245,7 @@ boolq_contrastset_helm = LightevalTaskConfig( name="boolq:contrastset", suite=["helm"], - prompt_function="boolq_helm_contrastset", + prompt_function=prompt.boolq_helm_contrastset, hf_repo="lighteval/boolq_helm", hf_subset="default", hf_avail_splits=["validation"], @@ -6262,7 +6263,7 @@ bridging_anaphora_resolution_barqa_bigbench = LightevalTaskConfig( name="bridging_anaphora_resolution_barqa", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="bridging_anaphora_resolution_barqa", hf_avail_splits=["default", "train", "validation"], @@ -6280,7 +6281,7 @@ causal_judgment_bigbench = LightevalTaskConfig( name="causal_judgment", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="causal_judgment", hf_avail_splits=["default", "train", "validation"], @@ -6298,7 +6299,7 @@ cause_and_effect_bigbench = LightevalTaskConfig( name="cause_and_effect", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="cause_and_effect", hf_avail_splits=["default", "train", "validation"], @@ -6316,7 +6317,7 @@ checkmate_in_one_bigbench = LightevalTaskConfig( name="checkmate_in_one", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="checkmate_in_one", hf_avail_splits=["default", "train", "validation"], @@ -6334,7 +6335,7 @@ chess_state_tracking_bigbench = LightevalTaskConfig( name="chess_state_tracking", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="chess_state_tracking", hf_avail_splits=["default", "train", "validation"], @@ -6352,7 +6353,7 @@ chinese_remainder_theorem_bigbench = LightevalTaskConfig( name="chinese_remainder_theorem", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="chinese_remainder_theorem", hf_avail_splits=["default", "train", "validation"], @@ -6370,7 +6371,7 @@ cifar10_classification_bigbench = LightevalTaskConfig( 
name="cifar10_classification", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="cifar10_classification", hf_avail_splits=["default", "train", "validation"], @@ -6388,7 +6389,7 @@ civil_comments_helm = LightevalTaskConfig( name="civil_comments", suite=["helm", "helm_general"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="all", hf_avail_splits=["train", "test"], @@ -6413,7 +6414,7 @@ civil_comments_LGBTQ_helm = LightevalTaskConfig( name="civil_comments:LGBTQ", suite=["helm"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="LGBTQ", hf_avail_splits=["train", "test"], @@ -6438,7 +6439,7 @@ civil_comments_black_helm = LightevalTaskConfig( name="civil_comments:black", suite=["helm"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="black", hf_avail_splits=["train", "test"], @@ -6463,7 +6464,7 @@ civil_comments_christian_helm = LightevalTaskConfig( name="civil_comments:christian", suite=["helm"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="christian", hf_avail_splits=["train", "test"], @@ -6488,7 +6489,7 @@ civil_comments_female_helm = LightevalTaskConfig( name="civil_comments:female", suite=["helm"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="female", hf_avail_splits=["train", "test"], @@ -6513,7 +6514,7 @@ civil_comments_male_helm = LightevalTaskConfig( name="civil_comments:male", suite=["helm"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="male", hf_avail_splits=["train", "test"], @@ -6538,7 +6539,7 @@ civil_comments_muslim_helm = LightevalTaskConfig( name="civil_comments:muslim", suite=["helm"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="muslim", hf_avail_splits=["train", "test"], @@ -6563,7 +6564,7 @@ civil_comments_other_religions_helm = LightevalTaskConfig( name="civil_comments:other_religions", suite=["helm"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="other_religions", hf_avail_splits=["train", "test"], @@ -6588,7 +6589,7 @@ civil_comments_white_helm = LightevalTaskConfig( name="civil_comments:white", suite=["helm"], - prompt_function="civil_comments", + prompt_function=prompt.civil_comments, hf_repo="lighteval/civil_comments_helm", hf_subset="white", hf_avail_splits=["train", "test"], @@ -6613,7 +6614,7 @@ code_line_description_bigbench_lite = LightevalTaskConfig( name="code_line_description", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_and_after_query", + prompt_function=prompt.bigbench_linefeed_before_and_after_query, hf_repo="bigbench", hf_subset="code_line_description", hf_avail_splits=["default", "train", "validation"], @@ -6631,7 +6632,7 @@ codenames_bigbench = LightevalTaskConfig( name="codenames", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="codenames", hf_avail_splits=["default", "train", 
"validation"], @@ -6649,7 +6650,7 @@ color_bigbench = LightevalTaskConfig( name="color", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="color", hf_avail_splits=["default", "train", "validation"], @@ -6667,7 +6668,7 @@ common_morpheme_bigbench = LightevalTaskConfig( name="common_morpheme", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="common_morpheme", hf_avail_splits=["default", "train", "validation"], @@ -6685,7 +6686,7 @@ commonsenseqa_helm = LightevalTaskConfig( name="commonsenseqa", suite=["helm", "commonsense_scenario"], - prompt_function="commonsense_qa", + prompt_function=prompt.commonsense_qa, hf_repo="commonsense_qa", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -6703,7 +6704,7 @@ conceptual_combinations_bigbench_lite = LightevalTaskConfig( name="conceptual_combinations", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_whitespace_after_query", + prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query, hf_repo="bigbench", hf_subset="conceptual_combinations", hf_avail_splits=["default", "train", "validation"], @@ -6721,7 +6722,7 @@ conlang_translation_bigbench_lite = LightevalTaskConfig( name="conlang_translation", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_whitespace_after_query", + prompt_function=prompt.bigbench_whitespace_after_query, hf_repo="bigbench", hf_subset="conlang_translation", hf_avail_splits=["default", "train", "validation"], @@ -6738,7 +6739,7 @@ contextual_parametric_knowledge_conflicts_bigbench = LightevalTaskConfig( name="contextual_parametric_knowledge_conflicts", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="contextual_parametric_knowledge_conflicts", hf_avail_splits=["default", "train", "validation"], @@ -6756,7 +6757,7 @@ copyright_n_books_1000_extractions_per_book_1_prefix_length_125_helm = LightevalTaskConfig( name="copyright:n_books_1000-extractions_per_book_1-prefix_length_125", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="n_books_1000-extractions_per_book_1-prefix_length_125", hf_avail_splits=["train"], @@ -6774,7 +6775,7 @@ copyright_n_books_1000_extractions_per_book_1_prefix_length_25_helm = LightevalTaskConfig( name="copyright:n_books_1000-extractions_per_book_1-prefix_length_25", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="n_books_1000-extractions_per_book_1-prefix_length_25", hf_avail_splits=["train"], @@ -6792,7 +6793,7 @@ copyright_n_books_1000_extractions_per_book_1_prefix_length_5_helm = LightevalTaskConfig( name="copyright:n_books_1000-extractions_per_book_1-prefix_length_5", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="n_books_1000-extractions_per_book_1-prefix_length_5", hf_avail_splits=["train"], @@ -6810,7 +6811,7 @@ copyright_n_books_1000_extractions_per_book_3_prefix_length_125_helm = LightevalTaskConfig( name="copyright:n_books_1000-extractions_per_book_3-prefix_length_125", suite=["helm", "copyright_scenario"], - 
prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="n_books_1000-extractions_per_book_3-prefix_length_125", hf_avail_splits=["train"], @@ -6828,7 +6829,7 @@ copyright_n_books_1000_extractions_per_book_3_prefix_length_25_helm = LightevalTaskConfig( name="copyright:n_books_1000-extractions_per_book_3-prefix_length_25", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="n_books_1000-extractions_per_book_3-prefix_length_25", hf_avail_splits=["train"], @@ -6846,7 +6847,7 @@ copyright_n_books_1000_extractions_per_book_3_prefix_length_5_helm = LightevalTaskConfig( name="copyright:n_books_1000-extractions_per_book_3-prefix_length_5", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="n_books_1000-extractions_per_book_3-prefix_length_5", hf_avail_splits=["train"], @@ -6864,7 +6865,7 @@ copyright_oh_the_places_helm = LightevalTaskConfig( name="copyright:oh_the_places", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="oh_the_places", hf_avail_splits=["train"], @@ -6882,7 +6883,7 @@ copyright_pilot_helm = LightevalTaskConfig( name="copyright:pilot", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="pilot", hf_avail_splits=["train"], @@ -6900,7 +6901,7 @@ copyright_popular_books_prefix_length_10_helm = LightevalTaskConfig( name="copyright:popular_books-prefix_length_10", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="popular_books-prefix_length_10", hf_avail_splits=["train"], @@ -6918,7 +6919,7 @@ copyright_popular_books_prefix_length_125_helm = LightevalTaskConfig( name="copyright:popular_books-prefix_length_125", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="popular_books-prefix_length_125", hf_avail_splits=["train"], @@ -6936,7 +6937,7 @@ copyright_popular_books_prefix_length_25_helm = LightevalTaskConfig( name="copyright:popular_books-prefix_length_25", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="popular_books-prefix_length_25", hf_avail_splits=["train"], @@ -6954,7 +6955,7 @@ copyright_popular_books_prefix_length_250_helm = LightevalTaskConfig( name="copyright:popular_books-prefix_length_250", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="popular_books-prefix_length_250", hf_avail_splits=["train"], @@ -6972,7 +6973,7 @@ copyright_popular_books_prefix_length_5_helm = LightevalTaskConfig( name="copyright:popular_books-prefix_length_5", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="popular_books-prefix_length_5", hf_avail_splits=["train"], @@ -6990,7 +6991,7 @@ copyright_popular_books_prefix_length_50_helm = LightevalTaskConfig( name="copyright:popular_books-prefix_length_50", suite=["helm", "copyright_scenario"], - 
prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="popular_books-prefix_length_50", hf_avail_splits=["train"], @@ -7008,7 +7009,7 @@ copyright_prompt_num_line_1_min_lines_20_helm = LightevalTaskConfig( name="copyright:prompt_num_line_1-min_lines_20", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="prompt_num_line_1-min_lines_20", hf_avail_splits=["train"], @@ -7026,7 +7027,7 @@ copyright_prompt_num_line_10_min_lines_20_helm = LightevalTaskConfig( name="copyright:prompt_num_line_10-min_lines_20", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="prompt_num_line_10-min_lines_20", hf_avail_splits=["train"], @@ -7044,7 +7045,7 @@ copyright_prompt_num_line_5_min_lines_20_helm = LightevalTaskConfig( name="copyright:prompt_num_line_5-min_lines_20", suite=["helm", "copyright_scenario"], - prompt_function="copyright", + prompt_function=prompt.copyright, hf_repo="lighteval/copyright_helm", hf_subset="prompt_num_line_5-min_lines_20", hf_avail_splits=["train"], @@ -7062,7 +7063,7 @@ coqa_lighteval = LightevalTaskConfig( name="coqa", suite=["lighteval"], - prompt_function="coqa", + prompt_function=prompt.coqa, hf_repo="coqa", hf_subset="default", hf_avail_splits=["train", "validation"], @@ -7080,7 +7081,7 @@ coqa_bb_lighteval = LightevalTaskConfig( name="coqa_bb", suite=["lighteval", "bigbench_programmatic", "bigbench"], - prompt_function="coqa", + prompt_function=prompt.coqa, hf_repo="coqa", hf_subset="default", hf_avail_splits=["train", "validation"], @@ -7098,7 +7099,7 @@ covid_dialogue_helm = LightevalTaskConfig( name="covid_dialogue", suite=["helm"], - prompt_function="covid_dialogue", + prompt_function=prompt.covid_dialogue, hf_repo="lighteval/covid_dialogue", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -7116,7 +7117,7 @@ crash_blossom_bigbench = LightevalTaskConfig( name="crash_blossom", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="crash_blossom", hf_avail_splits=["default", "train", "validation"], @@ -7134,7 +7135,7 @@ crass_ai_bigbench = LightevalTaskConfig( name="crass_ai", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="crass_ai", hf_avail_splits=["default", "train", "validation"], @@ -7152,7 +7153,7 @@ cryobiology_spanish_bigbench = LightevalTaskConfig( name="cryobiology_spanish", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="cryobiology_spanish", hf_avail_splits=["default", "train", "validation"], @@ -7170,7 +7171,7 @@ cryptonite_bigbench = LightevalTaskConfig( name="cryptonite", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="cryptonite", hf_avail_splits=["default", "train", "validation"], @@ -7188,7 +7189,7 @@ cs_algorithms_bigbench = LightevalTaskConfig( name="cs_algorithms", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="cs_algorithms", hf_avail_splits=["default", "train", "validation"], @@ -7206,7 +7207,7 @@ dark_humor_detection_bigbench = LightevalTaskConfig( 
name="dark_humor_detection", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="dark_humor_detection", hf_avail_splits=["default", "train", "validation"], @@ -7224,7 +7225,7 @@ date_understanding_bigbench = LightevalTaskConfig( name="date_understanding", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="date_understanding", hf_avail_splits=["default", "train", "validation"], @@ -7242,7 +7243,7 @@ disambiguation_qa_bigbench = LightevalTaskConfig( name="disambiguation_qa", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="disambiguation_qa", hf_avail_splits=["default", "train", "validation"], @@ -7260,7 +7261,7 @@ discourse_marker_prediction_bigbench = LightevalTaskConfig( name="discourse_marker_prediction", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="discourse_marker_prediction", hf_avail_splits=["default", "train", "validation"], @@ -7278,7 +7279,7 @@ disfl_qa_bigbench = LightevalTaskConfig( name="disfl_qa", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="disfl_qa", hf_avail_splits=["default", "train", "validation"], @@ -7296,7 +7297,7 @@ drop_lighteval = LightevalTaskConfig( name="drop", suite=["lighteval"], - prompt_function="drop", + prompt_function=prompt.drop, hf_repo="lighteval/drop_harness", hf_subset="default", hf_avail_splits=["train", "validation"], @@ -7314,7 +7315,7 @@ dyck_language_2_helm = LightevalTaskConfig( name="dyck_language:2", suite=["helm"], - prompt_function="dyck_language", + prompt_function=prompt.dyck_language, hf_repo="lighteval/DyckLanguage", hf_subset="2", hf_avail_splits=["train", "test"], @@ -7332,7 +7333,7 @@ dyck_language_3_helm = LightevalTaskConfig( name="dyck_language:3", suite=["helm"], - prompt_function="dyck_language", + prompt_function=prompt.dyck_language, hf_repo="lighteval/DyckLanguage", hf_subset="3", hf_avail_splits=["train", "test"], @@ -7350,7 +7351,7 @@ dyck_language_4_helm = LightevalTaskConfig( name="dyck_language:4", suite=["helm"], - prompt_function="dyck_language", + prompt_function=prompt.dyck_language, hf_repo="lighteval/DyckLanguage", hf_subset="4", hf_avail_splits=["train", "test"], @@ -7368,7 +7369,7 @@ dyck_languages_bigbench = LightevalTaskConfig( name="dyck_languages", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="dyck_languages", hf_avail_splits=["default", "train", "validation"], @@ -7386,7 +7387,7 @@ elementary_math_qa_bigbench = LightevalTaskConfig( name="elementary_math_qa", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="elementary_math_qa", hf_avail_splits=["default", "train", "validation"], @@ -7404,7 +7405,7 @@ emoji_movie_bigbench_lite = LightevalTaskConfig( name="emoji_movie", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_whitespace_after_query", + prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query, hf_repo="bigbench", hf_subset="emoji_movie", hf_avail_splits=["default", "train", "validation"], @@ -7422,7 +7423,7 @@ emojis_emotion_prediction_bigbench = 
LightevalTaskConfig( name="emojis_emotion_prediction", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="emojis_emotion_prediction", hf_avail_splits=["default", "train", "validation"], @@ -7440,7 +7441,7 @@ empirical_judgments_bigbench = LightevalTaskConfig( name="empirical_judgments", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="empirical_judgments", hf_avail_splits=["default", "train", "validation"], @@ -7458,7 +7459,7 @@ english_proverbs_bigbench = LightevalTaskConfig( name="english_proverbs", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="english_proverbs", hf_avail_splits=["default", "train", "validation"], @@ -7476,7 +7477,7 @@ english_russian_proverbs_bigbench = LightevalTaskConfig( name="english_russian_proverbs", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="english_russian_proverbs", hf_avail_splits=["default", "train", "validation"], @@ -7494,7 +7495,7 @@ entailed_polarity_bigbench = LightevalTaskConfig( name="entailed_polarity", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="entailed_polarity", hf_avail_splits=["default", "train", "validation"], @@ -7512,7 +7513,7 @@ entailed_polarity_hindi_bigbench = LightevalTaskConfig( name="entailed_polarity_hindi", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="entailed_polarity_hindi", hf_avail_splits=["default", "train", "validation"], @@ -7530,7 +7531,7 @@ entity_data_imputation_Buy_helm = LightevalTaskConfig( name="entity_data_imputation:Buy", suite=["helm"], - prompt_function="entity_data_imputation", + prompt_function=prompt.entity_data_imputation, hf_repo="lighteval/Buy", hf_subset="default", hf_avail_splits=["train", "test", "valid"], @@ -7548,7 +7549,7 @@ entity_data_imputation_Restaurant_helm = LightevalTaskConfig( name="entity_data_imputation:Restaurant", suite=["helm"], - prompt_function="entity_data_imputation", + prompt_function=prompt.entity_data_imputation, hf_repo="lighteval/Restaurant", hf_subset="default", hf_avail_splits=["train"], @@ -7566,7 +7567,7 @@ entity_matching_Abt_Buy_helm = LightevalTaskConfig( name="entity_matching:Abt_Buy", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Abt_Buy", hf_avail_splits=["train", "test", "validation"], @@ -7584,7 +7585,7 @@ entity_matching_Amazon_Google_helm = LightevalTaskConfig( name="entity_matching:Amazon_Google", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Amazon_Google", hf_avail_splits=["train", "test", "validation"], @@ -7602,7 +7603,7 @@ entity_matching_Beer_helm = LightevalTaskConfig( name="entity_matching:Beer", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Beer", hf_avail_splits=["train", "test", "validation"], @@ -7620,7 +7621,7 @@ entity_matching_Company_helm = LightevalTaskConfig( name="entity_matching:Company", suite=["helm"], - prompt_function="entity_matching", + 
prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Company", hf_avail_splits=["train", "test", "validation"], @@ -7638,7 +7639,7 @@ entity_matching_DBLP_ACM_helm = LightevalTaskConfig( name="entity_matching:DBLP_ACM", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="DBLP_ACM", hf_avail_splits=["train", "test", "validation"], @@ -7656,7 +7657,7 @@ entity_matching_DBLP_GoogleScholar_helm = LightevalTaskConfig( name="entity_matching:DBLP_GoogleScholar", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="DBLP_GoogleScholar", hf_avail_splits=["train", "test", "validation"], @@ -7674,7 +7675,7 @@ entity_matching_Dirty_DBLP_ACM_helm = LightevalTaskConfig( name="entity_matching:Dirty_DBLP_ACM", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Dirty_DBLP_ACM", hf_avail_splits=["train", "test", "validation"], @@ -7692,7 +7693,7 @@ entity_matching_Dirty_DBLP_GoogleScholar_helm = LightevalTaskConfig( name="entity_matching:Dirty_DBLP_GoogleScholar", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Dirty_DBLP_GoogleScholar", hf_avail_splits=["train", "test", "validation"], @@ -7710,7 +7711,7 @@ entity_matching_Dirty_Walmart_Amazon_helm = LightevalTaskConfig( name="entity_matching:Dirty_Walmart_Amazon", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Dirty_Walmart_Amazon", hf_avail_splits=["train", "test", "validation"], @@ -7728,7 +7729,7 @@ entity_matching_Dirty_iTunes_Amazon_helm = LightevalTaskConfig( name="entity_matching:Dirty_iTunes_Amazon", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Dirty_iTunes_Amazon", hf_avail_splits=["train", "test", "validation"], @@ -7746,7 +7747,7 @@ entity_matching_Fodors_Zagats_helm = LightevalTaskConfig( name="entity_matching:Fodors_Zagats", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Fodors_Zagats", hf_avail_splits=["train", "test", "validation"], @@ -7764,7 +7765,7 @@ entity_matching_Walmart_Amazon_helm = LightevalTaskConfig( name="entity_matching:Walmart_Amazon", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="Walmart_Amazon", hf_avail_splits=["train", "test", "validation"], @@ -7782,7 +7783,7 @@ entity_matching_iTunes_Amazon_helm = LightevalTaskConfig( name="entity_matching:iTunes_Amazon", suite=["helm"], - prompt_function="entity_matching", + prompt_function=prompt.entity_matching, hf_repo="lighteval/EntityMatching", hf_subset="iTunes_Amazon", hf_avail_splits=["train", "test", "validation"], @@ -7800,7 +7801,7 @@ epistemic_reasoning_bigbench = LightevalTaskConfig( name="epistemic_reasoning", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="epistemic_reasoning", hf_avail_splits=["default", "train", "validation"], @@ -7818,7 +7819,7 @@ ethics_commonsense_lighteval = LightevalTaskConfig(
name="ethics:commonsense", suite=["lighteval", "ethics"], - prompt_function="ethics_commonsense", + prompt_function=prompt.ethics_commonsense, hf_repo="lighteval/hendrycks_ethics", hf_subset="commonsense", hf_avail_splits=["train", "validation", "test"], @@ -7836,7 +7837,7 @@ ethics_deontology_lighteval = LightevalTaskConfig( name="ethics:deontology", suite=["lighteval", "ethics"], - prompt_function="ethics_deontology", + prompt_function=prompt.ethics_deontology, hf_repo="lighteval/hendrycks_ethics", hf_subset="deontology", hf_avail_splits=["train", "validation", "test"], @@ -7854,7 +7855,7 @@ ethics_justice_lighteval = LightevalTaskConfig( name="ethics:justice", suite=["lighteval", "ethics"], - prompt_function="ethics_justice", + prompt_function=prompt.ethics_justice, hf_repo="lighteval/hendrycks_ethics", hf_subset="justice", hf_avail_splits=["train", "validation", "test"], @@ -7872,7 +7873,7 @@ ethics_utilitarianism_lighteval = LightevalTaskConfig( name="ethics:utilitarianism", suite=["lighteval", "ethics"], - prompt_function="ethics_utilitarianism", + prompt_function=prompt.ethics_utilitarianism, hf_repo="lighteval/hendrycks_ethics", hf_subset="utilitarianism", hf_avail_splits=["train", "validation", "test"], @@ -7890,7 +7891,7 @@ ethics_virtue_lighteval = LightevalTaskConfig( name="ethics:virtue", suite=["lighteval", "ethics"], - prompt_function="ethics_virtue", + prompt_function=prompt.ethics_virtue, hf_repo="lighteval/hendrycks_ethics", hf_subset="virtue", hf_avail_splits=["train", "validation", "test"], @@ -7908,7 +7909,7 @@ evaluating_information_essentiality_bigbench = LightevalTaskConfig( name="evaluating_information_essentiality", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="evaluating_information_essentiality", hf_avail_splits=["default", "train", "validation"], @@ -7926,7 +7927,7 @@ fact_checker_bigbench = LightevalTaskConfig( name="fact_checker", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="fact_checker", hf_avail_splits=["default", "train", "validation"], @@ -7944,7 +7945,7 @@ fantasy_reasoning_bigbench = LightevalTaskConfig( name="fantasy_reasoning", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="fantasy_reasoning", hf_avail_splits=["default", "train", "validation"], @@ -7962,7 +7963,7 @@ few_shot_nlg_bigbench = LightevalTaskConfig( name="few_shot_nlg", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="few_shot_nlg", hf_avail_splits=["default", "train", "validation"], @@ -7980,7 +7981,7 @@ figure_of_speech_detection_bigbench = LightevalTaskConfig( name="figure_of_speech_detection", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="figure_of_speech_detection", hf_avail_splits=["default", "train", "validation"], @@ -7998,7 +7999,7 @@ formal_fallacies_syllogisms_negation_bigbench_lite = LightevalTaskConfig( name="formal_fallacies_syllogisms_negation", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_whitespace_after_query", + prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query, hf_repo="bigbench", hf_subset="formal_fallacies_syllogisms_negation", hf_avail_splits=["default", "train", 
"validation"], @@ -8016,7 +8017,7 @@ gem_bigbench = LightevalTaskConfig( name="gem", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="gem", hf_avail_splits=["default", "train", "validation"], @@ -8034,7 +8035,7 @@ gender_inclusive_sentences_german_bigbench = LightevalTaskConfig( name="gender_inclusive_sentences_german", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="gender_inclusive_sentences_german", hf_avail_splits=["default", "train", "validation"], @@ -8052,7 +8053,7 @@ general_knowledge_bigbench = LightevalTaskConfig( name="general_knowledge", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="general_knowledge", hf_avail_splits=["default", "train", "validation"], @@ -8070,7 +8071,7 @@ geometric_shapes_bigbench = LightevalTaskConfig( name="geometric_shapes", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="geometric_shapes", hf_avail_splits=["default", "train", "validation"], @@ -8088,7 +8089,7 @@ glue_cola_lighteval = LightevalTaskConfig( name="glue:cola", suite=["lighteval", "glue"], - prompt_function="cola", + prompt_function=prompt.cola, hf_repo="glue", hf_subset="cola", hf_avail_splits=["test", "train", "validation"], @@ -8106,7 +8107,7 @@ glue_mnli_lighteval = LightevalTaskConfig( name="glue:mnli", suite=["lighteval", "glue"], - prompt_function="mnli", + prompt_function=prompt.mnli, hf_repo="glue", hf_subset="mnli_matched", hf_avail_splits=["train", "validation"], @@ -8124,7 +8125,7 @@ glue_mnli_mismatched_lighteval = LightevalTaskConfig( name="glue:mnli_mismatched", suite=["lighteval", "glue"], - prompt_function="mnli", + prompt_function=prompt.mnli, hf_repo="glue", hf_subset="mnli_mismatched", hf_avail_splits=["train", "validation"], @@ -8142,7 +8143,7 @@ glue_mrpc_lighteval = LightevalTaskConfig( name="glue:mrpc", suite=["lighteval", "glue"], - prompt_function="mrpc", + prompt_function=prompt.mrpc, hf_repo="glue", hf_subset="mrpc", hf_avail_splits=["test", "train", "validation"], @@ -8160,7 +8161,7 @@ glue_qnli_lighteval = LightevalTaskConfig( name="glue:qnli", suite=["lighteval", "glue"], - prompt_function="qnli", + prompt_function=prompt.qnli, hf_repo="glue", hf_subset="qnli", hf_avail_splits=["test", "train", "validation"], @@ -8178,7 +8179,7 @@ glue_qqp_lighteval = LightevalTaskConfig( name="glue:qqp", suite=["lighteval", "glue"], - prompt_function="qqp", + prompt_function=prompt.qqp, hf_repo="glue", hf_subset="qqp", hf_avail_splits=["train", "validation", "test"], @@ -8196,7 +8197,7 @@ glue_rte_lighteval = LightevalTaskConfig( name="glue:rte", suite=["lighteval", "glue"], - prompt_function="rte", + prompt_function=prompt.rte, hf_repo="glue", hf_subset="rte", hf_avail_splits=["test", "train", "validation"], @@ -8214,7 +8215,7 @@ glue_sst2_lighteval = LightevalTaskConfig( name="glue:sst2", suite=["lighteval", "glue"], - prompt_function="sst", + prompt_function=prompt.sst, hf_repo="glue", hf_subset="sst2", hf_avail_splits=["test", "train", "validation"], @@ -8232,7 +8233,7 @@ glue_stsb_lighteval = LightevalTaskConfig( name="glue:stsb", suite=["lighteval", "glue"], - prompt_function="stsb", + prompt_function=prompt.stsb, hf_repo="glue", hf_subset="stsb", hf_avail_splits=["test", "train", "validation"], @@ -8250,7 +8251,7 @@ glue_wnli_lighteval = 
LightevalTaskConfig( name="glue:wnli", suite=["lighteval", "glue"], - prompt_function="wnli", + prompt_function=prompt.wnli, hf_repo="glue", hf_subset="wnli", hf_avail_splits=["test", "train", "validation"], @@ -8268,7 +8269,7 @@ goal_step_wikihow_bigbench = LightevalTaskConfig( name="goal_step_wikihow", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="goal_step_wikihow", hf_avail_splits=["default", "train", "validation"], @@ -8286,7 +8287,7 @@ gpqa_lighteval = LightevalTaskConfig( name="gpqa", suite=["lighteval"], - prompt_function="gpqa", + prompt_function=prompt.gpqa, hf_repo="Idavidrein/gpqa", hf_subset="gpqa_main", hf_avail_splits=["train"], @@ -8304,7 +8305,7 @@ gre_reading_comprehension_bigbench = LightevalTaskConfig( name="gre_reading_comprehension", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="gre_reading_comprehension", hf_avail_splits=["default", "train", "validation"], @@ -8322,7 +8323,7 @@ gsm8k_leaderboard = LightevalTaskConfig( name="gsm8k", suite=["leaderboard"], - prompt_function="gsm8k", + prompt_function=prompt.gsm8k, hf_repo="gsm8k", hf_subset="main", hf_avail_splits=["train", "test"], @@ -8340,7 +8341,7 @@ gsm8k_lighteval = LightevalTaskConfig( name="gsm8k", suite=["lighteval"], - prompt_function="gsm8k", + prompt_function=prompt.gsm8k, hf_repo="gsm8k", hf_subset="main", hf_avail_splits=["train", "test"], @@ -8358,7 +8359,7 @@ headqa_en_lighteval = LightevalTaskConfig( name="headqa:en", suite=["lighteval", "headqa"], - prompt_function="headqa", + prompt_function=prompt.headqa, hf_repo="lighteval/headqa_harness", hf_subset="en", hf_avail_splits=["train", "test", "validation"], @@ -8376,7 +8377,7 @@ headqa_es_lighteval = LightevalTaskConfig( name="headqa:es", suite=["lighteval", "headqa"], - prompt_function="headqa", + prompt_function=prompt.headqa, hf_repo="lighteval/headqa_harness", hf_subset="es", hf_avail_splits=["train", "test", "validation"], @@ -8394,7 +8395,7 @@ hellaswag_leaderboard = LightevalTaskConfig( name="hellaswag", suite=["leaderboard"], - prompt_function="hellaswag_harness", + prompt_function=prompt.hellaswag_harness, hf_repo="hellaswag", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -8412,7 +8413,7 @@ hellaswag_helm = LightevalTaskConfig( name="hellaswag", suite=["helm", "helm_general"], - prompt_function="hellaswag_helm", + prompt_function=prompt.hellaswag_helm, hf_repo="hellaswag", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -8430,7 +8431,7 @@ hhh_alignment_bigbench = LightevalTaskConfig( name="hhh_alignment", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="hhh_alignment", hf_avail_splits=["default", "train", "validation"], @@ -8448,7 +8449,7 @@ hindi_question_answering_bigbench = LightevalTaskConfig( name="hindi_question_answering", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="hindi_question_answering", hf_avail_splits=["default", "train", "validation"], @@ -8466,7 +8467,7 @@ hindu_knowledge_bigbench_lite = LightevalTaskConfig( name="hindu_knowledge", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_whitespace_after_query", + prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query, 
hf_repo="bigbench", hf_subset="hindu_knowledge", hf_avail_splits=["default", "train", "validation"], @@ -8484,7 +8485,7 @@ hinglish_toxicity_bigbench = LightevalTaskConfig( name="hinglish_toxicity", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="hinglish_toxicity", hf_avail_splits=["default", "train", "validation"], @@ -8502,7 +8503,7 @@ human_organs_senses_bigbench = LightevalTaskConfig( name="human_organs_senses", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="human_organs_senses", hf_avail_splits=["default", "train", "validation"], @@ -8520,7 +8521,7 @@ humaneval_helm = LightevalTaskConfig( name="humaneval", suite=["helm", "code_scenario"], - prompt_function="humaneval", + prompt_function=prompt.humaneval, hf_repo="openai_humaneval", hf_subset="openai_humaneval", hf_avail_splits=["test"], @@ -8538,7 +8539,7 @@ hyperbaton_bigbench = LightevalTaskConfig( name="hyperbaton", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="hyperbaton", hf_avail_splits=["default", "train", "validation"], @@ -8556,7 +8557,7 @@ identify_math_theorems_bigbench = LightevalTaskConfig( name="identify_math_theorems", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="identify_math_theorems", hf_avail_splits=["default", "train", "validation"], @@ -8574,7 +8575,7 @@ identify_odd_metaphor_bigbench = LightevalTaskConfig( name="identify_odd_metaphor", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="identify_odd_metaphor", hf_avail_splits=["default", "train", "validation"], @@ -8592,7 +8593,7 @@ imdb_helm = LightevalTaskConfig( name="imdb", suite=["helm", "helm_general"], - prompt_function="imdb", + prompt_function=prompt.imdb, hf_repo="lighteval/IMDB_helm", hf_subset="default", hf_avail_splits=["train", "test"], @@ -8617,7 +8618,7 @@ imdb_contrastset_helm = LightevalTaskConfig( name="imdb:contrastset", suite=["helm"], - prompt_function="imdb_contrastset", + prompt_function=prompt.imdb_contrastset, hf_repo="lighteval/IMDB_helm", hf_subset="default", hf_avail_splits=["test"], @@ -8642,7 +8643,7 @@ implicatures_bigbench = LightevalTaskConfig( name="implicatures", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="implicatures", hf_avail_splits=["default", "train", "validation"], @@ -8660,7 +8661,7 @@ implicit_relations_bigbench = LightevalTaskConfig( name="implicit_relations", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="implicit_relations", hf_avail_splits=["default", "train", "validation"], @@ -8678,7 +8679,7 @@ intent_recognition_bigbench = LightevalTaskConfig( name="intent_recognition", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="intent_recognition", hf_avail_splits=["default", "train", "validation"], @@ -8696,7 +8697,7 @@ interactive_qa_mmlu_abstract_algebra_helm = LightevalTaskConfig( name="interactive_qa_mmlu:abstract_algebra", suite=["helm", "interactive_qa_mmlu_scenario"], - prompt_function="mmlu_qa_abstract_algebra", + 
prompt_function=prompt.mmlu_qa_abstract_algebra, hf_repo="lighteval/mmlu", hf_subset="abstract_algebra", hf_avail_splits=["dev", "test"], @@ -8714,7 +8715,7 @@ interactive_qa_mmlu_college_chemistry_helm = LightevalTaskConfig( name="interactive_qa_mmlu:college_chemistry", suite=["helm", "interactive_qa_mmlu_scenario"], - prompt_function="mmlu_qa_college_chemistry", + prompt_function=prompt.mmlu_qa_college_chemistry, hf_repo="lighteval/mmlu", hf_subset="college_chemistry", hf_avail_splits=["dev", "test"], @@ -8732,7 +8733,7 @@ interactive_qa_mmlu_global_facts_helm = LightevalTaskConfig( name="interactive_qa_mmlu:global_facts", suite=["helm", "interactive_qa_mmlu_scenario"], - prompt_function="mmlu_qa_global_facts", + prompt_function=prompt.mmlu_qa_global_facts, hf_repo="lighteval/mmlu", hf_subset="global_facts", hf_avail_splits=["dev", "test"], @@ -8750,7 +8751,7 @@ interactive_qa_mmlu_miscellaneous_helm = LightevalTaskConfig( name="interactive_qa_mmlu:miscellaneous", suite=["helm", "interactive_qa_mmlu_scenario"], - prompt_function="mmlu_qa_miscellaneous", + prompt_function=prompt.mmlu_qa_miscellaneous, hf_repo="lighteval/mmlu", hf_subset="miscellaneous", hf_avail_splits=["dev", "test"], @@ -8768,7 +8769,7 @@ interactive_qa_mmlu_nutrition_helm = LightevalTaskConfig( name="interactive_qa_mmlu:nutrition", suite=["helm", "interactive_qa_mmlu_scenario"], - prompt_function="mmlu_qa_nutrition", + prompt_function=prompt.mmlu_qa_nutrition, hf_repo="lighteval/mmlu", hf_subset="nutrition", hf_avail_splits=["dev", "test"], @@ -8786,7 +8787,7 @@ interactive_qa_mmlu_us_foreign_policy_helm = LightevalTaskConfig( name="interactive_qa_mmlu:us_foreign_policy", suite=["helm", "interactive_qa_mmlu_scenario"], - prompt_function="mmlu_qa_us_foreign_policy", + prompt_function=prompt.mmlu_qa_us_foreign_policy, hf_repo="lighteval/mmlu", hf_subset="us_foreign_policy", hf_avail_splits=["dev", "test"], @@ -8804,7 +8805,7 @@ international_phonetic_alphabet_nli_bigbench = LightevalTaskConfig( name="international_phonetic_alphabet_nli", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="international_phonetic_alphabet_nli", hf_avail_splits=["default", "train", "validation"], @@ -8822,7 +8823,7 @@ international_phonetic_alphabet_transliterate_bigbench = LightevalTaskConfig( name="international_phonetic_alphabet_transliterate", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="international_phonetic_alphabet_transliterate", hf_avail_splits=["default", "train", "validation"], @@ -8840,7 +8841,7 @@ intersect_geometry_bigbench = LightevalTaskConfig( name="intersect_geometry", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="intersect_geometry", hf_avail_splits=["default", "train", "validation"], @@ -8858,7 +8859,7 @@ irony_identification_bigbench = LightevalTaskConfig( name="irony_identification", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="irony_identification", hf_avail_splits=["default", "train", "validation"], @@ -8876,7 +8877,7 @@ iwslt17_ar_en_lighteval = LightevalTaskConfig( name="iwslt17:ar-en", suite=["lighteval", "harness_selection"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_ar-en", 
hf_avail_splits=["test"], @@ -8894,7 +8895,7 @@ iwslt17_de_en_lighteval = LightevalTaskConfig( name="iwslt17:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_de-en", hf_avail_splits=["test"], @@ -8912,7 +8913,7 @@ iwslt17_en_ar_lighteval = LightevalTaskConfig( name="iwslt17:en-ar", suite=["lighteval", "harness_selection"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_ar-en", hf_avail_splits=["test"], @@ -8930,7 +8931,7 @@ iwslt17_en_de_lighteval = LightevalTaskConfig( name="iwslt17:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_en-de", hf_avail_splits=["test"], @@ -8948,7 +8949,7 @@ iwslt17_en_fr_lighteval = LightevalTaskConfig( name="iwslt17:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_en-fr", hf_avail_splits=["test"], @@ -8966,7 +8967,7 @@ iwslt17_en_ja_lighteval = LightevalTaskConfig( name="iwslt17:en-ja", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_en-ja", hf_avail_splits=["test"], @@ -8984,7 +8985,7 @@ iwslt17_en_ko_lighteval = LightevalTaskConfig( name="iwslt17:en-ko", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_en-ko", hf_avail_splits=["test"], @@ -9002,7 +9003,7 @@ iwslt17_en_zh_lighteval = LightevalTaskConfig( name="iwslt17:en-zh", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_en-zh", hf_avail_splits=["test"], @@ -9020,7 +9021,7 @@ iwslt17_fr_en_lighteval = LightevalTaskConfig( name="iwslt17:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_fr-en", hf_avail_splits=["test"], @@ -9038,7 +9039,7 @@ iwslt17_ja_en_lighteval = LightevalTaskConfig( name="iwslt17:ja-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_ja-en", hf_avail_splits=["test"], @@ -9056,7 +9057,7 @@ iwslt17_ko_en_lighteval = LightevalTaskConfig( name="iwslt17:ko-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_ko-en", hf_avail_splits=["test"], @@ -9074,7 +9075,7 @@ iwslt17_zh_en_lighteval = LightevalTaskConfig( name="iwslt17:zh-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="iwslt17_zh-en", hf_avail_splits=["test"], @@ -9092,7 +9093,7 @@ kanji_ascii_bigbench = LightevalTaskConfig( name="kanji_ascii", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + 
prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="kanji_ascii", hf_avail_splits=["default", "train", "validation"], @@ -9110,7 +9111,7 @@ kannada_bigbench = LightevalTaskConfig( name="kannada", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="kannada", hf_avail_splits=["default", "train", "validation"], @@ -9128,7 +9129,7 @@ key_value_maps_bigbench = LightevalTaskConfig( name="key_value_maps", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="key_value_maps", hf_avail_splits=["default", "train", "validation"], @@ -9146,7 +9147,7 @@ known_unknowns_bigbench_lite = LightevalTaskConfig( name="known_unknowns", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_whitespace_after_query", + prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query, hf_repo="bigbench", hf_subset="known_unknowns", hf_avail_splits=["default", "train", "validation"], @@ -9164,7 +9165,7 @@ lambada_standard_lighteval = LightevalTaskConfig( name="lambada:standard", suite=["lighteval", "lambada"], - prompt_function="lambada", + prompt_function=prompt.lambada, hf_repo="lambada", hf_subset="plain_text", hf_avail_splits=["train", "test", "validation"], @@ -9182,7 +9183,7 @@ lambada_standard_cloze_lighteval = LightevalTaskConfig( name="lambada:standard_cloze", suite=["lighteval", "lambada"], - prompt_function="lambada_cloze", + prompt_function=prompt.lambada_cloze, hf_repo="lambada", hf_subset="plain_text", hf_avail_splits=["train", "test", "validation"], @@ -9200,7 +9201,7 @@ lambada_openai_lighteval = LightevalTaskConfig( name="lambada:openai", suite=["lighteval", "lambada"], - prompt_function="lambada", + prompt_function=prompt.lambada, hf_repo="EleutherAI/lambada_openai", hf_subset="default", hf_avail_splits=["test"], @@ -9218,7 +9219,7 @@ lambada_openai_de_lighteval = LightevalTaskConfig( name="lambada:openai:de", suite=["lighteval", "lambada"], - prompt_function="lambada", + prompt_function=prompt.lambada, hf_repo="EleutherAI/lambada_openai", hf_subset="de", hf_avail_splits=["test"], @@ -9236,7 +9237,7 @@ lambada_openai_en_lighteval = LightevalTaskConfig( name="lambada:openai:en", suite=["lighteval", "lambada"], - prompt_function="lambada", + prompt_function=prompt.lambada, hf_repo="EleutherAI/lambada_openai", hf_subset="en", hf_avail_splits=["test"], @@ -9254,7 +9255,7 @@ lambada_openai_es_lighteval = LightevalTaskConfig( name="lambada:openai:es", suite=["lighteval", "lambada"], - prompt_function="lambada", + prompt_function=prompt.lambada, hf_repo="EleutherAI/lambada_openai", hf_subset="es", hf_avail_splits=["test"], @@ -9272,7 +9273,7 @@ lambada_openai_fr_lighteval = LightevalTaskConfig( name="lambada:openai:fr", suite=["lighteval", "lambada"], - prompt_function="lambada", + prompt_function=prompt.lambada, hf_repo="EleutherAI/lambada_openai", hf_subset="fr", hf_avail_splits=["test"], @@ -9290,7 +9291,7 @@ lambada_openai_it_lighteval = LightevalTaskConfig( name="lambada:openai:it", suite=["lighteval", "lambada"], - prompt_function="lambada", + prompt_function=prompt.lambada, hf_repo="EleutherAI/lambada_openai", hf_subset="it", hf_avail_splits=["test"], @@ -9308,7 +9309,7 @@ lambada_openai_cloze_lighteval = LightevalTaskConfig( name="lambada:openai_cloze", suite=["lighteval", "lambada"], - prompt_function="lambada_cloze", + prompt_function=prompt.lambada_cloze, 
hf_repo="EleutherAI/lambada_openai", hf_subset="en", hf_avail_splits=["test"], @@ -9326,7 +9327,7 @@ language_games_bigbench = LightevalTaskConfig( name="language_games", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="language_games", hf_avail_splits=["default", "train", "validation"], @@ -9344,7 +9345,7 @@ language_identification_bigbench_lite = LightevalTaskConfig( name="language_identification", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="language_identification", hf_avail_splits=["default", "train", "validation"], @@ -9362,7 +9363,7 @@ legal_summarization_billsum_helm = LightevalTaskConfig( name="legal_summarization:billsum", suite=["helm"], - prompt_function="legal_summarization", + prompt_function=prompt.legal_summarization, hf_repo="lighteval/legal_summarization", hf_subset="BillSum", hf_avail_splits=["train", "test"], @@ -9380,7 +9381,7 @@ legal_summarization_eurlexsum_helm = LightevalTaskConfig( name="legal_summarization:eurlexsum", suite=["helm"], - prompt_function="legal_summarization", + prompt_function=prompt.legal_summarization, hf_repo="lighteval/legal_summarization", hf_subset="EurLexSum", hf_avail_splits=["train", "test", "validation"], @@ -9398,7 +9399,7 @@ legal_summarization_multilexsum_helm = LightevalTaskConfig( name="legal_summarization:multilexsum", suite=["helm"], - prompt_function="multilexsum", + prompt_function=prompt.multilexsum, hf_repo="lighteval/legal_summarization", hf_subset="MultiLexSum", hf_avail_splits=["train", "test", "validation"], @@ -9416,7 +9417,7 @@ legalsupport_helm = LightevalTaskConfig( name="legalsupport", suite=["helm"], - prompt_function="legal_support", + prompt_function=prompt.legal_support, hf_repo="lighteval/LegalSupport", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -9434,7 +9435,7 @@ lexglue_case_hold_helm = LightevalTaskConfig( name="lexglue:case_hold", suite=["helm", "lex_glue_scenario"], - prompt_function="lex_glue_case_hold", + prompt_function=prompt.lex_glue_case_hold, hf_repo="lighteval/lexglue", hf_subset="case_hold", hf_avail_splits=["train", "test", "validation"], @@ -9452,7 +9453,7 @@ lexglue_ecthr_a_helm = LightevalTaskConfig( name="lexglue:ecthr_a", suite=["helm", "lex_glue_scenario"], - prompt_function="lex_glue_ecthr_a", + prompt_function=prompt.lex_glue_ecthr_a, hf_repo="lighteval/lexglue", hf_subset="ecthr_a", hf_avail_splits=["train", "test", "validation"], @@ -9470,7 +9471,7 @@ lexglue_ecthr_b_helm = LightevalTaskConfig( name="lexglue:ecthr_b", suite=["helm", "lex_glue_scenario"], - prompt_function="lex_glue_ecthr_b", + prompt_function=prompt.lex_glue_ecthr_b, hf_repo="lighteval/lexglue", hf_subset="ecthr_b", hf_avail_splits=["train", "test", "validation"], @@ -9488,7 +9489,7 @@ lexglue_eurlex_helm = LightevalTaskConfig( name="lexglue:eurlex", suite=["helm", "lex_glue_scenario"], - prompt_function="lex_glue_eurlex", + prompt_function=prompt.lex_glue_eurlex, hf_repo="lighteval/lexglue", hf_subset="eurlex", hf_avail_splits=["train", "test", "validation"], @@ -9506,7 +9507,7 @@ lexglue_ledgar_helm = LightevalTaskConfig( name="lexglue:ledgar", suite=["helm", "lex_glue_scenario"], - prompt_function="lex_glue_ledgar", + prompt_function=prompt.lex_glue_ledgar, hf_repo="lighteval/lexglue", hf_subset="ledgar", hf_avail_splits=["train", "test", "validation"], @@ -9524,7 +9525,7 @@ lexglue_scotus_helm = 
LightevalTaskConfig( name="lexglue:scotus", suite=["helm", "lex_glue_scenario"], - prompt_function="lex_glue_scotus", + prompt_function=prompt.lex_glue_scotus, hf_repo="lighteval/lexglue", hf_subset="scotus", hf_avail_splits=["train", "test", "validation"], @@ -9542,7 +9543,7 @@ lexglue_unfair_tos_helm = LightevalTaskConfig( name="lexglue:unfair_tos", suite=["helm", "lex_glue_scenario"], - prompt_function="lex_glue_unfair_tos", + prompt_function=prompt.lex_glue_unfair_tos, hf_repo="lighteval/lexglue", hf_subset="unfair_tos", hf_avail_splits=["train", "test", "validation"], @@ -9560,7 +9561,7 @@ lextreme_brazilian_court_decisions_judgment_helm = LightevalTaskConfig( name="lextreme:brazilian_court_decisions_judgment", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_brazilian_court_decisions_judgment", + prompt_function=prompt.lextreme_brazilian_court_decisions_judgment, hf_repo="lighteval/lextreme", hf_subset="brazilian_court_decisions_judgment", hf_avail_splits=["train", "test", "validation"], @@ -9578,7 +9579,7 @@ lextreme_brazilian_court_decisions_unanimity_helm = LightevalTaskConfig( name="lextreme:brazilian_court_decisions_unanimity", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_brazilian_court_decisions_unanimity", + prompt_function=prompt.lextreme_brazilian_court_decisions_unanimity, hf_repo="lighteval/lextreme", hf_subset="brazilian_court_decisions_unanimity", hf_avail_splits=["train", "test", "validation"], @@ -9596,7 +9597,7 @@ lextreme_covid19_emergency_event_helm = LightevalTaskConfig( name="lextreme:covid19_emergency_event", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_covid19_emergency_event", + prompt_function=prompt.lextreme_covid19_emergency_event, hf_repo="lighteval/lextreme", hf_subset="covid19_emergency_event", hf_avail_splits=["train", "test", "validation"], @@ -9614,7 +9615,7 @@ lextreme_german_argument_mining_helm = LightevalTaskConfig( name="lextreme:german_argument_mining", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_german_argument_mining", + prompt_function=prompt.lextreme_german_argument_mining, hf_repo="lighteval/lextreme", hf_subset="german_argument_mining", hf_avail_splits=["train", "test", "validation"], @@ -9632,7 +9633,7 @@ lextreme_greek_legal_code_chapter_helm = LightevalTaskConfig( name="lextreme:greek_legal_code_chapter", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_greek_legal_code_chapter", + prompt_function=prompt.lextreme_greek_legal_code_chapter, hf_repo="lighteval/lextreme", hf_subset="greek_legal_code_chapter", hf_avail_splits=["train", "test", "validation"], @@ -9650,7 +9651,7 @@ lextreme_greek_legal_code_subject_helm = LightevalTaskConfig( name="lextreme:greek_legal_code_subject", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_greek_legal_code_subject", + prompt_function=prompt.lextreme_greek_legal_code_subject, hf_repo="lighteval/lextreme", hf_subset="greek_legal_code_subject", hf_avail_splits=["train", "test", "validation"], @@ -9668,7 +9669,7 @@ lextreme_greek_legal_code_volume_helm = LightevalTaskConfig( name="lextreme:greek_legal_code_volume", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_greek_legal_code_volume", + prompt_function=prompt.lextreme_greek_legal_code_volume, hf_repo="lighteval/lextreme", hf_subset="greek_legal_code_volume", hf_avail_splits=["train", "test", "validation"], @@ -9686,7 +9687,7 @@ lextreme_greek_legal_ner_helm = LightevalTaskConfig( name="lextreme:greek_legal_ner", suite=["helm", 
"lextreme_scenario"], - prompt_function="lextreme_greek_legal_ner", + prompt_function=prompt.lextreme_greek_legal_ner, hf_repo="lighteval/lextreme", hf_subset="greek_legal_ner", hf_avail_splits=["train", "test", "validation"], @@ -9704,7 +9705,7 @@ lextreme_legalnero_helm = LightevalTaskConfig( name="lextreme:legalnero", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_legalnero", + prompt_function=prompt.lextreme_legalnero, hf_repo="lighteval/lextreme", hf_subset="legalnero", hf_avail_splits=["train", "test", "validation"], @@ -9722,7 +9723,7 @@ lextreme_lener_br_helm = LightevalTaskConfig( name="lextreme:lener_br", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_lener_br", + prompt_function=prompt.lextreme_lener_br, hf_repo="lighteval/lextreme", hf_subset="lener_br", hf_avail_splits=["train", "test", "validation"], @@ -9740,7 +9741,7 @@ lextreme_mapa_coarse_helm = LightevalTaskConfig( name="lextreme:mapa_coarse", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_mapa_coarse", + prompt_function=prompt.lextreme_mapa_coarse, hf_repo="lighteval/lextreme", hf_subset="mapa_coarse", hf_avail_splits=["train", "test", "validation"], @@ -9758,7 +9759,7 @@ lextreme_mapa_fine_helm = LightevalTaskConfig( name="lextreme:mapa_fine", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_mapa_fine", + prompt_function=prompt.lextreme_mapa_fine, hf_repo="lighteval/lextreme", hf_subset="mapa_fine", hf_avail_splits=["train", "test", "validation"], @@ -9776,7 +9777,7 @@ lextreme_multi_eurlex_level_1_helm = LightevalTaskConfig( name="lextreme:multi_eurlex_level_1", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_multi_eurlex_level_1", + prompt_function=prompt.lextreme_multi_eurlex_level_1, hf_repo="lighteval/lextreme", hf_subset="multi_eurlex_level_1", hf_avail_splits=["train", "test", "validation"], @@ -9794,7 +9795,7 @@ lextreme_multi_eurlex_level_2_helm = LightevalTaskConfig( name="lextreme:multi_eurlex_level_2", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_multi_eurlex_level_2", + prompt_function=prompt.lextreme_multi_eurlex_level_2, hf_repo="lighteval/lextreme", hf_subset="multi_eurlex_level_2", hf_avail_splits=["train", "test", "validation"], @@ -9812,7 +9813,7 @@ lextreme_multi_eurlex_level_3_helm = LightevalTaskConfig( name="lextreme:multi_eurlex_level_3", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_multi_eurlex_level_3", + prompt_function=prompt.lextreme_multi_eurlex_level_3, hf_repo="lighteval/lextreme", hf_subset="multi_eurlex_level_3", hf_avail_splits=["train", "test", "validation"], @@ -9830,7 +9831,7 @@ lextreme_online_terms_of_service_clause_topics_helm = LightevalTaskConfig( name="lextreme:online_terms_of_service_clause_topics", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_online_terms_of_service_clause_topics", + prompt_function=prompt.lextreme_online_terms_of_service_clause_topics, hf_repo="lighteval/lextreme", hf_subset="online_terms_of_service_clause_topics", hf_avail_splits=["train", "test", "validation"], @@ -9848,7 +9849,7 @@ lextreme_online_terms_of_service_unfairness_levels_helm = LightevalTaskConfig( name="lextreme:online_terms_of_service_unfairness_levels", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_online_terms_of_service_unfairness_levels", + prompt_function=prompt.lextreme_online_terms_of_service_unfairness_levels, hf_repo="lighteval/lextreme", hf_subset="online_terms_of_service_unfairness_levels", hf_avail_splits=["train", 
"test", "validation"], @@ -9866,7 +9867,7 @@ lextreme_swiss_judgment_prediction_helm = LightevalTaskConfig( name="lextreme:swiss_judgment_prediction", suite=["helm", "lextreme_scenario"], - prompt_function="lextreme_swiss_judgment_prediction", + prompt_function=prompt.lextreme_swiss_judgment_prediction, hf_repo="lighteval/lextreme", hf_subset="swiss_judgment_prediction", hf_avail_splits=["train", "test", "validation"], @@ -9884,7 +9885,7 @@ linguistic_mappings_bigbench = LightevalTaskConfig( name="linguistic_mappings", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="linguistic_mappings", hf_avail_splits=["default", "train", "validation"], @@ -9902,7 +9903,7 @@ linguistics_puzzles_bigbench_lite = LightevalTaskConfig( name="linguistics_puzzles", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_whitespace_after_query", + prompt_function=prompt.bigbench_whitespace_after_query, hf_repo="bigbench", hf_subset="linguistics_puzzles", hf_avail_splits=["default", "train", "validation"], @@ -9919,7 +9920,7 @@ logic_grid_puzzle_bigbench_lite = LightevalTaskConfig( name="logic_grid_puzzle", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="logic_grid_puzzle", hf_avail_splits=["default", "train", "validation"], @@ -9937,7 +9938,7 @@ logical_args_bigbench = LightevalTaskConfig( name="logical_args", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="logical_args", hf_avail_splits=["default", "train", "validation"], @@ -9955,7 +9956,7 @@ logical_deduction_bigbench_lite = LightevalTaskConfig( name="logical_deduction", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_whitespace_after_query", + prompt_function=prompt.bigbench_whitespace_after_query, hf_repo="bigbench", hf_subset="logical_deduction", hf_avail_splits=["default", "train", "validation"], @@ -9973,7 +9974,7 @@ logical_fallacy_detection_bigbench = LightevalTaskConfig( name="logical_fallacy_detection", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="logical_fallacy_detection", hf_avail_splits=["default", "train", "validation"], @@ -9991,7 +9992,7 @@ logical_sequence_bigbench = LightevalTaskConfig( name="logical_sequence", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="logical_sequence", hf_avail_splits=["default", "train", "validation"], @@ -10009,7 +10010,7 @@ logiqa_lighteval = LightevalTaskConfig( name="logiqa", suite=["lighteval"], - prompt_function="logiqa", + prompt_function=prompt.logiqa, hf_repo="lighteval/logiqa_harness", hf_subset="logiqa", hf_avail_splits=["train", "validation", "test"], @@ -10027,7 +10028,7 @@ lsat_qa_helm = LightevalTaskConfig( name="lsat_qa", suite=["helm", "lsat_qa_scenario"], - prompt_function="lsat_qa", + prompt_function=prompt.lsat_qa, hf_repo="lighteval/lsat_qa", hf_subset="all", hf_avail_splits=["train", "test", "validation"], @@ -10045,7 +10046,7 @@ lsat_qa_assignment_helm = LightevalTaskConfig( name="lsat_qa:assignment", suite=["helm", "lsat_qa_scenario"], - prompt_function="lsat_qa", + prompt_function=prompt.lsat_qa, hf_repo="lighteval/lsat_qa", hf_subset="assignment", hf_avail_splits=["train", "test", "validation"], 
@@ -10063,7 +10064,7 @@ lsat_qa_grouping_helm = LightevalTaskConfig( name="lsat_qa:grouping", suite=["helm", "lsat_qa_scenario"], - prompt_function="lsat_qa", + prompt_function=prompt.lsat_qa, hf_repo="lighteval/lsat_qa", hf_subset="grouping", hf_avail_splits=["train", "test", "validation"], @@ -10081,7 +10082,7 @@ lsat_qa_miscellaneous_helm = LightevalTaskConfig( name="lsat_qa:miscellaneous", suite=["helm", "lsat_qa_scenario"], - prompt_function="lsat_qa", + prompt_function=prompt.lsat_qa, hf_repo="lighteval/lsat_qa", hf_subset="miscellaneous", hf_avail_splits=["train", "test", "validation"], @@ -10099,7 +10100,7 @@ lsat_qa_ordering_helm = LightevalTaskConfig( name="lsat_qa:ordering", suite=["helm", "lsat_qa_scenario"], - prompt_function="lsat_qa", + prompt_function=prompt.lsat_qa, hf_repo="lighteval/lsat_qa", hf_subset="ordering", hf_avail_splits=["train", "test", "validation"], @@ -10117,7 +10118,7 @@ math_algebra_lighteval = LightevalTaskConfig( name="math:algebra", suite=["lighteval", "math"], - prompt_function="math", + prompt_function=prompt.math, hf_repo="lighteval/MATH", hf_subset="algebra", hf_avail_splits=["train", "test", "validation"], @@ -10135,7 +10136,7 @@ math_counting_and_probability_lighteval = LightevalTaskConfig( name="math:counting_and_probability", suite=["lighteval", "math"], - prompt_function="math", + prompt_function=prompt.math, hf_repo="lighteval/MATH", hf_subset="counting_and_probability", hf_avail_splits=["train", "test", "validation"], @@ -10153,7 +10154,7 @@ math_geometry_lighteval = LightevalTaskConfig( name="math:geometry", suite=["lighteval", "math"], - prompt_function="math", + prompt_function=prompt.math, hf_repo="lighteval/MATH", hf_subset="geometry", hf_avail_splits=["train", "test", "validation"], @@ -10171,7 +10172,7 @@ math_intermediate_algebra_lighteval = LightevalTaskConfig( name="math:intermediate_algebra", suite=["lighteval", "math"], - prompt_function="math", + prompt_function=prompt.math, hf_repo="lighteval/MATH", hf_subset="intermediate_algebra", hf_avail_splits=["train", "test", "validation"], @@ -10189,7 +10190,7 @@ math_number_theory_lighteval = LightevalTaskConfig( name="math:number_theory", suite=["lighteval", "math"], - prompt_function="math", + prompt_function=prompt.math, hf_repo="lighteval/MATH", hf_subset="number_theory", hf_avail_splits=["train", "test", "validation"], @@ -10207,7 +10208,7 @@ math_prealgebra_lighteval = LightevalTaskConfig( name="math:prealgebra", suite=["lighteval", "math"], - prompt_function="math", + prompt_function=prompt.math, hf_repo="lighteval/MATH", hf_subset="prealgebra", hf_avail_splits=["train", "test", "validation"], @@ -10225,7 +10226,7 @@ math_precalculus_lighteval = LightevalTaskConfig( name="math:precalculus", suite=["lighteval", "math"], - prompt_function="math", + prompt_function=prompt.math, hf_repo="lighteval/MATH", hf_subset="precalculus", hf_avail_splits=["train", "test", "validation"], @@ -10243,7 +10244,7 @@ math_cot_algebra_lighteval = LightevalTaskConfig( name="math_cot:algebra", suite=["lighteval", "math"], - prompt_function="math_cot", + prompt_function=prompt.math_cot, hf_repo="lighteval/MATH", hf_subset="algebra", hf_avail_splits=["train", "test", "validation"], @@ -10261,7 +10262,7 @@ math_cot_counting_and_probability_lighteval = LightevalTaskConfig( name="math_cot:counting_and_probability", suite=["lighteval", "math"], - prompt_function="math_cot", + prompt_function=prompt.math_cot, hf_repo="lighteval/MATH", hf_subset="counting_and_probability", hf_avail_splits=["train", "test", 
"validation"], @@ -10279,7 +10280,7 @@ math_cot_geometry_lighteval = LightevalTaskConfig( name="math_cot:geometry", suite=["lighteval", "math"], - prompt_function="math_cot", + prompt_function=prompt.math_cot, hf_repo="lighteval/MATH", hf_subset="geometry", hf_avail_splits=["train", "test", "validation"], @@ -10297,7 +10298,7 @@ math_cot_intermediate_algebra_lighteval = LightevalTaskConfig( name="math_cot:intermediate_algebra", suite=["lighteval", "math"], - prompt_function="math_cot", + prompt_function=prompt.math_cot, hf_repo="lighteval/MATH", hf_subset="intermediate_algebra", hf_avail_splits=["train", "test", "validation"], @@ -10315,7 +10316,7 @@ math_cot_number_theory_lighteval = LightevalTaskConfig( name="math_cot:number_theory", suite=["lighteval", "math"], - prompt_function="math_cot", + prompt_function=prompt.math_cot, hf_repo="lighteval/MATH", hf_subset="number_theory", hf_avail_splits=["train", "test", "validation"], @@ -10333,7 +10334,7 @@ math_cot_prealgebra_lighteval = LightevalTaskConfig( name="math_cot:prealgebra", suite=["lighteval", "math"], - prompt_function="math_cot", + prompt_function=prompt.math_cot, hf_repo="lighteval/MATH", hf_subset="prealgebra", hf_avail_splits=["train", "test", "validation"], @@ -10351,7 +10352,7 @@ math_cot_precalculus_lighteval = LightevalTaskConfig( name="math_cot:precalculus", suite=["lighteval", "math"], - prompt_function="math_cot", + prompt_function=prompt.math_cot, hf_repo="lighteval/MATH", hf_subset="precalculus", hf_avail_splits=["train", "test", "validation"], @@ -10369,7 +10370,7 @@ mathematical_induction_bigbench = LightevalTaskConfig( name="mathematical_induction", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="mathematical_induction", hf_avail_splits=["default", "train", "validation"], @@ -10387,7 +10388,7 @@ mathqa_lighteval = LightevalTaskConfig( name="mathqa", suite=["lighteval"], - prompt_function="mathqa", + prompt_function=prompt.mathqa, hf_repo="math_qa", hf_subset="default", hf_avail_splits=["train", "validation", "test"], @@ -10405,7 +10406,7 @@ matrixshapes_bigbench = LightevalTaskConfig( name="matrixshapes", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="matrixshapes", hf_avail_splits=["default", "train", "validation"], @@ -10423,7 +10424,7 @@ me_q_sum_helm = LightevalTaskConfig( name="me_q_sum", suite=["helm"], - prompt_function="me_q_sum", + prompt_function=prompt.me_q_sum, hf_repo="lighteval/me_q_sum", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -10441,7 +10442,7 @@ med_dialog_healthcaremagic_helm = LightevalTaskConfig( name="med_dialog:healthcaremagic", suite=["helm"], - prompt_function="med_dialog", + prompt_function=prompt.med_dialog, hf_repo="lighteval/med_dialog", hf_subset="healthcaremagic", hf_avail_splits=["train", "test", "validation"], @@ -10459,7 +10460,7 @@ med_dialog_icliniq_helm = LightevalTaskConfig( name="med_dialog:icliniq", suite=["helm"], - prompt_function="med_dialog", + prompt_function=prompt.med_dialog, hf_repo="lighteval/med_dialog", hf_subset="icliniq", hf_avail_splits=["train", "test", "validation"], @@ -10477,7 +10478,7 @@ med_mcqa_helm = LightevalTaskConfig( name="med_mcqa", suite=["helm"], - prompt_function="med_mcqa", + prompt_function=prompt.med_mcqa, hf_repo="lighteval/med_mcqa", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -10495,7 +10496,7 @@ 
med_paragraph_simplification_helm = LightevalTaskConfig( name="med_paragraph_simplification", suite=["helm"], - prompt_function="med_paragraph_simplification", + prompt_function=prompt.med_paragraph_simplification, hf_repo="lighteval/med_paragraph_simplification", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -10513,7 +10514,7 @@ med_qa_helm = LightevalTaskConfig( name="med_qa", suite=["helm"], - prompt_function="med_qa", + prompt_function=prompt.med_qa, hf_repo="bigbio/med_qa", hf_subset="med_qa_en_source", hf_avail_splits=["train", "test", "validation"], @@ -10531,7 +10532,7 @@ metaphor_boolean_bigbench = LightevalTaskConfig( name="metaphor_boolean", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="metaphor_boolean", hf_avail_splits=["default", "train", "validation"], @@ -10549,7 +10550,7 @@ metaphor_understanding_bigbench = LightevalTaskConfig( name="metaphor_understanding", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="metaphor_understanding", hf_avail_splits=["default", "train", "validation"], @@ -10567,7 +10568,7 @@ mgsm_en_lighteval = LightevalTaskConfig( name="mgsm:en", suite=["lighteval"], - prompt_function="mgsm_en", + prompt_function=prompt.mgsm_en, hf_repo="juletxara/mgsm", hf_subset="en", hf_avail_splits=["train", "test"], @@ -10585,7 +10586,7 @@ mgsm_es_lighteval = LightevalTaskConfig( name="mgsm:es", suite=["lighteval"], - prompt_function="mgsm_es", + prompt_function=prompt.mgsm_es, hf_repo="juletxara/mgsm", hf_subset="es", hf_avail_splits=["train", "test"], @@ -10603,7 +10604,7 @@ mgsm_fr_lighteval = LightevalTaskConfig( name="mgsm:fr", suite=["lighteval"], - prompt_function="mgsm_fr", + prompt_function=prompt.mgsm_fr, hf_repo="juletxara/mgsm", hf_subset="fr", hf_avail_splits=["train", "test"], @@ -10621,7 +10622,7 @@ mgsm_de_lighteval = LightevalTaskConfig( name="mgsm:de", suite=["lighteval"], - prompt_function="mgsm_de", + prompt_function=prompt.mgsm_de, hf_repo="juletxara/mgsm", hf_subset="de", hf_avail_splits=["train", "test"], @@ -10639,7 +10640,7 @@ mgsm_ru_lighteval = LightevalTaskConfig( name="mgsm:ru", suite=["lighteval"], - prompt_function="mgsm_ru", + prompt_function=prompt.mgsm_ru, hf_repo="juletxara/mgsm", hf_subset="ru", hf_avail_splits=["train", "test"], @@ -10657,7 +10658,7 @@ mgsm_zh_lighteval = LightevalTaskConfig( name="mgsm:zh", suite=["lighteval"], - prompt_function="mgsm_zh", + prompt_function=prompt.mgsm_zh, hf_repo="juletxara/mgsm", hf_subset="zh", hf_avail_splits=["train", "test"], @@ -10675,7 +10676,7 @@ mgsm_ja_lighteval = LightevalTaskConfig( name="mgsm:ja", suite=["lighteval"], - prompt_function="mgsm_ja", + prompt_function=prompt.mgsm_ja, hf_repo="juletxara/mgsm", hf_subset="ja", hf_avail_splits=["train", "test"], @@ -10693,7 +10694,7 @@ mgsm_th_lighteval = LightevalTaskConfig( name="mgsm:th", suite=["lighteval"], - prompt_function="mgsm_th", + prompt_function=prompt.mgsm_th, hf_repo="juletxara/mgsm", hf_subset="th", hf_avail_splits=["train", "test"], @@ -10711,7 +10712,7 @@ mgsm_sw_lighteval = LightevalTaskConfig( name="mgsm:sw", suite=["lighteval"], - prompt_function="mgsm_sw", + prompt_function=prompt.mgsm_sw, hf_repo="juletxara/mgsm", hf_subset="sw", hf_avail_splits=["train", "test"], @@ -10729,7 +10730,7 @@ mgsm_bn_lighteval = LightevalTaskConfig( name="mgsm:bn", suite=["lighteval"], - prompt_function="mgsm_bn", + prompt_function=prompt.mgsm_bn, 
hf_repo="juletxara/mgsm", hf_subset="bn", hf_avail_splits=["train", "test"], @@ -10747,7 +10748,7 @@ mgsm_te_lighteval = LightevalTaskConfig( name="mgsm:te", suite=["lighteval"], - prompt_function="mgsm_te", + prompt_function=prompt.mgsm_te, hf_repo="juletxara/mgsm", hf_subset="te", hf_avail_splits=["train", "test"], @@ -10765,7 +10766,7 @@ minute_mysteries_qa_bigbench = LightevalTaskConfig( name="minute_mysteries_qa", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="minute_mysteries_qa", hf_avail_splits=["default", "train", "validation"], @@ -10783,7 +10784,7 @@ misconceptions_bigbench = LightevalTaskConfig( name="misconceptions", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="misconceptions", hf_avail_splits=["default", "train", "validation"], @@ -10801,7 +10802,7 @@ misconceptions_russian_bigbench_lite = LightevalTaskConfig( name="misconceptions_russian", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="misconceptions_russian", hf_avail_splits=["default", "train", "validation"], @@ -10819,7 +10820,7 @@ mmlu_helm = LightevalTaskConfig( name="mmlu", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="all", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10837,7 +10838,7 @@ mmlu_original = LightevalTaskConfig( name="mmlu", suite=["original"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="all", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10855,7 +10856,7 @@ mmlu_abstract_algebra_original = LightevalTaskConfig( name="mmlu:abstract_algebra", suite=["original", "mmlu"], - prompt_function="mmlu_abstract_algebra", + prompt_function=prompt.mmlu_abstract_algebra, hf_repo="cais/mmlu", hf_subset="abstract_algebra", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10873,7 +10874,7 @@ mmlu_abstract_algebra_leaderboard = LightevalTaskConfig( name="mmlu:abstract_algebra", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="abstract_algebra", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10891,7 +10892,7 @@ mmlu_abstract_algebra_helm = LightevalTaskConfig( name="mmlu:abstract_algebra", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="abstract_algebra", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10909,7 +10910,7 @@ mmlu_anatomy_original = LightevalTaskConfig( name="mmlu:anatomy", suite=["original", "mmlu"], - prompt_function="mmlu_anatomy", + prompt_function=prompt.mmlu_anatomy, hf_repo="cais/mmlu", hf_subset="anatomy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10927,7 +10928,7 @@ mmlu_anatomy_leaderboard = LightevalTaskConfig( name="mmlu:anatomy", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="anatomy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10945,7 +10946,7 @@ mmlu_anatomy_helm = LightevalTaskConfig( name="mmlu:anatomy", suite=["helm", "helm_general"], - 
prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="anatomy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10963,7 +10964,7 @@ mmlu_astronomy_original = LightevalTaskConfig( name="mmlu:astronomy", suite=["original", "mmlu"], - prompt_function="mmlu_astronomy", + prompt_function=prompt.mmlu_astronomy, hf_repo="cais/mmlu", hf_subset="astronomy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10981,7 +10982,7 @@ mmlu_astronomy_leaderboard = LightevalTaskConfig( name="mmlu:astronomy", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="astronomy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -10999,7 +11000,7 @@ mmlu_astronomy_helm = LightevalTaskConfig( name="mmlu:astronomy", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="astronomy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11017,7 +11018,7 @@ mmlu_business_ethics_original = LightevalTaskConfig( name="mmlu:business_ethics", suite=["original", "mmlu"], - prompt_function="mmlu_business_ethics", + prompt_function=prompt.mmlu_business_ethics, hf_repo="cais/mmlu", hf_subset="business_ethics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11035,7 +11036,7 @@ mmlu_business_ethics_leaderboard = LightevalTaskConfig( name="mmlu:business_ethics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="business_ethics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11053,7 +11054,7 @@ mmlu_business_ethics_helm = LightevalTaskConfig( name="mmlu:business_ethics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="business_ethics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11071,7 +11072,7 @@ mmlu_clinical_knowledge_original = LightevalTaskConfig( name="mmlu:clinical_knowledge", suite=["original", "mmlu"], - prompt_function="mmlu_clinical_knowledge", + prompt_function=prompt.mmlu_clinical_knowledge, hf_repo="cais/mmlu", hf_subset="clinical_knowledge", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11089,7 +11090,7 @@ mmlu_clinical_knowledge_leaderboard = LightevalTaskConfig( name="mmlu:clinical_knowledge", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="clinical_knowledge", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11107,7 +11108,7 @@ mmlu_clinical_knowledge_helm = LightevalTaskConfig( name="mmlu:clinical_knowledge", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="clinical_knowledge", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11125,7 +11126,7 @@ mmlu_college_biology_original = LightevalTaskConfig( name="mmlu:college_biology", suite=["original", "mmlu"], - prompt_function="mmlu_college_biology", + prompt_function=prompt.mmlu_college_biology, hf_repo="cais/mmlu", hf_subset="college_biology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11143,7 +11144,7 @@ mmlu_college_biology_leaderboard = LightevalTaskConfig( 
name="mmlu:college_biology", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="college_biology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11161,7 +11162,7 @@ mmlu_college_biology_helm = LightevalTaskConfig( name="mmlu:college_biology", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="college_biology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11179,7 +11180,7 @@ mmlu_college_chemistry_original = LightevalTaskConfig( name="mmlu:college_chemistry", suite=["original", "mmlu"], - prompt_function="mmlu_college_chemistry", + prompt_function=prompt.mmlu_college_chemistry, hf_repo="cais/mmlu", hf_subset="college_chemistry", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11197,7 +11198,7 @@ mmlu_college_chemistry_leaderboard = LightevalTaskConfig( name="mmlu:college_chemistry", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="college_chemistry", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11215,7 +11216,7 @@ mmlu_college_chemistry_helm = LightevalTaskConfig( name="mmlu:college_chemistry", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="college_chemistry", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11233,7 +11234,7 @@ mmlu_college_computer_science_original = LightevalTaskConfig( name="mmlu:college_computer_science", suite=["original", "mmlu"], - prompt_function="mmlu_college_computer_science", + prompt_function=prompt.mmlu_college_computer_science, hf_repo="cais/mmlu", hf_subset="college_computer_science", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11251,7 +11252,7 @@ mmlu_college_computer_science_leaderboard = LightevalTaskConfig( name="mmlu:college_computer_science", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="college_computer_science", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11269,7 +11270,7 @@ mmlu_college_computer_science_helm = LightevalTaskConfig( name="mmlu:college_computer_science", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="college_computer_science", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11287,7 +11288,7 @@ mmlu_college_mathematics_original = LightevalTaskConfig( name="mmlu:college_mathematics", suite=["original", "mmlu"], - prompt_function="mmlu_college_mathematics", + prompt_function=prompt.mmlu_college_mathematics, hf_repo="cais/mmlu", hf_subset="college_mathematics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11305,7 +11306,7 @@ mmlu_college_mathematics_leaderboard = LightevalTaskConfig( name="mmlu:college_mathematics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="college_mathematics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11323,7 +11324,7 @@ mmlu_college_mathematics_helm = LightevalTaskConfig( name="mmlu:college_mathematics", suite=["helm", "helm_general"], - 
prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="college_mathematics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11341,7 +11342,7 @@ mmlu_college_medicine_original = LightevalTaskConfig( name="mmlu:college_medicine", suite=["original", "mmlu"], - prompt_function="mmlu_college_medicine", + prompt_function=prompt.mmlu_college_medicine, hf_repo="cais/mmlu", hf_subset="college_medicine", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11359,7 +11360,7 @@ mmlu_college_medicine_leaderboard = LightevalTaskConfig( name="mmlu:college_medicine", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="college_medicine", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11377,7 +11378,7 @@ mmlu_college_medicine_helm = LightevalTaskConfig( name="mmlu:college_medicine", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="college_medicine", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11395,7 +11396,7 @@ mmlu_college_physics_original = LightevalTaskConfig( name="mmlu:college_physics", suite=["original", "mmlu"], - prompt_function="mmlu_college_physics", + prompt_function=prompt.mmlu_college_physics, hf_repo="cais/mmlu", hf_subset="college_physics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11413,7 +11414,7 @@ mmlu_college_physics_leaderboard = LightevalTaskConfig( name="mmlu:college_physics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="college_physics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11431,7 +11432,7 @@ mmlu_college_physics_helm = LightevalTaskConfig( name="mmlu:college_physics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="college_physics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11449,7 +11450,7 @@ mmlu_computer_security_original = LightevalTaskConfig( name="mmlu:computer_security", suite=["original", "mmlu"], - prompt_function="mmlu_computer_security", + prompt_function=prompt.mmlu_computer_security, hf_repo="cais/mmlu", hf_subset="computer_security", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11467,7 +11468,7 @@ mmlu_computer_security_leaderboard = LightevalTaskConfig( name="mmlu:computer_security", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="computer_security", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11485,7 +11486,7 @@ mmlu_computer_security_helm = LightevalTaskConfig( name="mmlu:computer_security", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="computer_security", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11503,7 +11504,7 @@ mmlu_conceptual_physics_original = LightevalTaskConfig( name="mmlu:conceptual_physics", suite=["original", "mmlu"], - prompt_function="mmlu_conceptual_physics", + prompt_function=prompt.mmlu_conceptual_physics, hf_repo="cais/mmlu", hf_subset="conceptual_physics", hf_avail_splits=["auxiliary_train", "test", "validation", 
"dev"], @@ -11521,7 +11522,7 @@ mmlu_conceptual_physics_leaderboard = LightevalTaskConfig( name="mmlu:conceptual_physics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="conceptual_physics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11539,7 +11540,7 @@ mmlu_conceptual_physics_helm = LightevalTaskConfig( name="mmlu:conceptual_physics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="conceptual_physics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11557,7 +11558,7 @@ mmlu_econometrics_original = LightevalTaskConfig( name="mmlu:econometrics", suite=["original", "mmlu"], - prompt_function="mmlu_econometrics", + prompt_function=prompt.mmlu_econometrics, hf_repo="cais/mmlu", hf_subset="econometrics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11575,7 +11576,7 @@ mmlu_econometrics_leaderboard = LightevalTaskConfig( name="mmlu:econometrics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="econometrics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11593,7 +11594,7 @@ mmlu_econometrics_helm = LightevalTaskConfig( name="mmlu:econometrics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="econometrics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11611,7 +11612,7 @@ mmlu_electrical_engineering_original = LightevalTaskConfig( name="mmlu:electrical_engineering", suite=["original", "mmlu"], - prompt_function="mmlu_electrical_engineering", + prompt_function=prompt.mmlu_electrical_engineering, hf_repo="cais/mmlu", hf_subset="electrical_engineering", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11629,7 +11630,7 @@ mmlu_electrical_engineering_leaderboard = LightevalTaskConfig( name="mmlu:electrical_engineering", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="electrical_engineering", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11647,7 +11648,7 @@ mmlu_electrical_engineering_helm = LightevalTaskConfig( name="mmlu:electrical_engineering", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="electrical_engineering", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11665,7 +11666,7 @@ mmlu_elementary_mathematics_original = LightevalTaskConfig( name="mmlu:elementary_mathematics", suite=["original", "mmlu"], - prompt_function="mmlu_elementary_mathematics", + prompt_function=prompt.mmlu_elementary_mathematics, hf_repo="cais/mmlu", hf_subset="elementary_mathematics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11683,7 +11684,7 @@ mmlu_elementary_mathematics_leaderboard = LightevalTaskConfig( name="mmlu:elementary_mathematics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="elementary_mathematics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11701,7 +11702,7 @@ mmlu_elementary_mathematics_helm = LightevalTaskConfig( 
name="mmlu:elementary_mathematics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="elementary_mathematics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11719,7 +11720,7 @@ mmlu_formal_logic_original = LightevalTaskConfig( name="mmlu:formal_logic", suite=["original", "mmlu"], - prompt_function="mmlu_formal_logic", + prompt_function=prompt.mmlu_formal_logic, hf_repo="cais/mmlu", hf_subset="formal_logic", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11737,7 +11738,7 @@ mmlu_formal_logic_leaderboard = LightevalTaskConfig( name="mmlu:formal_logic", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="formal_logic", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11755,7 +11756,7 @@ mmlu_formal_logic_helm = LightevalTaskConfig( name="mmlu:formal_logic", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="formal_logic", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11773,7 +11774,7 @@ mmlu_global_facts_original = LightevalTaskConfig( name="mmlu:global_facts", suite=["original", "mmlu"], - prompt_function="mmlu_global_facts", + prompt_function=prompt.mmlu_global_facts, hf_repo="cais/mmlu", hf_subset="global_facts", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11791,7 +11792,7 @@ mmlu_global_facts_leaderboard = LightevalTaskConfig( name="mmlu:global_facts", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="global_facts", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11809,7 +11810,7 @@ mmlu_global_facts_helm = LightevalTaskConfig( name="mmlu:global_facts", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="global_facts", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11827,7 +11828,7 @@ mmlu_high_school_biology_original = LightevalTaskConfig( name="mmlu:high_school_biology", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_biology", + prompt_function=prompt.mmlu_high_school_biology, hf_repo="cais/mmlu", hf_subset="high_school_biology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11845,7 +11846,7 @@ mmlu_high_school_biology_leaderboard = LightevalTaskConfig( name="mmlu:high_school_biology", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_biology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11863,7 +11864,7 @@ mmlu_high_school_biology_helm = LightevalTaskConfig( name="mmlu:high_school_biology", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_biology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11881,7 +11882,7 @@ mmlu_high_school_chemistry_original = LightevalTaskConfig( name="mmlu:high_school_chemistry", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_chemistry", + prompt_function=prompt.mmlu_high_school_chemistry, hf_repo="cais/mmlu", hf_subset="high_school_chemistry", 
hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11899,7 +11900,7 @@ mmlu_high_school_chemistry_leaderboard = LightevalTaskConfig( name="mmlu:high_school_chemistry", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_chemistry", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11917,7 +11918,7 @@ mmlu_high_school_chemistry_helm = LightevalTaskConfig( name="mmlu:high_school_chemistry", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_chemistry", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11935,7 +11936,7 @@ mmlu_high_school_computer_science_original = LightevalTaskConfig( name="mmlu:high_school_computer_science", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_computer_science", + prompt_function=prompt.mmlu_high_school_computer_science, hf_repo="cais/mmlu", hf_subset="high_school_computer_science", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11953,7 +11954,7 @@ mmlu_high_school_computer_science_leaderboard = LightevalTaskConfig( name="mmlu:high_school_computer_science", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_computer_science", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11971,7 +11972,7 @@ mmlu_high_school_computer_science_helm = LightevalTaskConfig( name="mmlu:high_school_computer_science", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_computer_science", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -11989,7 +11990,7 @@ mmlu_high_school_european_history_original = LightevalTaskConfig( name="mmlu:high_school_european_history", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_european_history", + prompt_function=prompt.mmlu_high_school_european_history, hf_repo="cais/mmlu", hf_subset="high_school_european_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12007,7 +12008,7 @@ mmlu_high_school_european_history_leaderboard = LightevalTaskConfig( name="mmlu:high_school_european_history", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_european_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12025,7 +12026,7 @@ mmlu_high_school_european_history_helm = LightevalTaskConfig( name="mmlu:high_school_european_history", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_european_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12043,7 +12044,7 @@ mmlu_high_school_geography_original = LightevalTaskConfig( name="mmlu:high_school_geography", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_geography", + prompt_function=prompt.mmlu_high_school_geography, hf_repo="cais/mmlu", hf_subset="high_school_geography", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12061,7 +12062,7 @@ mmlu_high_school_geography_leaderboard = LightevalTaskConfig( name="mmlu:high_school_geography", suite=["leaderboard", "mmlu"], 
- prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_geography", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12079,7 +12080,7 @@ mmlu_high_school_geography_helm = LightevalTaskConfig( name="mmlu:high_school_geography", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_geography", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12097,7 +12098,7 @@ mmlu_high_school_government_and_politics_original = LightevalTaskConfig( name="mmlu:high_school_government_and_politics", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_government_and_politics", + prompt_function=prompt.mmlu_high_school_government_and_politics, hf_repo="cais/mmlu", hf_subset="high_school_government_and_politics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12115,7 +12116,7 @@ mmlu_high_school_government_and_politics_leaderboard = LightevalTaskConfig( name="mmlu:high_school_government_and_politics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_government_and_politics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12133,7 +12134,7 @@ mmlu_high_school_government_and_politics_helm = LightevalTaskConfig( name="mmlu:high_school_government_and_politics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_government_and_politics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12151,7 +12152,7 @@ mmlu_high_school_macroeconomics_original = LightevalTaskConfig( name="mmlu:high_school_macroeconomics", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_macroeconomics", + prompt_function=prompt.mmlu_high_school_macroeconomics, hf_repo="cais/mmlu", hf_subset="high_school_macroeconomics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12169,7 +12170,7 @@ mmlu_high_school_macroeconomics_leaderboard = LightevalTaskConfig( name="mmlu:high_school_macroeconomics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_macroeconomics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12187,7 +12188,7 @@ mmlu_high_school_macroeconomics_helm = LightevalTaskConfig( name="mmlu:high_school_macroeconomics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_macroeconomics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12205,7 +12206,7 @@ mmlu_high_school_mathematics_original = LightevalTaskConfig( name="mmlu:high_school_mathematics", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_mathematics", + prompt_function=prompt.mmlu_high_school_mathematics, hf_repo="cais/mmlu", hf_subset="high_school_mathematics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12223,7 +12224,7 @@ mmlu_high_school_mathematics_leaderboard = LightevalTaskConfig( name="mmlu:high_school_mathematics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_mathematics", 
hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12241,7 +12242,7 @@ mmlu_high_school_mathematics_helm = LightevalTaskConfig( name="mmlu:high_school_mathematics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_mathematics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12259,7 +12260,7 @@ mmlu_high_school_microeconomics_original = LightevalTaskConfig( name="mmlu:high_school_microeconomics", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_microeconomics", + prompt_function=prompt.mmlu_high_school_microeconomics, hf_repo="cais/mmlu", hf_subset="high_school_microeconomics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12277,7 +12278,7 @@ mmlu_high_school_microeconomics_leaderboard = LightevalTaskConfig( name="mmlu:high_school_microeconomics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_microeconomics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12295,7 +12296,7 @@ mmlu_high_school_microeconomics_helm = LightevalTaskConfig( name="mmlu:high_school_microeconomics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_microeconomics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12313,7 +12314,7 @@ mmlu_high_school_physics_original = LightevalTaskConfig( name="mmlu:high_school_physics", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_physics", + prompt_function=prompt.mmlu_high_school_physics, hf_repo="cais/mmlu", hf_subset="high_school_physics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12331,7 +12332,7 @@ mmlu_high_school_physics_leaderboard = LightevalTaskConfig( name="mmlu:high_school_physics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_physics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12349,7 +12350,7 @@ mmlu_high_school_physics_helm = LightevalTaskConfig( name="mmlu:high_school_physics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_physics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12367,7 +12368,7 @@ mmlu_high_school_psychology_original = LightevalTaskConfig( name="mmlu:high_school_psychology", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_psychology", + prompt_function=prompt.mmlu_high_school_psychology, hf_repo="cais/mmlu", hf_subset="high_school_psychology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12385,7 +12386,7 @@ mmlu_high_school_psychology_leaderboard = LightevalTaskConfig( name="mmlu:high_school_psychology", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_psychology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12403,7 +12404,7 @@ mmlu_high_school_psychology_helm = LightevalTaskConfig( name="mmlu:high_school_psychology", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", 
hf_subset="high_school_psychology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12421,7 +12422,7 @@ mmlu_high_school_statistics_original = LightevalTaskConfig( name="mmlu:high_school_statistics", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_statistics", + prompt_function=prompt.mmlu_high_school_statistics, hf_repo="cais/mmlu", hf_subset="high_school_statistics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12439,7 +12440,7 @@ mmlu_high_school_statistics_leaderboard = LightevalTaskConfig( name="mmlu:high_school_statistics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_statistics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12457,7 +12458,7 @@ mmlu_high_school_statistics_helm = LightevalTaskConfig( name="mmlu:high_school_statistics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_statistics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12475,7 +12476,7 @@ mmlu_high_school_us_history_original = LightevalTaskConfig( name="mmlu:high_school_us_history", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_us_history", + prompt_function=prompt.mmlu_high_school_us_history, hf_repo="cais/mmlu", hf_subset="high_school_us_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12493,7 +12494,7 @@ mmlu_high_school_us_history_leaderboard = LightevalTaskConfig( name="mmlu:high_school_us_history", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_us_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12511,7 +12512,7 @@ mmlu_high_school_us_history_helm = LightevalTaskConfig( name="mmlu:high_school_us_history", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_us_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12529,7 +12530,7 @@ mmlu_high_school_world_history_original = LightevalTaskConfig( name="mmlu:high_school_world_history", suite=["original", "mmlu"], - prompt_function="mmlu_high_school_world_history", + prompt_function=prompt.mmlu_high_school_world_history, hf_repo="cais/mmlu", hf_subset="high_school_world_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12547,7 +12548,7 @@ mmlu_high_school_world_history_leaderboard = LightevalTaskConfig( name="mmlu:high_school_world_history", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="high_school_world_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12565,7 +12566,7 @@ mmlu_high_school_world_history_helm = LightevalTaskConfig( name="mmlu:high_school_world_history", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="high_school_world_history", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12583,7 +12584,7 @@ mmlu_human_aging_original = LightevalTaskConfig( name="mmlu:human_aging", suite=["original", "mmlu"], - prompt_function="mmlu_human_aging", + 
prompt_function=prompt.mmlu_human_aging, hf_repo="cais/mmlu", hf_subset="human_aging", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12601,7 +12602,7 @@ mmlu_human_aging_leaderboard = LightevalTaskConfig( name="mmlu:human_aging", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="human_aging", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12619,7 +12620,7 @@ mmlu_human_aging_helm = LightevalTaskConfig( name="mmlu:human_aging", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="human_aging", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12637,7 +12638,7 @@ mmlu_human_sexuality_original = LightevalTaskConfig( name="mmlu:human_sexuality", suite=["original", "mmlu"], - prompt_function="mmlu_human_sexuality", + prompt_function=prompt.mmlu_human_sexuality, hf_repo="cais/mmlu", hf_subset="human_sexuality", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12655,7 +12656,7 @@ mmlu_human_sexuality_leaderboard = LightevalTaskConfig( name="mmlu:human_sexuality", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="human_sexuality", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12673,7 +12674,7 @@ mmlu_human_sexuality_helm = LightevalTaskConfig( name="mmlu:human_sexuality", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="human_sexuality", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12691,7 +12692,7 @@ mmlu_international_law_original = LightevalTaskConfig( name="mmlu:international_law", suite=["original", "mmlu"], - prompt_function="mmlu_international_law", + prompt_function=prompt.mmlu_international_law, hf_repo="cais/mmlu", hf_subset="international_law", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12709,7 +12710,7 @@ mmlu_international_law_leaderboard = LightevalTaskConfig( name="mmlu:international_law", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="international_law", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12727,7 +12728,7 @@ mmlu_international_law_helm = LightevalTaskConfig( name="mmlu:international_law", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="international_law", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12745,7 +12746,7 @@ mmlu_jurisprudence_original = LightevalTaskConfig( name="mmlu:jurisprudence", suite=["original", "mmlu"], - prompt_function="mmlu_jurisprudence", + prompt_function=prompt.mmlu_jurisprudence, hf_repo="cais/mmlu", hf_subset="jurisprudence", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12763,7 +12764,7 @@ mmlu_jurisprudence_leaderboard = LightevalTaskConfig( name="mmlu:jurisprudence", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="jurisprudence", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12781,7 +12782,7 @@ mmlu_jurisprudence_helm = LightevalTaskConfig( name="mmlu:jurisprudence", 
suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="jurisprudence", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12799,7 +12800,7 @@ mmlu_logical_fallacies_original = LightevalTaskConfig( name="mmlu:logical_fallacies", suite=["original", "mmlu"], - prompt_function="mmlu_logical_fallacies", + prompt_function=prompt.mmlu_logical_fallacies, hf_repo="cais/mmlu", hf_subset="logical_fallacies", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12817,7 +12818,7 @@ mmlu_logical_fallacies_leaderboard = LightevalTaskConfig( name="mmlu:logical_fallacies", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="logical_fallacies", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12835,7 +12836,7 @@ mmlu_logical_fallacies_helm = LightevalTaskConfig( name="mmlu:logical_fallacies", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="logical_fallacies", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12853,7 +12854,7 @@ mmlu_machine_learning_original = LightevalTaskConfig( name="mmlu:machine_learning", suite=["original", "mmlu"], - prompt_function="mmlu_machine_learning", + prompt_function=prompt.mmlu_machine_learning, hf_repo="cais/mmlu", hf_subset="machine_learning", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12871,7 +12872,7 @@ mmlu_machine_learning_leaderboard = LightevalTaskConfig( name="mmlu:machine_learning", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="machine_learning", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12889,7 +12890,7 @@ mmlu_machine_learning_helm = LightevalTaskConfig( name="mmlu:machine_learning", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="machine_learning", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12907,7 +12908,7 @@ mmlu_management_original = LightevalTaskConfig( name="mmlu:management", suite=["original", "mmlu"], - prompt_function="mmlu_management", + prompt_function=prompt.mmlu_management, hf_repo="cais/mmlu", hf_subset="management", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12925,7 +12926,7 @@ mmlu_management_leaderboard = LightevalTaskConfig( name="mmlu:management", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="management", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12943,7 +12944,7 @@ mmlu_management_helm = LightevalTaskConfig( name="mmlu:management", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="management", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12961,7 +12962,7 @@ mmlu_marketing_original = LightevalTaskConfig( name="mmlu:marketing", suite=["original", "mmlu"], - prompt_function="mmlu_marketing", + prompt_function=prompt.mmlu_marketing, hf_repo="cais/mmlu", hf_subset="marketing", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12979,7 +12980,7 @@ mmlu_marketing_leaderboard = 
LightevalTaskConfig( name="mmlu:marketing", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="marketing", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -12997,7 +12998,7 @@ mmlu_marketing_helm = LightevalTaskConfig( name="mmlu:marketing", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="marketing", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13015,7 +13016,7 @@ mmlu_medical_genetics_original = LightevalTaskConfig( name="mmlu:medical_genetics", suite=["original", "mmlu"], - prompt_function="mmlu_medical_genetics", + prompt_function=prompt.mmlu_medical_genetics, hf_repo="cais/mmlu", hf_subset="medical_genetics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13033,7 +13034,7 @@ mmlu_medical_genetics_leaderboard = LightevalTaskConfig( name="mmlu:medical_genetics", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="medical_genetics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13051,7 +13052,7 @@ mmlu_medical_genetics_helm = LightevalTaskConfig( name="mmlu:medical_genetics", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="medical_genetics", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13069,7 +13070,7 @@ mmlu_miscellaneous_original = LightevalTaskConfig( name="mmlu:miscellaneous", suite=["original", "mmlu"], - prompt_function="mmlu_miscellaneous", + prompt_function=prompt.mmlu_miscellaneous, hf_repo="cais/mmlu", hf_subset="miscellaneous", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13087,7 +13088,7 @@ mmlu_miscellaneous_leaderboard = LightevalTaskConfig( name="mmlu:miscellaneous", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="miscellaneous", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13105,7 +13106,7 @@ mmlu_miscellaneous_helm = LightevalTaskConfig( name="mmlu:miscellaneous", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="miscellaneous", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13123,7 +13124,7 @@ mmlu_moral_disputes_original = LightevalTaskConfig( name="mmlu:moral_disputes", suite=["original", "mmlu"], - prompt_function="mmlu_moral_disputes", + prompt_function=prompt.mmlu_moral_disputes, hf_repo="cais/mmlu", hf_subset="moral_disputes", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13141,7 +13142,7 @@ mmlu_moral_disputes_leaderboard = LightevalTaskConfig( name="mmlu:moral_disputes", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="moral_disputes", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13159,7 +13160,7 @@ mmlu_moral_disputes_helm = LightevalTaskConfig( name="mmlu:moral_disputes", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="moral_disputes", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13177,7 +13178,7 @@ 
mmlu_moral_scenarios_original = LightevalTaskConfig( name="mmlu:moral_scenarios", suite=["original", "mmlu"], - prompt_function="mmlu_moral_scenarios", + prompt_function=prompt.mmlu_moral_scenarios, hf_repo="cais/mmlu", hf_subset="moral_scenarios", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13195,7 +13196,7 @@ mmlu_moral_scenarios_leaderboard = LightevalTaskConfig( name="mmlu:moral_scenarios", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="moral_scenarios", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13213,7 +13214,7 @@ mmlu_moral_scenarios_helm = LightevalTaskConfig( name="mmlu:moral_scenarios", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="moral_scenarios", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13231,7 +13232,7 @@ mmlu_nutrition_original = LightevalTaskConfig( name="mmlu:nutrition", suite=["original", "mmlu"], - prompt_function="mmlu_nutrition", + prompt_function=prompt.mmlu_nutrition, hf_repo="cais/mmlu", hf_subset="nutrition", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13249,7 +13250,7 @@ mmlu_nutrition_leaderboard = LightevalTaskConfig( name="mmlu:nutrition", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="nutrition", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13267,7 +13268,7 @@ mmlu_nutrition_helm = LightevalTaskConfig( name="mmlu:nutrition", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="nutrition", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13285,7 +13286,7 @@ mmlu_philosophy_original = LightevalTaskConfig( name="mmlu:philosophy", suite=["original", "mmlu"], - prompt_function="mmlu_philosophy", + prompt_function=prompt.mmlu_philosophy, hf_repo="cais/mmlu", hf_subset="philosophy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13303,7 +13304,7 @@ mmlu_philosophy_leaderboard = LightevalTaskConfig( name="mmlu:philosophy", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="philosophy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13321,7 +13322,7 @@ mmlu_philosophy_helm = LightevalTaskConfig( name="mmlu:philosophy", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="philosophy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13339,7 +13340,7 @@ mmlu_prehistory_original = LightevalTaskConfig( name="mmlu:prehistory", suite=["original", "mmlu"], - prompt_function="mmlu_prehistory", + prompt_function=prompt.mmlu_prehistory, hf_repo="cais/mmlu", hf_subset="prehistory", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13357,7 +13358,7 @@ mmlu_prehistory_leaderboard = LightevalTaskConfig( name="mmlu:prehistory", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="prehistory", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13375,7 +13376,7 @@ mmlu_prehistory_helm = LightevalTaskConfig( 
name="mmlu:prehistory", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="prehistory", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13393,7 +13394,7 @@ mmlu_professional_accounting_original = LightevalTaskConfig( name="mmlu:professional_accounting", suite=["original", "mmlu"], - prompt_function="mmlu_professional_accounting", + prompt_function=prompt.mmlu_professional_accounting, hf_repo="cais/mmlu", hf_subset="professional_accounting", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13411,7 +13412,7 @@ mmlu_professional_accounting_leaderboard = LightevalTaskConfig( name="mmlu:professional_accounting", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="professional_accounting", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13429,7 +13430,7 @@ mmlu_professional_accounting_helm = LightevalTaskConfig( name="mmlu:professional_accounting", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="professional_accounting", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13447,7 +13448,7 @@ mmlu_professional_law_original = LightevalTaskConfig( name="mmlu:professional_law", suite=["original", "mmlu"], - prompt_function="mmlu_professional_law", + prompt_function=prompt.mmlu_professional_law, hf_repo="cais/mmlu", hf_subset="professional_law", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13465,7 +13466,7 @@ mmlu_professional_law_leaderboard = LightevalTaskConfig( name="mmlu:professional_law", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="professional_law", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13483,7 +13484,7 @@ mmlu_professional_law_helm = LightevalTaskConfig( name="mmlu:professional_law", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="professional_law", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13501,7 +13502,7 @@ mmlu_professional_medicine_original = LightevalTaskConfig( name="mmlu:professional_medicine", suite=["original", "mmlu"], - prompt_function="mmlu_professional_medicine", + prompt_function=prompt.mmlu_professional_medicine, hf_repo="cais/mmlu", hf_subset="professional_medicine", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13519,7 +13520,7 @@ mmlu_professional_medicine_leaderboard = LightevalTaskConfig( name="mmlu:professional_medicine", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="professional_medicine", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13537,7 +13538,7 @@ mmlu_professional_medicine_helm = LightevalTaskConfig( name="mmlu:professional_medicine", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="professional_medicine", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13555,7 +13556,7 @@ mmlu_professional_psychology_original = LightevalTaskConfig( name="mmlu:professional_psychology", suite=["original", "mmlu"], - 
prompt_function="mmlu_professional_psychology", + prompt_function=prompt.mmlu_professional_psychology, hf_repo="cais/mmlu", hf_subset="professional_psychology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13573,7 +13574,7 @@ mmlu_professional_psychology_leaderboard = LightevalTaskConfig( name="mmlu:professional_psychology", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="professional_psychology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13591,7 +13592,7 @@ mmlu_professional_psychology_helm = LightevalTaskConfig( name="mmlu:professional_psychology", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="professional_psychology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13609,7 +13610,7 @@ mmlu_public_relations_original = LightevalTaskConfig( name="mmlu:public_relations", suite=["original", "mmlu"], - prompt_function="mmlu_public_relations", + prompt_function=prompt.mmlu_public_relations, hf_repo="cais/mmlu", hf_subset="public_relations", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13627,7 +13628,7 @@ mmlu_public_relations_leaderboard = LightevalTaskConfig( name="mmlu:public_relations", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="public_relations", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13645,7 +13646,7 @@ mmlu_public_relations_helm = LightevalTaskConfig( name="mmlu:public_relations", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="public_relations", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13663,7 +13664,7 @@ mmlu_security_studies_original = LightevalTaskConfig( name="mmlu:security_studies", suite=["original", "mmlu"], - prompt_function="mmlu_security_studies", + prompt_function=prompt.mmlu_security_studies, hf_repo="cais/mmlu", hf_subset="security_studies", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13681,7 +13682,7 @@ mmlu_security_studies_leaderboard = LightevalTaskConfig( name="mmlu:security_studies", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="security_studies", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13699,7 +13700,7 @@ mmlu_security_studies_helm = LightevalTaskConfig( name="mmlu:security_studies", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="security_studies", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13717,7 +13718,7 @@ mmlu_sociology_original = LightevalTaskConfig( name="mmlu:sociology", suite=["original", "mmlu"], - prompt_function="mmlu_sociology", + prompt_function=prompt.mmlu_sociology, hf_repo="cais/mmlu", hf_subset="sociology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13735,7 +13736,7 @@ mmlu_sociology_leaderboard = LightevalTaskConfig( name="mmlu:sociology", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="sociology", hf_avail_splits=["auxiliary_train", "test", 
"validation", "dev"], @@ -13753,7 +13754,7 @@ mmlu_sociology_helm = LightevalTaskConfig( name="mmlu:sociology", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="sociology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13771,7 +13772,7 @@ mmlu_us_foreign_policy_original = LightevalTaskConfig( name="mmlu:us_foreign_policy", suite=["original", "mmlu"], - prompt_function="mmlu_us_foreign_policy", + prompt_function=prompt.mmlu_us_foreign_policy, hf_repo="cais/mmlu", hf_subset="us_foreign_policy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13789,7 +13790,7 @@ mmlu_us_foreign_policy_leaderboard = LightevalTaskConfig( name="mmlu:us_foreign_policy", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="us_foreign_policy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13807,7 +13808,7 @@ mmlu_us_foreign_policy_helm = LightevalTaskConfig( name="mmlu:us_foreign_policy", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="us_foreign_policy", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13825,7 +13826,7 @@ mmlu_virology_original = LightevalTaskConfig( name="mmlu:virology", suite=["original", "mmlu"], - prompt_function="mmlu_virology", + prompt_function=prompt.mmlu_virology, hf_repo="cais/mmlu", hf_subset="virology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13843,7 +13844,7 @@ mmlu_virology_leaderboard = LightevalTaskConfig( name="mmlu:virology", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="virology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13861,7 +13862,7 @@ mmlu_virology_helm = LightevalTaskConfig( name="mmlu:virology", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="virology", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13879,7 +13880,7 @@ mmlu_world_religions_original = LightevalTaskConfig( name="mmlu:world_religions", suite=["original", "mmlu"], - prompt_function="mmlu_world_religions", + prompt_function=prompt.mmlu_world_religions, hf_repo="cais/mmlu", hf_subset="world_religions", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13897,7 +13898,7 @@ mmlu_world_religions_leaderboard = LightevalTaskConfig( name="mmlu:world_religions", suite=["leaderboard", "mmlu"], - prompt_function="mmlu_harness", + prompt_function=prompt.mmlu_harness, hf_repo="lighteval/mmlu", hf_subset="world_religions", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13915,7 +13916,7 @@ mmlu_world_religions_helm = LightevalTaskConfig( name="mmlu:world_religions", suite=["helm", "helm_general"], - prompt_function="mmlu_helm", + prompt_function=prompt.mmlu_helm, hf_repo="lighteval/mmlu", hf_subset="world_religions", hf_avail_splits=["auxiliary_train", "test", "validation", "dev"], @@ -13933,7 +13934,7 @@ mnist_ascii_bigbench = LightevalTaskConfig( name="mnist_ascii", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="mnist_ascii", hf_avail_splits=["default", "train", "validation"], @@ -13951,7 
+13952,7 @@ modified_arithmetic_bigbench = LightevalTaskConfig( name="modified_arithmetic", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="modified_arithmetic", hf_avail_splits=["default", "train", "validation"], @@ -13969,7 +13970,7 @@ moral_permissibility_bigbench = LightevalTaskConfig( name="moral_permissibility", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="moral_permissibility", hf_avail_splits=["default", "train", "validation"], @@ -13987,7 +13988,7 @@ movie_dialog_same_or_different_bigbench = LightevalTaskConfig( name="movie_dialog_same_or_different", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="movie_dialog_same_or_different", hf_avail_splits=["default", "train", "validation"], @@ -14005,7 +14006,7 @@ movie_recommendation_bigbench = LightevalTaskConfig( name="movie_recommendation", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="movie_recommendation", hf_avail_splits=["default", "train", "validation"], @@ -14023,7 +14024,7 @@ mtnt2019_en_fr_lighteval = LightevalTaskConfig( name="mtnt2019:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="mtnt2019_en-fr", hf_avail_splits=["test"], @@ -14041,7 +14042,7 @@ mtnt2019_en_ja_lighteval = LightevalTaskConfig( name="mtnt2019:en-ja", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="mtnt2019_en-ja", hf_avail_splits=["test"], @@ -14059,7 +14060,7 @@ mtnt2019_fr_en_lighteval = LightevalTaskConfig( name="mtnt2019:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="mtnt2019_fr-en", hf_avail_splits=["test"], @@ -14077,7 +14078,7 @@ mtnt2019_ja_en_lighteval = LightevalTaskConfig( name="mtnt2019:ja-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="mtnt2019_ja-en", hf_avail_splits=["test"], @@ -14095,7 +14096,7 @@ mult_data_wrangling_bigbench = LightevalTaskConfig( name="mult_data_wrangling", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="mult_data_wrangling", hf_avail_splits=["default", "train", "validation"], @@ -14113,7 +14114,7 @@ multiemo_bigbench = LightevalTaskConfig( name="multiemo", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="multiemo", hf_avail_splits=["default", "train", "validation"], @@ -14131,7 +14132,7 @@ mutual_lighteval = LightevalTaskConfig( name="mutual", suite=["lighteval"], - prompt_function="mutual", + prompt_function=prompt.mutual, hf_repo="lighteval/mutual_harness", hf_subset="mutual", hf_avail_splits=["train", "validation"], @@ -14149,7 +14150,7 @@ mutual_plus_lighteval = LightevalTaskConfig( name="mutual_plus", suite=["lighteval"], - prompt_function="mutual", + prompt_function=prompt.mutual, 
hf_repo="lighteval/mutual_harness", hf_subset="mutual_plus", hf_avail_splits=["train", "validation"], @@ -14167,7 +14168,7 @@ narrativeqa_helm = LightevalTaskConfig( name="narrativeqa", suite=["helm", "helm_general"], - prompt_function="narrativeqa", + prompt_function=prompt.narrativeqa, hf_repo="lighteval/narrative_qa_helm", hf_subset="default", hf_avail_splits=["train", "test", "validation"], @@ -14185,7 +14186,7 @@ natural_instructions_bigbench = LightevalTaskConfig( name="natural_instructions", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="natural_instructions", hf_avail_splits=["default", "train", "validation"], @@ -14203,7 +14204,7 @@ navigate_bigbench = LightevalTaskConfig( name="navigate", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="navigate", hf_avail_splits=["default", "train", "validation"], @@ -14221,7 +14222,7 @@ nonsense_words_grammar_bigbench = LightevalTaskConfig( name="nonsense_words_grammar", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="nonsense_words_grammar", hf_avail_splits=["default", "train", "validation"], @@ -14239,7 +14240,7 @@ novel_concepts_bigbench_lite = LightevalTaskConfig( name="novel_concepts", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="novel_concepts", hf_avail_splits=["default", "train", "validation"], @@ -14257,7 +14258,7 @@ numeracy_linear_example_helm = LightevalTaskConfig( name="numeracy:linear_example", suite=["helm"], - prompt_function="numeracy", + prompt_function=prompt.numeracy, hf_repo="lighteval/numeracy", hf_subset="linear_example", hf_avail_splits=["train", "test"], @@ -14275,7 +14276,7 @@ numeracy_linear_standard_helm = LightevalTaskConfig( name="numeracy:linear_standard", suite=["helm"], - prompt_function="numeracy", + prompt_function=prompt.numeracy, hf_repo="lighteval/numeracy", hf_subset="linear_standard", hf_avail_splits=["train", "test"], @@ -14293,7 +14294,7 @@ numeracy_parabola_example_helm = LightevalTaskConfig( name="numeracy:parabola_example", suite=["helm"], - prompt_function="numeracy", + prompt_function=prompt.numeracy, hf_repo="lighteval/numeracy", hf_subset="parabola_example", hf_avail_splits=["train", "test"], @@ -14311,7 +14312,7 @@ numeracy_parabola_standard_helm = LightevalTaskConfig( name="numeracy:parabola_standard", suite=["helm"], - prompt_function="numeracy", + prompt_function=prompt.numeracy, hf_repo="lighteval/numeracy", hf_subset="parabola_standard", hf_avail_splits=["train", "test"], @@ -14329,7 +14330,7 @@ numeracy_paraboloid_example_helm = LightevalTaskConfig( name="numeracy:paraboloid_example", suite=["helm"], - prompt_function="numeracy", + prompt_function=prompt.numeracy, hf_repo="lighteval/numeracy", hf_subset="paraboloid_example", hf_avail_splits=["train", "test"], @@ -14347,7 +14348,7 @@ numeracy_paraboloid_standard_helm = LightevalTaskConfig( name="numeracy:paraboloid_standard", suite=["helm"], - prompt_function="numeracy", + prompt_function=prompt.numeracy, hf_repo="lighteval/numeracy", hf_subset="paraboloid_standard", hf_avail_splits=["train", "test"], @@ -14365,7 +14366,7 @@ numeracy_plane_example_helm = LightevalTaskConfig( name="numeracy:plane_example", suite=["helm"], - prompt_function="numeracy", + prompt_function=prompt.numeracy, 
hf_repo="lighteval/numeracy", hf_subset="plane_example", hf_avail_splits=["train", "test"], @@ -14383,7 +14384,7 @@ numeracy_plane_standard_helm = LightevalTaskConfig( name="numeracy:plane_standard", suite=["helm"], - prompt_function="numeracy", + prompt_function=prompt.numeracy, hf_repo="lighteval/numeracy", hf_subset="plane_standard", hf_avail_splits=["train", "test"], @@ -14401,7 +14402,7 @@ object_counting_bigbench = LightevalTaskConfig( name="object_counting", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="object_counting", hf_avail_splits=["default", "train", "validation"], @@ -14419,7 +14420,7 @@ odd_one_out_bigbench = LightevalTaskConfig( name="odd_one_out", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="odd_one_out", hf_avail_splits=["default", "train", "validation"], @@ -14437,7 +14438,7 @@ openbookqa_helm = LightevalTaskConfig( name="openbookqa", suite=["helm", "commonsense_scenario", "helm_general"], - prompt_function="openbookqa_helm", + prompt_function=prompt.openbookqa_helm, hf_repo="openbookqa", hf_subset="main", hf_avail_splits=["train", "test", "validation"], @@ -14455,7 +14456,7 @@ openbookqa_lighteval = LightevalTaskConfig( name="openbookqa", suite=["lighteval"], - prompt_function="openbookqa", + prompt_function=prompt.openbookqa, hf_repo="openbookqa", hf_subset="main", hf_avail_splits=["train", "test", "validation"], @@ -14473,7 +14474,7 @@ operators_bigbench_lite = LightevalTaskConfig( name="operators", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_whitespace_after_query", + prompt_function=prompt.bigbench_whitespace_after_query, hf_repo="bigbench", hf_subset="operators", hf_avail_splits=["default", "train", "validation"], @@ -14490,7 +14491,7 @@ paragraph_segmentation_bigbench = LightevalTaskConfig( name="paragraph_segmentation", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="paragraph_segmentation", hf_avail_splits=["default", "train", "validation"], @@ -14508,7 +14509,7 @@ parsinlu_qa_bigbench = LightevalTaskConfig( name="parsinlu_qa", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="parsinlu_qa", hf_avail_splits=["default", "train", "validation"], @@ -14526,7 +14527,7 @@ parsinlu_reading_comprehension_bigbench_lite = LightevalTaskConfig( name="parsinlu_reading_comprehension", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_whitespace_after_query", + prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query, hf_repo="bigbench", hf_subset="parsinlu_reading_comprehension", hf_avail_splits=["default", "train", "validation"], @@ -14543,7 +14544,7 @@ penguins_in_a_table_bigbench = LightevalTaskConfig( name="penguins_in_a_table", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="penguins_in_a_table", hf_avail_splits=["default", "train", "validation"], @@ -14561,7 +14562,7 @@ periodic_elements_bigbench = LightevalTaskConfig( name="periodic_elements", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="periodic_elements", hf_avail_splits=["default", "train", "validation"], @@ 
@@ -14579,7 +14580,7 @@ persian_idioms_bigbench = LightevalTaskConfig(
     name="persian_idioms",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="persian_idioms",
     hf_avail_splits=["default", "train", "validation"],
@@ -14597,7 +14598,7 @@ phrase_relatedness_bigbench = LightevalTaskConfig(
     name="phrase_relatedness",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="phrase_relatedness",
     hf_avail_splits=["default", "train", "validation"],
@@ -14615,7 +14616,7 @@ physical_intuition_bigbench = LightevalTaskConfig(
     name="physical_intuition",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="physical_intuition",
     hf_avail_splits=["default", "train", "validation"],
@@ -14633,7 +14634,7 @@ physics_bigbench = LightevalTaskConfig(
     name="physics",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="physics",
     hf_avail_splits=["default", "train", "validation"],
@@ -14651,7 +14652,7 @@ physics_questions_bigbench = LightevalTaskConfig(
     name="physics_questions",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="physics_questions",
     hf_avail_splits=["default", "train", "validation"],
@@ -14669,7 +14670,7 @@ piqa_lighteval = LightevalTaskConfig(
     name="piqa",
     suite=["lighteval"],
-    prompt_function="piqa_harness",
+    prompt_function=prompt.piqa_harness,
     hf_repo="piqa",
     hf_subset="plain_text",
     hf_avail_splits=["train", "test", "validation"],
@@ -14687,7 +14688,7 @@ piqa_helm = LightevalTaskConfig(
     name="piqa",
     suite=["helm", "commonsense_scenario"],
-    prompt_function="piqa_helm",
+    prompt_function=prompt.piqa_helm,
     hf_repo="piqa",
     hf_subset="plain_text",
     hf_avail_splits=["train", "test", "validation"],
@@ -14705,7 +14706,7 @@ play_dialog_same_or_different_bigbench_lite = LightevalTaskConfig(
     name="play_dialog_same_or_different",
     suite=["bigbench_lite", "bigbench", "bigbench_json"],
-    prompt_function="bigbench_linefeed_before_whitespace_after_query",
+    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
     hf_repo="bigbench",
     hf_subset="play_dialog_same_or_different",
     hf_avail_splits=["default", "train", "validation"],
@@ -14723,7 +14724,7 @@ polish_sequence_labeling_bigbench = LightevalTaskConfig(
     name="polish_sequence_labeling",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="polish_sequence_labeling",
     hf_avail_splits=["default", "train", "validation"],
@@ -14741,7 +14742,7 @@ presuppositions_as_nli_bigbench = LightevalTaskConfig(
     name="presuppositions_as_nli",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="presuppositions_as_nli",
     hf_avail_splits=["default", "train", "validation"],
@@ -14759,7 +14760,7 @@ prost_lighteval = LightevalTaskConfig(
     name="prost",
     suite=["lighteval"],
-    prompt_function="prost",
+    prompt_function=prompt.prost,
     hf_repo="corypaik/prost",
     hf_subset="default",
     hf_avail_splits=["test"],
@@ -14777,7 +14778,7 @@ pubmedqa_lighteval = LightevalTaskConfig(
     name="pubmedqa",
     suite=["lighteval"],
-    prompt_function="pubmed_qa",
+    prompt_function=prompt.pubmed_qa,
     hf_repo="pubmed_qa",
     hf_subset="pqa_labeled",
hf_avail_splits=["train"], @@ -14795,7 +14796,7 @@ pubmedqa_helm = LightevalTaskConfig( name="pubmedqa", suite=["helm"], - prompt_function="pubmed_qa_helm", + prompt_function=prompt.pubmed_qa_helm, hf_repo="pubmed_qa", hf_subset="pqa_labeled", hf_avail_splits=["train"], @@ -14813,7 +14814,7 @@ qa4mre_2011_lighteval = LightevalTaskConfig( name="qa4mre:2011", suite=["lighteval"], - prompt_function="qa4mre", + prompt_function=prompt.qa4mre, hf_repo="qa4mre", hf_subset="2011.main.EN", hf_avail_splits=["train"], @@ -14831,7 +14832,7 @@ qa4mre_2012_lighteval = LightevalTaskConfig( name="qa4mre:2012", suite=["lighteval"], - prompt_function="qa4mre", + prompt_function=prompt.qa4mre, hf_repo="qa4mre", hf_subset="2012.main.EN", hf_avail_splits=["train"], @@ -14849,7 +14850,7 @@ qa4mre_2013_lighteval = LightevalTaskConfig( name="qa4mre:2013", suite=["lighteval"], - prompt_function="qa4mre", + prompt_function=prompt.qa4mre, hf_repo="qa4mre", hf_subset="2013.main.EN", hf_avail_splits=["train"], @@ -14867,7 +14868,7 @@ qa_wikidata_bigbench = LightevalTaskConfig( name="qa_wikidata", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="qa_wikidata", hf_avail_splits=["default", "train", "validation"], @@ -14885,7 +14886,7 @@ qasper_lighteval = LightevalTaskConfig( name="qasper", suite=["lighteval"], - prompt_function="qasper", + prompt_function=prompt.qasper, hf_repo="qasper", hf_subset="qasper", hf_avail_splits=["train", "validation"], @@ -14903,7 +14904,7 @@ qasper_ll_lighteval = LightevalTaskConfig( name="qasper_ll", suite=["lighteval"], - prompt_function="qasper_ll", + prompt_function=prompt.qasper_ll, hf_repo="qasper", hf_subset="qasper", hf_avail_splits=["train", "validation"], @@ -14921,7 +14922,7 @@ quac_helm = LightevalTaskConfig( name="quac", suite=["helm"], - prompt_function="quac", + prompt_function=prompt.quac, hf_repo="lighteval/quac_helm", hf_subset="default", hf_avail_splits=["train", "validation"], @@ -14939,7 +14940,7 @@ question_selection_bigbench = LightevalTaskConfig( name="question_selection", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="question_selection", hf_avail_splits=["default", "train", "validation"], @@ -14957,7 +14958,7 @@ race_high_lighteval = LightevalTaskConfig( name="race:high", suite=["lighteval", "race"], - prompt_function="race", + prompt_function=prompt.race, hf_repo="EleutherAI/race", hf_subset="high", hf_avail_splits=["test"], @@ -14975,7 +14976,7 @@ raft_ade_corpus_v2_helm = LightevalTaskConfig( name="raft:ade_corpus_v2", suite=["helm", "helm_general"], - prompt_function="raft_ade_corpus_v2", + prompt_function=prompt.raft_ade_corpus_v2, hf_repo="ought/raft", hf_subset="ade_corpus_v2", hf_avail_splits=["train", "test"], @@ -15000,7 +15001,7 @@ raft_banking_77_helm = LightevalTaskConfig( name="raft:banking_77", suite=["helm", "helm_general"], - prompt_function="raft_banking_77", + prompt_function=prompt.raft_banking_77, hf_repo="ought/raft", hf_subset="banking_77", hf_avail_splits=["train", "test"], @@ -15025,7 +15026,7 @@ raft_neurips_impact_statement_risks_helm = LightevalTaskConfig( name="raft:neurips_impact_statement_risks", suite=["helm", "helm_general"], - prompt_function="raft_neurips_impact_statement_risks", + prompt_function=prompt.raft_neurips_impact_statement_risks, hf_repo="ought/raft", hf_subset="neurips_impact_statement_risks", hf_avail_splits=["train", "test"], @@ -15050,7 +15051,7 @@ 
@@ -15050,7 +15051,7 @@ raft_one_stop_english_helm = LightevalTaskConfig(
     name="raft:one_stop_english",
     suite=["helm", "helm_general"],
-    prompt_function="raft_one_stop_english",
+    prompt_function=prompt.raft_one_stop_english,
     hf_repo="ought/raft",
     hf_subset="one_stop_english",
     hf_avail_splits=["train", "test"],
@@ -15075,7 +15076,7 @@ raft_overruling_helm = LightevalTaskConfig(
     name="raft:overruling",
     suite=["helm", "helm_general"],
-    prompt_function="raft_overruling",
+    prompt_function=prompt.raft_overruling,
     hf_repo="ought/raft",
     hf_subset="overruling",
     hf_avail_splits=["train", "test"],
@@ -15100,7 +15101,7 @@ raft_semiconductor_org_types_helm = LightevalTaskConfig(
     name="raft:semiconductor_org_types",
     suite=["helm", "helm_general"],
-    prompt_function="raft_semiconductor_org_types",
+    prompt_function=prompt.raft_semiconductor_org_types,
     hf_repo="ought/raft",
     hf_subset="semiconductor_org_types",
     hf_avail_splits=["train", "test"],
@@ -15125,7 +15126,7 @@ raft_systematic_review_inclusion_helm = LightevalTaskConfig(
     name="raft:systematic_review_inclusion",
     suite=["helm", "helm_general"],
-    prompt_function="raft_systematic_review_inclusion",
+    prompt_function=prompt.raft_systematic_review_inclusion,
     hf_repo="ought/raft",
     hf_subset="systematic_review_inclusion",
     hf_avail_splits=["train", "test"],
@@ -15150,7 +15151,7 @@ raft_tai_safety_research_helm = LightevalTaskConfig(
     name="raft:tai_safety_research",
     suite=["helm", "helm_general"],
-    prompt_function="raft_tai_safety_research",
+    prompt_function=prompt.raft_tai_safety_research,
     hf_repo="ought/raft",
     hf_subset="tai_safety_research",
     hf_avail_splits=["train", "test"],
@@ -15175,7 +15176,7 @@ raft_terms_of_service_helm = LightevalTaskConfig(
     name="raft:terms_of_service",
     suite=["helm", "helm_general"],
-    prompt_function="raft_terms_of_service",
+    prompt_function=prompt.raft_terms_of_service,
     hf_repo="ought/raft",
     hf_subset="terms_of_service",
     hf_avail_splits=["train", "test"],
@@ -15200,7 +15201,7 @@ raft_tweet_eval_hate_helm = LightevalTaskConfig(
     name="raft:tweet_eval_hate",
     suite=["helm", "helm_general"],
-    prompt_function="raft_tweet_eval_hate",
+    prompt_function=prompt.raft_tweet_eval_hate,
     hf_repo="ought/raft",
     hf_subset="tweet_eval_hate",
     hf_avail_splits=["train", "test"],
@@ -15225,7 +15226,7 @@ raft_twitter_complaints_helm = LightevalTaskConfig(
     name="raft:twitter_complaints",
     suite=["helm", "helm_general"],
-    prompt_function="raft_twitter_complaints",
+    prompt_function=prompt.raft_twitter_complaints,
     hf_repo="ought/raft",
     hf_subset="twitter_complaints",
     hf_avail_splits=["train", "test"],
@@ -15250,7 +15251,7 @@ real_or_fake_text_bigbench = LightevalTaskConfig(
     name="real_or_fake_text",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="real_or_fake_text",
     hf_avail_splits=["default", "train", "validation"],
@@ -15268,7 +15269,7 @@ real_toxicity_prompts_helm = LightevalTaskConfig(
     name="real_toxicity_prompts",
     suite=["helm"],
-    prompt_function="real_toxicity_prompts",
+    prompt_function=prompt.real_toxicity_prompts,
     hf_repo="allenai/real-toxicity-prompts",
     hf_subset="default",
     hf_avail_splits=["train"],
@@ -15286,7 +15287,7 @@ reasoning_about_colored_objects_bigbench = LightevalTaskConfig(
     name="reasoning_about_colored_objects",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="reasoning_about_colored_objects",
     hf_avail_splits=["default", "train", "validation"],
@@ -15304,7 +15305,7 @@ repeat_copy_logic_bigbench_lite = LightevalTaskConfig(
     name="repeat_copy_logic",
     suite=["bigbench_lite", "bigbench", "bigbench_json"],
-    prompt_function="bigbench_whitespace_after_query",
+    prompt_function=prompt.bigbench_whitespace_after_query,
     hf_repo="bigbench",
     hf_subset="repeat_copy_logic",
     hf_avail_splits=["default", "train", "validation"],
@@ -15322,7 +15323,7 @@ rephrase_bigbench = LightevalTaskConfig(
     name="rephrase",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="rephrase",
     hf_avail_splits=["default", "train", "validation"],
@@ -15340,7 +15341,7 @@ rhyming_bigbench = LightevalTaskConfig(
     name="rhyming",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="rhyming",
     hf_avail_splits=["default", "train", "validation"],
@@ -15358,7 +15359,7 @@ riddle_sense_bigbench = LightevalTaskConfig(
     name="riddle_sense",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="riddle_sense",
     hf_avail_splits=["default", "train", "validation"],
@@ -15376,7 +15377,7 @@ ruin_names_bigbench = LightevalTaskConfig(
     name="ruin_names",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="ruin_names",
     hf_avail_splits=["default", "train", "validation"],
@@ -15394,7 +15395,7 @@ salient_translation_error_detection_bigbench = LightevalTaskConfig(
     name="salient_translation_error_detection",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="salient_translation_error_detection",
     hf_avail_splits=["default", "train", "validation"],
@@ -15412,7 +15413,7 @@ scientific_press_release_bigbench = LightevalTaskConfig(
     name="scientific_press_release",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="scientific_press_release",
     hf_avail_splits=["default", "train", "validation"],
@@ -15430,7 +15431,7 @@ sciq_lighteval = LightevalTaskConfig(
     name="sciq",
     suite=["lighteval"],
-    prompt_function="sciq",
+    prompt_function=prompt.sciq,
     hf_repo="sciq",
     hf_subset="default",
     hf_avail_splits=["train", "validation", "test"],
@@ -15448,7 +15449,7 @@ semantic_parsing_in_context_sparc_bigbench = LightevalTaskConfig(
     name="semantic_parsing_in_context_sparc",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="semantic_parsing_in_context_sparc",
     hf_avail_splits=["default", "train", "validation"],
@@ -15466,7 +15467,7 @@ semantic_parsing_spider_bigbench = LightevalTaskConfig(
     name="semantic_parsing_spider",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="semantic_parsing_spider",
     hf_avail_splits=["default", "train", "validation"],
@@ -15484,7 +15485,7 @@ sentence_ambiguity_bigbench = LightevalTaskConfig(
     name="sentence_ambiguity",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="sentence_ambiguity",
     hf_avail_splits=["default", "train", "validation"],
@@ -15502,7 +15503,7 @@ similarities_abstraction_bigbench = LightevalTaskConfig(
     name="similarities_abstraction",
     suite=["bigbench", "bigbench_json"],
prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="similarities_abstraction", hf_avail_splits=["default", "train", "validation"], @@ -15520,7 +15521,7 @@ simp_turing_concept_bigbench = LightevalTaskConfig( name="simp_turing_concept", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="simp_turing_concept", hf_avail_splits=["default", "train", "validation"], @@ -15538,7 +15539,7 @@ simple_arithmetic_json_bigbench = LightevalTaskConfig( name="simple_arithmetic_json", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="simple_arithmetic_json", hf_avail_splits=["default", "train", "validation"], @@ -15556,7 +15557,7 @@ simple_arithmetic_json_multiple_choice_bigbench = LightevalTaskConfig( name="simple_arithmetic_json_multiple_choice", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="simple_arithmetic_json_multiple_choice", hf_avail_splits=["default", "train", "validation"], @@ -15574,7 +15575,7 @@ simple_arithmetic_json_subtasks_bigbench = LightevalTaskConfig( name="simple_arithmetic_json_subtasks", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="simple_arithmetic_json_subtasks", hf_avail_splits=["default", "train", "validation"], @@ -15592,7 +15593,7 @@ simple_arithmetic_multiple_targets_json_bigbench = LightevalTaskConfig( name="simple_arithmetic_multiple_targets_json", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="simple_arithmetic_multiple_targets_json", hf_avail_splits=["default", "train", "validation"], @@ -15610,7 +15611,7 @@ simple_ethical_questions_bigbench = LightevalTaskConfig( name="simple_ethical_questions", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="simple_ethical_questions", hf_avail_splits=["default", "train", "validation"], @@ -15628,7 +15629,7 @@ simple_text_editing_bigbench = LightevalTaskConfig( name="simple_text_editing", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="simple_text_editing", hf_avail_splits=["default", "train", "validation"], @@ -15646,7 +15647,7 @@ siqa_helm = LightevalTaskConfig( name="siqa", suite=["helm", "commonsense_scenario"], - prompt_function="siqa", + prompt_function=prompt.siqa, hf_repo="social_i_qa", hf_subset="default", hf_avail_splits=["train", "validation"], @@ -15664,7 +15665,7 @@ snarks_bigbench = LightevalTaskConfig( name="snarks", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="snarks", hf_avail_splits=["default", "train", "validation"], @@ -15682,7 +15683,7 @@ social_iqa_bigbench = LightevalTaskConfig( name="social_iqa", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="social_iqa", hf_avail_splits=["default", "train", "validation"], @@ -15700,7 +15701,7 @@ social_support_bigbench = LightevalTaskConfig( name="social_support", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", 
hf_subset="social_support", hf_avail_splits=["default", "train", "validation"], @@ -15718,7 +15719,7 @@ sports_understanding_bigbench = LightevalTaskConfig( name="sports_understanding", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="sports_understanding", hf_avail_splits=["default", "train", "validation"], @@ -15736,7 +15737,7 @@ storycloze_2016_lighteval = LightevalTaskConfig( name="storycloze:2016", suite=["lighteval", "storycloze"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="story_cloze", hf_subset="2016", hf_avail_splits=["validation"], @@ -15754,7 +15755,7 @@ storycloze_2018_lighteval = LightevalTaskConfig( name="storycloze:2018", suite=["lighteval", "storycloze"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="story_cloze", hf_subset="2018", hf_avail_splits=["validation"], @@ -15772,7 +15773,7 @@ strange_stories_bigbench_lite = LightevalTaskConfig( name="strange_stories", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_whitespace_after_query", + prompt_function=prompt.bigbench_whitespace_after_query, hf_repo="bigbench", hf_subset="strange_stories", hf_avail_splits=["default", "train", "validation"], @@ -15790,7 +15791,7 @@ strategyqa_bigbench_lite = LightevalTaskConfig( name="strategyqa", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_linefeed_before_whitespace_after_query", + prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query, hf_repo="bigbench", hf_subset="strategyqa", hf_avail_splits=["default", "train", "validation"], @@ -15808,7 +15809,7 @@ sufficient_information_bigbench = LightevalTaskConfig( name="sufficient_information", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="sufficient_information", hf_avail_splits=["default", "train", "validation"], @@ -15826,7 +15827,7 @@ suicide_risk_bigbench = LightevalTaskConfig( name="suicide_risk", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="suicide_risk", hf_avail_splits=["default", "train", "validation"], @@ -15844,7 +15845,7 @@ summarization_cnn_dm_helm = LightevalTaskConfig( name="summarization:cnn-dm", suite=["helm", "helm_general"], - prompt_function="cnn_dm", + prompt_function=prompt.cnn_dm, hf_repo="lighteval/summarization", hf_subset="cnn-dm", hf_avail_splits=["train", "test", "validation"], @@ -15862,7 +15863,7 @@ summarization_xsum_helm = LightevalTaskConfig( name="summarization:xsum", suite=["helm", "helm_general"], - prompt_function="xsum", + prompt_function=prompt.xsum, hf_repo="lighteval/summarization", hf_subset="xsum", hf_avail_splits=["train", "test", "validation"], @@ -15880,7 +15881,7 @@ summarization_xsum_sampled_helm = LightevalTaskConfig( name="summarization:xsum-sampled", suite=["helm"], - prompt_function="xsum", + prompt_function=prompt.xsum, hf_repo="lighteval/summarization", hf_subset="xsum-sampled", hf_avail_splits=["train", "test", "validation"], @@ -15898,7 +15899,7 @@ super_glue_boolq_lighteval = LightevalTaskConfig( name="super_glue:boolq", suite=["lighteval", "superglue"], - prompt_function="boolq_harness", + prompt_function=prompt.boolq_harness, hf_repo="super_glue", hf_subset="boolq", hf_avail_splits=["test", "train", "validation"], @@ -15916,7 +15917,7 @@ super_glue_cb_lighteval = 
@@ -15916,7 +15917,7 @@ super_glue_cb_lighteval = LightevalTaskConfig(
     name="super_glue:cb",
     suite=["lighteval", "superglue"],
-    prompt_function="cb",
+    prompt_function=prompt.cb,
     hf_repo="super_glue",
     hf_subset="cb",
     hf_avail_splits=["test", "train", "validation"],
@@ -15934,7 +15935,7 @@ super_glue_copa_lighteval = LightevalTaskConfig(
     name="super_glue:copa",
     suite=["lighteval", "superglue"],
-    prompt_function="copa",
+    prompt_function=prompt.copa,
     hf_repo="super_glue",
     hf_subset="copa",
     hf_avail_splits=["test", "train", "validation"],
@@ -15952,7 +15953,7 @@ super_glue_rte_lighteval = LightevalTaskConfig(
     name="super_glue:rte",
     suite=["lighteval", "superglue"],
-    prompt_function="rte",
+    prompt_function=prompt.rte,
     hf_repo="super_glue",
     hf_subset="rte",
     hf_avail_splits=["test", "train", "validation"],
@@ -15970,7 +15971,7 @@ super_glue_multirc_lighteval = LightevalTaskConfig(
     name="super_glue:multirc",
     suite=["lighteval", "superglue"],
-    prompt_function="multirc",
+    prompt_function=prompt.multirc,
     hf_repo="super_glue",
     hf_subset="multirc",
     hf_avail_splits=["train", "validation"],
@@ -15988,7 +15989,7 @@ super_glue_wic_lighteval = LightevalTaskConfig(
     name="super_glue:wic",
     suite=["lighteval", "superglue"],
-    prompt_function="wic",
+    prompt_function=prompt.wic,
     hf_repo="super_glue",
     hf_subset="wic",
     hf_avail_splits=["test", "train", "validation"],
@@ -16006,7 +16007,7 @@ super_glue_wsc_lighteval = LightevalTaskConfig(
     name="super_glue:wsc",
     suite=["lighteval", "superglue"],
-    prompt_function="wsc",
+    prompt_function=prompt.wsc,
     hf_repo="super_glue",
     hf_subset="wsc",
     hf_avail_splits=["test", "train", "validation"],
@@ -16024,7 +16025,7 @@ swahili_english_proverbs_bigbench = LightevalTaskConfig(
     name="swahili_english_proverbs",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="swahili_english_proverbs",
     hf_avail_splits=["default", "train", "validation"],
@@ -16042,7 +16043,7 @@ swag_lighteval = LightevalTaskConfig(
     name="swag",
     suite=["lighteval"],
-    prompt_function="swag",
+    prompt_function=prompt.swag,
     hf_repo="swag",
     hf_subset="regular",
     hf_avail_splits=["train", "validation"],
@@ -16060,7 +16061,7 @@ swedish_to_german_proverbs_bigbench = LightevalTaskConfig(
     name="swedish_to_german_proverbs",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="swedish_to_german_proverbs",
     hf_avail_splits=["default", "train", "validation"],
@@ -16078,7 +16079,7 @@ symbol_interpretation_bigbench_lite = LightevalTaskConfig(
     name="symbol_interpretation",
     suite=["bigbench_lite", "bigbench", "bigbench_json"],
-    prompt_function="bigbench_linefeed_before_whitespace_after_query",
+    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
     hf_repo="bigbench",
     hf_subset="symbol_interpretation",
     hf_avail_splits=["default", "train", "validation"],
@@ -16096,7 +16097,7 @@ synthetic_reasoning_induction_helm = LightevalTaskConfig(
     name="synthetic_reasoning:induction",
     suite=["helm"],
-    prompt_function="synthetic_reasoning",
+    prompt_function=prompt.synthetic_reasoning,
     hf_repo="lighteval/synthetic_reasoning",
     hf_subset="induction",
     hf_avail_splits=["train", "test", "validation"],
@@ -16114,7 +16115,7 @@ synthetic_reasoning_natural_easy_helm = LightevalTaskConfig(
     name="synthetic_reasoning:natural_easy",
     suite=["helm"],
-    prompt_function="synthetic_reasoning_natural",
+    prompt_function=prompt.synthetic_reasoning_natural,
     hf_repo="lighteval/synthetic_reasoning_natural",
     hf_subset="easy",
hf_avail_splits=["train", "test", "validation"], @@ -16132,7 +16133,7 @@ synthetic_reasoning_natural_hard_helm = LightevalTaskConfig( name="synthetic_reasoning:natural_hard", suite=["helm"], - prompt_function="synthetic_reasoning_natural", + prompt_function=prompt.synthetic_reasoning_natural, hf_repo="lighteval/synthetic_reasoning_natural", hf_subset="hard", hf_avail_splits=["train", "test", "validation"], @@ -16150,7 +16151,7 @@ synthetic_reasoning_pattern_match_helm = LightevalTaskConfig( name="synthetic_reasoning:pattern_match", suite=["helm"], - prompt_function="synthetic_reasoning", + prompt_function=prompt.synthetic_reasoning, hf_repo="lighteval/synthetic_reasoning", hf_subset="pattern_match", hf_avail_splits=["train", "test", "validation"], @@ -16168,7 +16169,7 @@ synthetic_reasoning_variable_substitution_helm = LightevalTaskConfig( name="synthetic_reasoning:variable_substitution", suite=["helm"], - prompt_function="synthetic_reasoning", + prompt_function=prompt.synthetic_reasoning, hf_repo="lighteval/synthetic_reasoning", hf_subset="variable_substitution", hf_avail_splits=["train", "test", "validation"], @@ -16186,7 +16187,7 @@ tellmewhy_bigbench = LightevalTaskConfig( name="tellmewhy", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="tellmewhy", hf_avail_splits=["default", "train", "validation"], @@ -16204,7 +16205,7 @@ temporal_sequences_bigbench = LightevalTaskConfig( name="temporal_sequences", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="temporal_sequences", hf_avail_splits=["default", "train", "validation"], @@ -16222,7 +16223,7 @@ tense_bigbench = LightevalTaskConfig( name="tense", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="tense", hf_avail_splits=["default", "train", "validation"], @@ -16240,7 +16241,7 @@ the_pile_arxiv_lighteval = LightevalTaskConfig( name="the_pile:arxiv", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_arxiv", hf_avail_splits=["validation", "test"], @@ -16258,7 +16259,7 @@ the_pile_arxiv_helm = LightevalTaskConfig( name="the_pile:arxiv", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="arxiv", hf_avail_splits=["test"], @@ -16276,7 +16277,7 @@ the_pile_bibliotik_helm = LightevalTaskConfig( name="the_pile:bibliotik", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="bibliotik", hf_avail_splits=["test"], @@ -16294,7 +16295,7 @@ the_pile_bookcorpus2_lighteval = LightevalTaskConfig( name="the_pile:bookcorpus2", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_bookcorpus2", hf_avail_splits=["validation", "test"], @@ -16312,7 +16313,7 @@ the_pile_books3_lighteval = LightevalTaskConfig( name="the_pile:books3", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_books3", hf_avail_splits=["validation", "test"], @@ -16330,7 +16331,7 @@ the_pile_commoncrawl_helm = LightevalTaskConfig( name="the_pile:commoncrawl", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, 
hf_repo="lighteval/pile_helm", hf_subset="commoncrawl", hf_avail_splits=["test"], @@ -16348,7 +16349,7 @@ the_pile_dm_mathematics_lighteval = LightevalTaskConfig( name="the_pile:dm-mathematics", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_dm-mathematics", hf_avail_splits=["validation", "test"], @@ -16366,7 +16367,7 @@ the_pile_dm_mathematics_helm = LightevalTaskConfig( name="the_pile:dm-mathematics", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="dm-mathematics", hf_avail_splits=["test"], @@ -16384,7 +16385,7 @@ the_pile_enron_lighteval = LightevalTaskConfig( name="the_pile:enron", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_enron", hf_avail_splits=["validation", "test"], @@ -16402,7 +16403,7 @@ the_pile_enron_helm = LightevalTaskConfig( name="the_pile:enron", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="enron", hf_avail_splits=["test"], @@ -16420,7 +16421,7 @@ the_pile_europarl_lighteval = LightevalTaskConfig( name="the_pile:europarl", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_europarl", hf_avail_splits=["validation", "test"], @@ -16438,7 +16439,7 @@ the_pile_europarl_helm = LightevalTaskConfig( name="the_pile:europarl", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="europarl", hf_avail_splits=["test"], @@ -16456,7 +16457,7 @@ the_pile_freelaw_lighteval = LightevalTaskConfig( name="the_pile:freelaw", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_freelaw", hf_avail_splits=["validation", "test"], @@ -16474,7 +16475,7 @@ the_pile_freelaw_helm = LightevalTaskConfig( name="the_pile:freelaw", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="freelaw", hf_avail_splits=["test"], @@ -16492,7 +16493,7 @@ the_pile_github_lighteval = LightevalTaskConfig( name="the_pile:github", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_github", hf_avail_splits=["validation", "test"], @@ -16510,7 +16511,7 @@ the_pile_github_helm = LightevalTaskConfig( name="the_pile:github", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="github", hf_avail_splits=["test"], @@ -16528,7 +16529,7 @@ the_pile_gutenberg_lighteval = LightevalTaskConfig( name="the_pile:gutenberg", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_gutenberg", hf_avail_splits=["validation", "test"], @@ -16546,7 +16547,7 @@ the_pile_gutenberg_helm = LightevalTaskConfig( name="the_pile:gutenberg", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="gutenberg", hf_avail_splits=["test"], @@ -16564,7 +16565,7 @@ the_pile_hackernews_lighteval = LightevalTaskConfig( name="the_pile:hackernews", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, 
hf_repo="lighteval/pile", hf_subset="pile_hackernews", hf_avail_splits=["validation", "test"], @@ -16582,7 +16583,7 @@ the_pile_hackernews_helm = LightevalTaskConfig( name="the_pile:hackernews", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="hackernews", hf_avail_splits=["test"], @@ -16600,7 +16601,7 @@ the_pile_nih_exporter_lighteval = LightevalTaskConfig( name="the_pile:nih-exporter", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_nih-exporter", hf_avail_splits=["validation", "test"], @@ -16618,7 +16619,7 @@ the_pile_nih_exporter_helm = LightevalTaskConfig( name="the_pile:nih-exporter", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="nih-exporter", hf_avail_splits=["test"], @@ -16636,7 +16637,7 @@ the_pile_opensubtitles_lighteval = LightevalTaskConfig( name="the_pile:opensubtitles", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_opensubtitles", hf_avail_splits=["validation", "test"], @@ -16654,7 +16655,7 @@ the_pile_opensubtitles_helm = LightevalTaskConfig( name="the_pile:opensubtitles", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="opensubtitles", hf_avail_splits=["test"], @@ -16672,7 +16673,7 @@ the_pile_openwebtext2_lighteval = LightevalTaskConfig( name="the_pile:openwebtext2", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_openwebtext2", hf_avail_splits=["validation", "test"], @@ -16690,7 +16691,7 @@ the_pile_openwebtext2_helm = LightevalTaskConfig( name="the_pile:openwebtext2", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="openwebtext2", hf_avail_splits=["test"], @@ -16708,7 +16709,7 @@ the_pile_philpapers_lighteval = LightevalTaskConfig( name="the_pile:philpapers", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_philpapers", hf_avail_splits=["validation", "test"], @@ -16726,7 +16727,7 @@ the_pile_pile_cc_lighteval = LightevalTaskConfig( name="the_pile:pile-cc", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_pile-cc", hf_avail_splits=["validation", "test"], @@ -16744,7 +16745,7 @@ the_pile_pubmed_abstracts_lighteval = LightevalTaskConfig( name="the_pile:pubmed-abstracts", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_pubmed-abstracts", hf_avail_splits=["validation", "test"], @@ -16762,7 +16763,7 @@ the_pile_pubmed_abstracts_helm = LightevalTaskConfig( name="the_pile:pubmed-abstracts", suite=["helm"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile_helm", hf_subset="pubmed-abstracts", hf_avail_splits=["test"], @@ -16780,7 +16781,7 @@ the_pile_pubmed_central_lighteval = LightevalTaskConfig( name="the_pile:pubmed-central", suite=["lighteval", "pile"], - prompt_function="the_pile", + prompt_function=prompt.the_pile, hf_repo="lighteval/pile", hf_subset="pile_pubmed-central", hf_avail_splits=["validation", "test"], @@ 
@@ -16798,7 +16799,7 @@ the_pile_pubmed_central_helm = LightevalTaskConfig(
     name="the_pile:pubmed-central",
     suite=["helm"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile_helm",
     hf_subset="pubmed-central",
     hf_avail_splits=["test"],
@@ -16816,7 +16817,7 @@ the_pile_stackexchange_lighteval = LightevalTaskConfig(
     name="the_pile:stackexchange",
     suite=["lighteval", "pile"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile",
     hf_subset="pile_stackexchange",
     hf_avail_splits=["validation", "test"],
@@ -16834,7 +16835,7 @@ the_pile_stackexchange_helm = LightevalTaskConfig(
     name="the_pile:stackexchange",
     suite=["helm"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile_helm",
     hf_subset="stackexchange",
     hf_avail_splits=["test"],
@@ -16852,7 +16853,7 @@ the_pile_ubuntu_irc_lighteval = LightevalTaskConfig(
     name="the_pile:ubuntu-irc",
     suite=["lighteval", "pile"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile",
     hf_subset="pile_ubuntu-irc",
     hf_avail_splits=["validation", "test"],
@@ -16870,7 +16871,7 @@ the_pile_uspto_lighteval = LightevalTaskConfig(
     name="the_pile:uspto",
     suite=["lighteval", "pile"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile",
     hf_subset="pile_upsto",
     hf_avail_splits=["validation", "test"],
@@ -16888,7 +16889,7 @@ the_pile_upsto_helm = LightevalTaskConfig(
     name="the_pile:upsto",
     suite=["helm"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile_helm",
     hf_subset="uspto",
     hf_avail_splits=["test"],
@@ -16906,7 +16907,7 @@ the_pile_wikipedia_lighteval = LightevalTaskConfig(
     name="the_pile:wikipedia",
     suite=["lighteval", "pile"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile",
     hf_subset="pile_wikipedia",
     hf_avail_splits=["validation", "test"],
@@ -16924,7 +16925,7 @@ the_pile_wikipedia_helm = LightevalTaskConfig(
     name="the_pile:wikipedia",
     suite=["helm"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile_helm",
     hf_subset="wikipedia",
     hf_avail_splits=["test"],
@@ -16942,7 +16943,7 @@ the_pile_youtubesubtitles_lighteval = LightevalTaskConfig(
     name="the_pile:youtubesubtitles",
     suite=["lighteval", "pile"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile",
     hf_subset="pile_youtubesubtitles",
     hf_avail_splits=["validation", "test"],
@@ -16960,7 +16961,7 @@ the_pile_youtubesubtitles_helm = LightevalTaskConfig(
     name="the_pile:youtubesubtitles",
     suite=["helm"],
-    prompt_function="the_pile",
+    prompt_function=prompt.the_pile,
     hf_repo="lighteval/pile_helm",
     hf_subset="youtubesubtitles",
     hf_avail_splits=["test"],
@@ -16978,7 +16979,7 @@ timedial_bigbench = LightevalTaskConfig(
     name="timedial",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
     hf_subset="timedial",
     hf_avail_splits=["default", "train", "validation"],
@@ -16996,7 +16997,7 @@ toxigen_lighteval = LightevalTaskConfig(
     name="toxigen",
     suite=["lighteval"],
-    prompt_function="toxigen",
+    prompt_function=prompt.toxigen,
     hf_repo="skg/toxigen-data",
     hf_subset="annotated",
     hf_avail_splits=["train", "test"],
@@ -17014,7 +17015,7 @@ topical_chat_bigbench = LightevalTaskConfig(
     name="topical_chat",
     suite=["bigbench", "bigbench_json"],
-    prompt_function="bigbench",
+    prompt_function=prompt.bigbench,
     hf_repo="bigbench",
hf_subset="topical_chat", hf_avail_splits=["default", "train", "validation"], @@ -17032,7 +17033,7 @@ tracking_shuffled_objects_bigbench = LightevalTaskConfig( name="tracking_shuffled_objects", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="tracking_shuffled_objects", hf_avail_splits=["default", "train", "validation"], @@ -17050,7 +17051,7 @@ triviaqa_lighteval = LightevalTaskConfig( name="triviaqa", suite=["lighteval"], - prompt_function="triviaqa", + prompt_function=prompt.triviaqa, hf_repo="trivia_qa", hf_subset="rc.nocontext", hf_avail_splits=["train", "test", "validation"], @@ -17068,7 +17069,7 @@ truthfulqa_gen_lighteval = LightevalTaskConfig( name="truthfulqa:gen", suite=["lighteval"], - prompt_function="truthful_qa_generative", + prompt_function=prompt.truthful_qa_generative, hf_repo="truthful_qa", hf_subset="generation", hf_avail_splits=["validation"], @@ -17086,7 +17087,7 @@ truthfulqa_mc_leaderboard = LightevalTaskConfig( name="truthfulqa:mc", suite=["leaderboard"], - prompt_function="truthful_qa_multiple_choice", + prompt_function=prompt.truthful_qa_multiple_choice, hf_repo="truthful_qa", hf_subset="multiple_choice", hf_avail_splits=["validation"], @@ -17104,7 +17105,7 @@ truthfulqa_helm = LightevalTaskConfig( name="truthfulqa", suite=["helm", "helm_general"], - prompt_function="truthful_qa_helm", + prompt_function=prompt.truthful_qa_helm, hf_repo="lighteval/truthfulqa_helm", hf_subset="default", hf_avail_splits=["train", "valid"], @@ -17122,7 +17123,7 @@ twitterAAE_aa_helm = LightevalTaskConfig( name="twitterAAE:aa", suite=["helm"], - prompt_function="twitter_aae", + prompt_function=prompt.twitter_aae, hf_repo="lighteval/twitterAAE", hf_subset="aa", hf_avail_splits=["test"], @@ -17140,7 +17141,7 @@ twitterAAE_white_helm = LightevalTaskConfig( name="twitterAAE:white", suite=["helm"], - prompt_function="twitter_aae", + prompt_function=prompt.twitter_aae, hf_repo="lighteval/twitterAAE", hf_subset="white", hf_avail_splits=["test"], @@ -17158,7 +17159,7 @@ understanding_fables_bigbench = LightevalTaskConfig( name="understanding_fables", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="understanding_fables", hf_avail_splits=["default", "train", "validation"], @@ -17176,7 +17177,7 @@ undo_permutation_bigbench = LightevalTaskConfig( name="undo_permutation", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="undo_permutation", hf_avail_splits=["default", "train", "validation"], @@ -17194,7 +17195,7 @@ unit_conversion_bigbench = LightevalTaskConfig( name="unit_conversion", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="unit_conversion", hf_avail_splits=["default", "train", "validation"], @@ -17212,7 +17213,7 @@ unit_interpretation_bigbench = LightevalTaskConfig( name="unit_interpretation", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="unit_interpretation", hf_avail_splits=["default", "train", "validation"], @@ -17230,7 +17231,7 @@ unnatural_in_context_learning_bigbench = LightevalTaskConfig( name="unnatural_in_context_learning", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", 
hf_subset="unnatural_in_context_learning", hf_avail_splits=["default", "train", "validation"], @@ -17248,7 +17249,7 @@ unscramble_anagrams1_lighteval = LightevalTaskConfig( name="unscramble:anagrams1", suite=["lighteval", "unscramble"], - prompt_function="unscramble", + prompt_function=prompt.unscramble, hf_repo="lighteval/GPT3_unscramble", hf_subset="default", hf_avail_splits=["mid_word_1_anagrams"], @@ -17266,7 +17267,7 @@ unscramble_anagrams2_lighteval = LightevalTaskConfig( name="unscramble:anagrams2", suite=["lighteval", "unscramble"], - prompt_function="unscramble", + prompt_function=prompt.unscramble, hf_repo="lighteval/GPT3_unscramble", hf_subset="default", hf_avail_splits=["mid_word_2_anagrams"], @@ -17284,7 +17285,7 @@ unscramble_cycle_letters_lighteval = LightevalTaskConfig( name="unscramble:cycle_letters", suite=["lighteval", "unscramble"], - prompt_function="unscramble", + prompt_function=prompt.unscramble, hf_repo="lighteval/GPT3_unscramble", hf_subset="default", hf_avail_splits=["cycle_letters_in_word"], @@ -17302,7 +17303,7 @@ unscramble_random_insertion_lighteval = LightevalTaskConfig( name="unscramble:random_insertion", suite=["lighteval", "unscramble"], - prompt_function="unscramble", + prompt_function=prompt.unscramble, hf_repo="lighteval/GPT3_unscramble", hf_subset="default", hf_avail_splits=["random_insertion_in_word"], @@ -17320,7 +17321,7 @@ unscramble_reversed_words_lighteval = LightevalTaskConfig( name="unscramble:reversed_words", suite=["lighteval", "unscramble"], - prompt_function="unscramble", + prompt_function=prompt.unscramble, hf_repo="lighteval/GPT3_unscramble", hf_subset="default", hf_avail_splits=["reversed_words"], @@ -17338,7 +17339,7 @@ vitaminc_fact_verification_bigbench_lite = LightevalTaskConfig( name="vitaminc_fact_verification", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_whitespace_after_query", + prompt_function=prompt.bigbench_whitespace_after_query, hf_repo="bigbench", hf_subset="vitaminc_fact_verification", hf_avail_splits=["default", "train", "validation"], @@ -17356,7 +17357,7 @@ webqs_lighteval = LightevalTaskConfig( name="webqs", suite=["lighteval"], - prompt_function="webqs", + prompt_function=prompt.webqs, hf_repo="web_questions", hf_subset="default", hf_avail_splits=["train", "test"], @@ -17374,7 +17375,7 @@ what_is_the_tao_bigbench = LightevalTaskConfig( name="what_is_the_tao", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="what_is_the_tao", hf_avail_splits=["default", "train", "validation"], @@ -17392,7 +17393,7 @@ which_wiki_edit_bigbench = LightevalTaskConfig( name="which_wiki_edit", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="which_wiki_edit", hf_avail_splits=["default", "train", "validation"], @@ -17410,7 +17411,7 @@ wikifact_applies_to_jurisdiction_helm = LightevalTaskConfig( name="wikifact:applies_to_jurisdiction", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="applies_to_jurisdiction", hf_avail_splits=["train", "test"], @@ -17428,7 +17429,7 @@ wikifact_atomic_number_helm = LightevalTaskConfig( name="wikifact:atomic_number", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="atomic_number", hf_avail_splits=["train", "test"], @@ -17446,7 +17447,7 @@ wikifact_author_helm = 
@@ -17446,7 +17447,7 @@ wikifact_author_helm = LightevalTaskConfig(
     name="wikifact:author",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="author",
     hf_avail_splits=["train", "test"],
@@ -17464,7 +17465,7 @@ wikifact_award_received_helm = LightevalTaskConfig(
     name="wikifact:award_received",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="award_received",
     hf_avail_splits=["train", "test"],
@@ -17482,7 +17483,7 @@ wikifact_basic_form_of_government_helm = LightevalTaskConfig(
     name="wikifact:basic_form_of_government",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="basic_form_of_government",
     hf_avail_splits=["train", "test"],
@@ -17500,7 +17501,7 @@ wikifact_capital_helm = LightevalTaskConfig(
     name="wikifact:capital",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="capital",
     hf_avail_splits=["train", "test"],
@@ -17518,7 +17519,7 @@ wikifact_capital_of_helm = LightevalTaskConfig(
     name="wikifact:capital_of",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="capital_of",
     hf_avail_splits=["train", "test"],
@@ -17536,7 +17537,7 @@ wikifact_central_bank_helm = LightevalTaskConfig(
     name="wikifact:central_bank",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="central_bank",
     hf_avail_splits=["train", "test"],
@@ -17554,7 +17555,7 @@ wikifact_composer_helm = LightevalTaskConfig(
     name="wikifact:composer",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="composer",
     hf_avail_splits=["train", "test"],
@@ -17572,7 +17573,7 @@ wikifact_continent_helm = LightevalTaskConfig(
     name="wikifact:continent",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="continent",
     hf_avail_splits=["train", "test"],
@@ -17590,7 +17591,7 @@ wikifact_country_helm = LightevalTaskConfig(
     name="wikifact:country",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="country",
     hf_avail_splits=["train", "test"],
@@ -17608,7 +17609,7 @@ wikifact_country_of_citizenship_helm = LightevalTaskConfig(
     name="wikifact:country_of_citizenship",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="country_of_citizenship",
     hf_avail_splits=["train", "test"],
@@ -17626,7 +17627,7 @@ wikifact_country_of_origin_helm = LightevalTaskConfig(
     name="wikifact:country_of_origin",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="country_of_origin",
     hf_avail_splits=["train", "test"],
@@ -17644,7 +17645,7 @@ wikifact_creator_helm = LightevalTaskConfig(
     name="wikifact:creator",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="creator",
     hf_avail_splits=["train", "test"],
@@ -17662,7 +17663,7 @@ wikifact_currency_helm = LightevalTaskConfig(
     name="wikifact:currency",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="currency",
     hf_avail_splits=["train", "test"],
@@ -17680,7 +17681,7 @@ wikifact_defendant_helm = LightevalTaskConfig(
     name="wikifact:defendant",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="defendant",
     hf_avail_splits=["train", "test"],
@@ -17698,7 +17699,7 @@ wikifact_developer_helm = LightevalTaskConfig(
     name="wikifact:developer",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="developer",
     hf_avail_splits=["train", "test"],
@@ -17716,7 +17717,7 @@ wikifact_diplomatic_relation_helm = LightevalTaskConfig(
     name="wikifact:diplomatic_relation",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="diplomatic_relation",
     hf_avail_splits=["train", "test"],
@@ -17734,7 +17735,7 @@ wikifact_director_helm = LightevalTaskConfig(
     name="wikifact:director",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="director",
     hf_avail_splits=["train", "test"],
@@ -17752,7 +17753,7 @@ wikifact_discoverer_or_inventor_helm = LightevalTaskConfig(
     name="wikifact:discoverer_or_inventor",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="discoverer_or_inventor",
     hf_avail_splits=["train", "test"],
@@ -17770,7 +17771,7 @@ wikifact_drug_or_therapy_used_for_treatment_helm = LightevalTaskConfig(
     name="wikifact:drug_or_therapy_used_for_treatment",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="drug_or_therapy_used_for_treatment",
     hf_avail_splits=["train", "test"],
@@ -17788,7 +17789,7 @@ wikifact_educated_at_helm = LightevalTaskConfig(
     name="wikifact:educated_at",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="educated_at",
     hf_avail_splits=["train", "test"],
@@ -17806,7 +17807,7 @@ wikifact_electron_configuration_helm = LightevalTaskConfig(
     name="wikifact:electron_configuration",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="electron_configuration",
     hf_avail_splits=["train", "test"],
@@ -17824,7 +17825,7 @@ wikifact_employer_helm = LightevalTaskConfig(
     name="wikifact:employer",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="employer",
     hf_avail_splits=["train", "test"],
@@ -17842,7 +17843,7 @@ wikifact_field_of_work_helm = LightevalTaskConfig(
     name="wikifact:field_of_work",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="field_of_work",
     hf_avail_splits=["train", "test"],
@@ -17860,7 +17861,7 @@ wikifact_file_extension_helm = LightevalTaskConfig(
     name="wikifact:file_extension",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="file_extension",
     hf_avail_splits=["train", "test"],
@@ -17878,7 +17879,7 @@ wikifact_genetic_association_helm = LightevalTaskConfig(
     name="wikifact:genetic_association",
     suite=["helm"],
-    prompt_function="wikifact",
+    prompt_function=prompt.wikifact,
     hf_repo="lighteval/wikifact",
     hf_subset="genetic_association",
     hf_avail_splits=["train", "test"],
@@ -17896,7 +17897,7 @@ wikifact_genre_helm = LightevalTaskConfig(
     name="wikifact:genre",
     suite=["helm"],
prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="genre", hf_avail_splits=["train", "test"], @@ -17914,7 +17915,7 @@ wikifact_has_part_helm = LightevalTaskConfig( name="wikifact:has_part", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="has_part", hf_avail_splits=["train", "test"], @@ -17932,7 +17933,7 @@ wikifact_head_of_government_helm = LightevalTaskConfig( name="wikifact:head_of_government", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="head_of_government", hf_avail_splits=["train", "test"], @@ -17950,7 +17951,7 @@ wikifact_head_of_state_helm = LightevalTaskConfig( name="wikifact:head_of_state", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="head_of_state", hf_avail_splits=["train", "test"], @@ -17968,7 +17969,7 @@ wikifact_headquarters_location_helm = LightevalTaskConfig( name="wikifact:headquarters_location", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="headquarters_location", hf_avail_splits=["train", "test"], @@ -17986,7 +17987,7 @@ wikifact_industry_helm = LightevalTaskConfig( name="wikifact:industry", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="industry", hf_avail_splits=["train", "test"], @@ -18004,7 +18005,7 @@ wikifact_influenced_by_helm = LightevalTaskConfig( name="wikifact:influenced_by", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="influenced_by", hf_avail_splits=["train", "test"], @@ -18022,7 +18023,7 @@ wikifact_instance_of_helm = LightevalTaskConfig( name="wikifact:instance_of", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="instance_of", hf_avail_splits=["train", "test"], @@ -18040,7 +18041,7 @@ wikifact_instrument_helm = LightevalTaskConfig( name="wikifact:instrument", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="instrument", hf_avail_splits=["train", "test"], @@ -18058,7 +18059,7 @@ wikifact_language_of_work_or_name_helm = LightevalTaskConfig( name="wikifact:language_of_work_or_name", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="language_of_work_or_name", hf_avail_splits=["train", "test"], @@ -18076,7 +18077,7 @@ wikifact_languages_spoken_written_or_signed_helm = LightevalTaskConfig( name="wikifact:languages_spoken_written_or_signed", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="languages_spoken_written_or_signed", hf_avail_splits=["train", "test"], @@ -18094,7 +18095,7 @@ wikifact_laws_applied_helm = LightevalTaskConfig( name="wikifact:laws_applied", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="laws_applied", hf_avail_splits=["train", "test"], @@ -18112,7 +18113,7 @@ wikifact_located_in_the_administrative_territorial_entity_helm = LightevalTaskConfig( name="wikifact:located_in_the_administrative_territorial_entity", suite=["helm"], - prompt_function="wikifact", + 
prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="located_in_the_administrative_territorial_entity", hf_avail_splits=["train", "test"], @@ -18130,7 +18131,7 @@ wikifact_location_helm = LightevalTaskConfig( name="wikifact:location", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="location", hf_avail_splits=["train", "test"], @@ -18148,7 +18149,7 @@ wikifact_location_of_discovery_helm = LightevalTaskConfig( name="wikifact:location_of_discovery", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="location_of_discovery", hf_avail_splits=["train", "test"], @@ -18166,7 +18167,7 @@ wikifact_location_of_formation_helm = LightevalTaskConfig( name="wikifact:location_of_formation", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="location_of_formation", hf_avail_splits=["train", "test"], @@ -18184,7 +18185,7 @@ wikifact_majority_opinion_by_helm = LightevalTaskConfig( name="wikifact:majority_opinion_by", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="majority_opinion_by", hf_avail_splits=["train", "test"], @@ -18202,7 +18203,7 @@ wikifact_manufacturer_helm = LightevalTaskConfig( name="wikifact:manufacturer", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="manufacturer", hf_avail_splits=["train", "test"], @@ -18220,7 +18221,7 @@ wikifact_measured_physical_quantity_helm = LightevalTaskConfig( name="wikifact:measured_physical_quantity", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="measured_physical_quantity", hf_avail_splits=["train", "test"], @@ -18238,7 +18239,7 @@ wikifact_medical_condition_treated_helm = LightevalTaskConfig( name="wikifact:medical_condition_treated", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="medical_condition_treated", hf_avail_splits=["train", "test"], @@ -18256,7 +18257,7 @@ wikifact_member_of_helm = LightevalTaskConfig( name="wikifact:member_of", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="member_of", hf_avail_splits=["train", "test"], @@ -18274,7 +18275,7 @@ wikifact_member_of_political_party_helm = LightevalTaskConfig( name="wikifact:member_of_political_party", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="member_of_political_party", hf_avail_splits=["train", "test"], @@ -18292,7 +18293,7 @@ wikifact_member_of_sports_team_helm = LightevalTaskConfig( name="wikifact:member_of_sports_team", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="member_of_sports_team", hf_avail_splits=["train", "test"], @@ -18310,7 +18311,7 @@ wikifact_movement_helm = LightevalTaskConfig( name="wikifact:movement", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="movement", hf_avail_splits=["train", "test"], @@ -18328,7 +18329,7 @@ wikifact_named_after_helm = LightevalTaskConfig( name="wikifact:named_after", suite=["helm"], - prompt_function="wikifact", + 
prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="named_after", hf_avail_splits=["train", "test"], @@ -18346,7 +18347,7 @@ wikifact_native_language_helm = LightevalTaskConfig( name="wikifact:native_language", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="native_language", hf_avail_splits=["train", "test"], @@ -18364,7 +18365,7 @@ wikifact_number_of_processor_cores_helm = LightevalTaskConfig( name="wikifact:number_of_processor_cores", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="number_of_processor_cores", hf_avail_splits=["train", "test"], @@ -18382,7 +18383,7 @@ wikifact_occupation_helm = LightevalTaskConfig( name="wikifact:occupation", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="occupation", hf_avail_splits=["train", "test"], @@ -18400,7 +18401,7 @@ wikifact_office_held_by_head_of_government_helm = LightevalTaskConfig( name="wikifact:office_held_by_head_of_government", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="office_held_by_head_of_government", hf_avail_splits=["train", "test"], @@ -18418,7 +18419,7 @@ wikifact_office_held_by_head_of_state_helm = LightevalTaskConfig( name="wikifact:office_held_by_head_of_state", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="office_held_by_head_of_state", hf_avail_splits=["train", "test"], @@ -18436,7 +18437,7 @@ wikifact_official_language_helm = LightevalTaskConfig( name="wikifact:official_language", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="official_language", hf_avail_splits=["train", "test"], @@ -18454,7 +18455,7 @@ wikifact_operating_system_helm = LightevalTaskConfig( name="wikifact:operating_system", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="operating_system", hf_avail_splits=["train", "test"], @@ -18472,7 +18473,7 @@ wikifact_original_language_of_film_or_TV_show_helm = LightevalTaskConfig( name="wikifact:original_language_of_film_or_TV_show", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="original_language_of_film_or_TV_show", hf_avail_splits=["train", "test"], @@ -18490,7 +18491,7 @@ wikifact_original_network_helm = LightevalTaskConfig( name="wikifact:original_network", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="original_network", hf_avail_splits=["train", "test"], @@ -18508,7 +18509,7 @@ wikifact_overrules_helm = LightevalTaskConfig( name="wikifact:overrules", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="overrules", hf_avail_splits=["train", "test"], @@ -18526,7 +18527,7 @@ wikifact_owned_by_helm = LightevalTaskConfig( name="wikifact:owned_by", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="owned_by", hf_avail_splits=["train", "test"], @@ -18544,7 +18545,7 @@ wikifact_part_of_helm = LightevalTaskConfig( name="wikifact:part_of", suite=["helm"], - prompt_function="wikifact", + 
prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="part_of", hf_avail_splits=["train", "test"], @@ -18562,7 +18563,7 @@ wikifact_participating_team_helm = LightevalTaskConfig( name="wikifact:participating_team", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="participating_team", hf_avail_splits=["train", "test"], @@ -18580,7 +18581,7 @@ wikifact_place_of_birth_helm = LightevalTaskConfig( name="wikifact:place_of_birth", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="place_of_birth", hf_avail_splits=["train", "test"], @@ -18598,7 +18599,7 @@ wikifact_place_of_death_helm = LightevalTaskConfig( name="wikifact:place_of_death", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="place_of_death", hf_avail_splits=["train", "test"], @@ -18616,7 +18617,7 @@ wikifact_plaintiff_helm = LightevalTaskConfig( name="wikifact:plaintiff", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="plaintiff", hf_avail_splits=["train", "test"], @@ -18634,7 +18635,7 @@ wikifact_position_held_helm = LightevalTaskConfig( name="wikifact:position_held", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="position_held", hf_avail_splits=["train", "test"], @@ -18652,7 +18653,7 @@ wikifact_position_played_on_team_helm = LightevalTaskConfig( name="wikifact:position_played_on_team", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="position_played_on_team", hf_avail_splits=["train", "test"], @@ -18670,7 +18671,7 @@ wikifact_programming_language_helm = LightevalTaskConfig( name="wikifact:programming_language", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="programming_language", hf_avail_splits=["train", "test"], @@ -18688,7 +18689,7 @@ wikifact_recommended_unit_of_measurement_helm = LightevalTaskConfig( name="wikifact:recommended_unit_of_measurement", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="recommended_unit_of_measurement", hf_avail_splits=["train", "test"], @@ -18706,7 +18707,7 @@ wikifact_record_label_helm = LightevalTaskConfig( name="wikifact:record_label", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="record_label", hf_avail_splits=["train", "test"], @@ -18724,7 +18725,7 @@ wikifact_religion_helm = LightevalTaskConfig( name="wikifact:religion", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="religion", hf_avail_splits=["train", "test"], @@ -18742,7 +18743,7 @@ wikifact_repealed_by_helm = LightevalTaskConfig( name="wikifact:repealed_by", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="repealed_by", hf_avail_splits=["train", "test"], @@ -18760,7 +18761,7 @@ wikifact_shares_border_with_helm = LightevalTaskConfig( name="wikifact:shares_border_with", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="shares_border_with", 
hf_avail_splits=["train", "test"], @@ -18778,7 +18779,7 @@ wikifact_solved_by_helm = LightevalTaskConfig( name="wikifact:solved_by", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="solved_by", hf_avail_splits=["train", "test"], @@ -18796,7 +18797,7 @@ wikifact_statement_describes_helm = LightevalTaskConfig( name="wikifact:statement_describes", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="statement_describes", hf_avail_splits=["train", "test"], @@ -18814,7 +18815,7 @@ wikifact_stock_exchange_helm = LightevalTaskConfig( name="wikifact:stock_exchange", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="stock_exchange", hf_avail_splits=["train", "test"], @@ -18832,7 +18833,7 @@ wikifact_subclass_of_helm = LightevalTaskConfig( name="wikifact:subclass_of", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="subclass_of", hf_avail_splits=["train", "test"], @@ -18850,7 +18851,7 @@ wikifact_subsidiary_helm = LightevalTaskConfig( name="wikifact:subsidiary", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="subsidiary", hf_avail_splits=["train", "test"], @@ -18868,7 +18869,7 @@ wikifact_symptoms_and_signs_helm = LightevalTaskConfig( name="wikifact:symptoms_and_signs", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="symptoms_and_signs", hf_avail_splits=["train", "test"], @@ -18886,7 +18887,7 @@ wikifact_therapeutic_area_helm = LightevalTaskConfig( name="wikifact:therapeutic_area", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="therapeutic_area", hf_avail_splits=["train", "test"], @@ -18904,7 +18905,7 @@ wikifact_time_of_discovery_or_invention_helm = LightevalTaskConfig( name="wikifact:time_of_discovery_or_invention", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="time_of_discovery_or_invention", hf_avail_splits=["train", "test"], @@ -18922,7 +18923,7 @@ wikifact_twinned_administrative_body_helm = LightevalTaskConfig( name="wikifact:twinned_administrative_body", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="twinned_administrative_body", hf_avail_splits=["train", "test"], @@ -18940,7 +18941,7 @@ wikifact_work_location_helm = LightevalTaskConfig( name="wikifact:work_location", suite=["helm"], - prompt_function="wikifact", + prompt_function=prompt.wikifact, hf_repo="lighteval/wikifact", hf_subset="work_location", hf_avail_splits=["train", "test"], @@ -18958,7 +18959,7 @@ wikitext_2_lighteval = LightevalTaskConfig( name="wikitext:2", suite=["lighteval"], - prompt_function="wikitext", + prompt_function=prompt.wikitext, hf_repo="wikitext", hf_subset="wikitext-2-raw-v1", hf_avail_splits=["train", "validation", "test"], @@ -18976,7 +18977,7 @@ wikitext_103_document_level_harness = LightevalTaskConfig( name="wikitext:103:document_level", suite=["harness"], - prompt_function="wikitext_harness", + prompt_function=prompt.wikitext_harness, hf_repo="EleutherAI/wikitext_document_level", hf_subset="wikitext-103-raw-v1", hf_avail_splits=["train", "test"], @@ -18994,7 
+18995,7 @@ wikitext_103_document_level_helm = LightevalTaskConfig( name="wikitext:103:document_level", suite=["helm"], - prompt_function="wikitext_helm", + prompt_function=prompt.wikitext_helm, hf_repo="EleutherAI/wikitext_document_level", hf_subset="wikitext-103-raw-v1", hf_avail_splits=["train", "test"], @@ -19012,7 +19013,7 @@ wino_x_german_bigbench = LightevalTaskConfig( name="wino_x_german", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="wino_x_german", hf_avail_splits=["default", "train", "validation"], @@ -19030,7 +19031,7 @@ winogrande_leaderboard = LightevalTaskConfig( name="winogrande", suite=["leaderboard"], - prompt_function="winogrande", + prompt_function=prompt.winogrande, hf_repo="winogrande", hf_subset="winogrande_xl", hf_avail_splits=["train", "test", "validation"], @@ -19048,7 +19049,7 @@ winowhy_bigbench_lite = LightevalTaskConfig( name="winowhy", suite=["bigbench_lite", "bigbench", "bigbench_json"], - prompt_function="bigbench_whitespace_after_query", + prompt_function=prompt.bigbench_whitespace_after_query, hf_repo="bigbench", hf_subset="winowhy", hf_avail_splits=["default", "train", "validation"], @@ -19066,7 +19067,7 @@ wmt08_cs_en_lighteval = LightevalTaskConfig( name="wmt08:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_cs-en", hf_avail_splits=["test"], @@ -19084,7 +19085,7 @@ wmt08_de_en_lighteval = LightevalTaskConfig( name="wmt08:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_de-en", hf_avail_splits=["test"], @@ -19102,7 +19103,7 @@ wmt08_en_cs_lighteval = LightevalTaskConfig( name="wmt08:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_en-cs", hf_avail_splits=["test"], @@ -19120,7 +19121,7 @@ wmt08_en_de_lighteval = LightevalTaskConfig( name="wmt08:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_en-de", hf_avail_splits=["test"], @@ -19138,7 +19139,7 @@ wmt08_en_es_lighteval = LightevalTaskConfig( name="wmt08:en-es", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_en-es", hf_avail_splits=["test"], @@ -19156,7 +19157,7 @@ wmt08_en_fr_lighteval = LightevalTaskConfig( name="wmt08:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_en-fr", hf_avail_splits=["test"], @@ -19174,7 +19175,7 @@ wmt08_en_hu_lighteval = LightevalTaskConfig( name="wmt08:en-hu", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_en-hu", hf_avail_splits=["test"], @@ -19192,7 +19193,7 @@ wmt08_es_en_lighteval = LightevalTaskConfig( name="wmt08:es-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, 
hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_es-en", hf_avail_splits=["test"], @@ -19210,7 +19211,7 @@ wmt08_fr_en_lighteval = LightevalTaskConfig( name="wmt08:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_fr-en", hf_avail_splits=["test"], @@ -19228,7 +19229,7 @@ wmt08_hu_en_lighteval = LightevalTaskConfig( name="wmt08:hu-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt08_hu-en", hf_avail_splits=["test"], @@ -19246,7 +19247,7 @@ wmt09_cs_en_lighteval = LightevalTaskConfig( name="wmt09:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_cs-en", hf_avail_splits=["test"], @@ -19264,7 +19265,7 @@ wmt09_de_en_lighteval = LightevalTaskConfig( name="wmt09:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_de-en", hf_avail_splits=["test"], @@ -19282,7 +19283,7 @@ wmt09_en_cs_lighteval = LightevalTaskConfig( name="wmt09:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_en-cs", hf_avail_splits=["test"], @@ -19300,7 +19301,7 @@ wmt09_en_de_lighteval = LightevalTaskConfig( name="wmt09:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_en-de", hf_avail_splits=["test"], @@ -19318,7 +19319,7 @@ wmt09_en_es_lighteval = LightevalTaskConfig( name="wmt09:en-es", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_en-es", hf_avail_splits=["test"], @@ -19336,7 +19337,7 @@ wmt09_en_fr_lighteval = LightevalTaskConfig( name="wmt09:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_en-fr", hf_avail_splits=["test"], @@ -19354,7 +19355,7 @@ wmt09_en_hu_lighteval = LightevalTaskConfig( name="wmt09:en-hu", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_en-hu", hf_avail_splits=["test"], @@ -19372,7 +19373,7 @@ wmt09_en_it_lighteval = LightevalTaskConfig( name="wmt09:en-it", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_en-it", hf_avail_splits=["test"], @@ -19390,7 +19391,7 @@ wmt09_es_en_lighteval = LightevalTaskConfig( name="wmt09:es-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_es-en", hf_avail_splits=["test"], @@ -19408,7 +19409,7 @@ wmt09_fr_en_lighteval = LightevalTaskConfig( name="wmt09:fr-en", suite=["lighteval", "sacrebleu"], - 
prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_fr-en", hf_avail_splits=["test"], @@ -19426,7 +19427,7 @@ wmt09_hu_en_lighteval = LightevalTaskConfig( name="wmt09:hu-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_hu-en", hf_avail_splits=["test"], @@ -19444,7 +19445,7 @@ wmt09_it_en_lighteval = LightevalTaskConfig( name="wmt09:it-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt09_it-en", hf_avail_splits=["test"], @@ -19462,7 +19463,7 @@ wmt10_cs_en_lighteval = LightevalTaskConfig( name="wmt10:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt10_cs-en", hf_avail_splits=["test"], @@ -19480,7 +19481,7 @@ wmt10_de_en_lighteval = LightevalTaskConfig( name="wmt10:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt10_de-en", hf_avail_splits=["test"], @@ -19498,7 +19499,7 @@ wmt10_en_cs_lighteval = LightevalTaskConfig( name="wmt10:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt10_en-cs", hf_avail_splits=["test"], @@ -19516,7 +19517,7 @@ wmt10_en_de_lighteval = LightevalTaskConfig( name="wmt10:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt10_en-de", hf_avail_splits=["test"], @@ -19534,7 +19535,7 @@ wmt10_en_es_lighteval = LightevalTaskConfig( name="wmt10:en-es", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt10_en-es", hf_avail_splits=["test"], @@ -19552,7 +19553,7 @@ wmt10_en_fr_lighteval = LightevalTaskConfig( name="wmt10:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt10_en-fr", hf_avail_splits=["test"], @@ -19570,7 +19571,7 @@ wmt10_es_en_lighteval = LightevalTaskConfig( name="wmt10:es-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt10_es-en", hf_avail_splits=["test"], @@ -19588,7 +19589,7 @@ wmt10_fr_en_lighteval = LightevalTaskConfig( name="wmt10:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt10_fr-en", hf_avail_splits=["test"], @@ -19606,7 +19607,7 @@ wmt11_cs_en_lighteval = LightevalTaskConfig( name="wmt11:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt11_cs-en", hf_avail_splits=["test"], @@ -19624,7 +19625,7 @@ wmt11_de_en_lighteval = 
LightevalTaskConfig( name="wmt11:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt11_de-en", hf_avail_splits=["test"], @@ -19642,7 +19643,7 @@ wmt11_en_cs_lighteval = LightevalTaskConfig( name="wmt11:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt11_en-cs", hf_avail_splits=["test"], @@ -19660,7 +19661,7 @@ wmt11_en_de_lighteval = LightevalTaskConfig( name="wmt11:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt11_en-de", hf_avail_splits=["test"], @@ -19678,7 +19679,7 @@ wmt11_en_es_lighteval = LightevalTaskConfig( name="wmt11:en-es", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt11_en-es", hf_avail_splits=["test"], @@ -19696,7 +19697,7 @@ wmt11_en_fr_lighteval = LightevalTaskConfig( name="wmt11:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt11_en-fr", hf_avail_splits=["test"], @@ -19714,7 +19715,7 @@ wmt11_es_en_lighteval = LightevalTaskConfig( name="wmt11:es-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt11_es-en", hf_avail_splits=["test"], @@ -19732,7 +19733,7 @@ wmt11_fr_en_lighteval = LightevalTaskConfig( name="wmt11:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt11_fr-en", hf_avail_splits=["test"], @@ -19750,7 +19751,7 @@ wmt12_cs_en_lighteval = LightevalTaskConfig( name="wmt12:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt12_cs-en", hf_avail_splits=["test"], @@ -19768,7 +19769,7 @@ wmt12_de_en_lighteval = LightevalTaskConfig( name="wmt12:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt12_de-en", hf_avail_splits=["test"], @@ -19786,7 +19787,7 @@ wmt12_en_cs_lighteval = LightevalTaskConfig( name="wmt12:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt12_en-cs", hf_avail_splits=["test"], @@ -19804,7 +19805,7 @@ wmt12_en_de_lighteval = LightevalTaskConfig( name="wmt12:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt12_en-de", hf_avail_splits=["test"], @@ -19822,7 +19823,7 @@ wmt12_en_es_lighteval = LightevalTaskConfig( name="wmt12:en-es", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt12_en-es", 
hf_avail_splits=["test"], @@ -19840,7 +19841,7 @@ wmt12_en_fr_lighteval = LightevalTaskConfig( name="wmt12:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt12_en-fr", hf_avail_splits=["test"], @@ -19858,7 +19859,7 @@ wmt12_es_en_lighteval = LightevalTaskConfig( name="wmt12:es-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt12_es-en", hf_avail_splits=["test"], @@ -19876,7 +19877,7 @@ wmt12_fr_en_lighteval = LightevalTaskConfig( name="wmt12:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt12_fr-en", hf_avail_splits=["test"], @@ -19894,7 +19895,7 @@ wmt13_cs_en_lighteval = LightevalTaskConfig( name="wmt13:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_cs-en", hf_avail_splits=["test"], @@ -19912,7 +19913,7 @@ wmt13_de_en_lighteval = LightevalTaskConfig( name="wmt13:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_de-en", hf_avail_splits=["test"], @@ -19930,7 +19931,7 @@ wmt13_en_cs_lighteval = LightevalTaskConfig( name="wmt13:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_en-cs", hf_avail_splits=["test"], @@ -19948,7 +19949,7 @@ wmt13_en_de_lighteval = LightevalTaskConfig( name="wmt13:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_en-de", hf_avail_splits=["test"], @@ -19966,7 +19967,7 @@ wmt13_en_es_lighteval = LightevalTaskConfig( name="wmt13:en-es", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_en-es", hf_avail_splits=["test"], @@ -19984,7 +19985,7 @@ wmt13_en_fr_lighteval = LightevalTaskConfig( name="wmt13:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_en-fr", hf_avail_splits=["test"], @@ -20002,7 +20003,7 @@ wmt13_en_ru_lighteval = LightevalTaskConfig( name="wmt13:en-ru", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_en-ru", hf_avail_splits=["test"], @@ -20020,7 +20021,7 @@ wmt13_es_en_lighteval = LightevalTaskConfig( name="wmt13:es-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_es-en", hf_avail_splits=["test"], @@ -20038,7 +20039,7 @@ wmt13_fr_en_lighteval = LightevalTaskConfig( name="wmt13:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, 
hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_fr-en", hf_avail_splits=["test"], @@ -20056,7 +20057,7 @@ wmt13_ru_en_lighteval = LightevalTaskConfig( name="wmt13:ru-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt13_ru-en", hf_avail_splits=["test"], @@ -20074,7 +20075,7 @@ wmt14_cs_en_lighteval = LightevalTaskConfig( name="wmt14:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_cs-en", hf_avail_splits=["test"], @@ -20092,7 +20093,7 @@ wmt14_de_en_lighteval = LightevalTaskConfig( name="wmt14:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_de-en", hf_avail_splits=["test"], @@ -20110,7 +20111,7 @@ wmt14_en_cs_lighteval = LightevalTaskConfig( name="wmt14:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_en-cs", hf_avail_splits=["test"], @@ -20128,7 +20129,7 @@ wmt14_en_de_lighteval = LightevalTaskConfig( name="wmt14:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_en-de", hf_avail_splits=["test"], @@ -20146,7 +20147,7 @@ wmt14_en_fr_lighteval = LightevalTaskConfig( name="wmt14:en-fr", suite=["lighteval", "gpt3_benchmarks"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="wmt14", hf_subset="fr-en", hf_avail_splits=["train", "validation", "test"], @@ -20164,7 +20165,7 @@ wmt14_en_fr_lighteval = LightevalTaskConfig( name="wmt14:en-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_en-fr", hf_avail_splits=["test"], @@ -20182,7 +20183,7 @@ wmt14_en_hi_lighteval = LightevalTaskConfig( name="wmt14:en-hi", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_en-hi", hf_avail_splits=["test"], @@ -20200,7 +20201,7 @@ wmt14_en_ru_lighteval = LightevalTaskConfig( name="wmt14:en-ru", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_en-ru", hf_avail_splits=["test"], @@ -20218,7 +20219,7 @@ wmt14_fr_en_lighteval = LightevalTaskConfig( name="wmt14:fr-en", suite=["lighteval", "gpt3_benchmarks"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="wmt14", hf_subset="fr-en", hf_avail_splits=["train", "validation", "test"], @@ -20236,7 +20237,7 @@ wmt14_fr_en_lighteval = LightevalTaskConfig( name="wmt14:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_fr-en", hf_avail_splits=["test"], @@ -20254,7 +20255,7 @@ wmt14_hi_en_lighteval = LightevalTaskConfig( name="wmt14:hi-en", suite=["lighteval", "sacrebleu"], - 
prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_hi-en", hf_avail_splits=["test"], @@ -20272,7 +20273,7 @@ wmt14_ru_en_lighteval = LightevalTaskConfig( name="wmt14:ru-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt14_ru-en", hf_avail_splits=["test"], @@ -20290,7 +20291,7 @@ wmt14_cs_en_helm = LightevalTaskConfig( name="wmt14:cs-en", suite=["helm"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/wmt14", hf_subset="cs-en", hf_avail_splits=["train", "test", "validation"], @@ -20308,7 +20309,7 @@ wmt14_de_en_helm = LightevalTaskConfig( name="wmt14:de-en", suite=["helm"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/wmt14", hf_subset="de-en", hf_avail_splits=["train", "test", "validation"], @@ -20326,7 +20327,7 @@ wmt14_fr_en_helm = LightevalTaskConfig( name="wmt14:fr-en", suite=["helm"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/wmt14", hf_subset="fr-en", hf_avail_splits=["train", "test", "validation"], @@ -20344,7 +20345,7 @@ wmt14_hi_en_helm = LightevalTaskConfig( name="wmt14:hi-en", suite=["helm"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/wmt14", hf_subset="hi-en", hf_avail_splits=["train", "test", "validation"], @@ -20362,7 +20363,7 @@ wmt14_ru_en_helm = LightevalTaskConfig( name="wmt14:ru-en", suite=["helm"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/wmt14", hf_subset="ru-en", hf_avail_splits=["train", "test", "validation"], @@ -20380,7 +20381,7 @@ wmt15_cs_en_lighteval = LightevalTaskConfig( name="wmt15:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_cs-en", hf_avail_splits=["test"], @@ -20398,7 +20399,7 @@ wmt15_de_en_lighteval = LightevalTaskConfig( name="wmt15:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_de-en", hf_avail_splits=["test"], @@ -20416,7 +20417,7 @@ wmt15_en_cs_lighteval = LightevalTaskConfig( name="wmt15:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_en-cs", hf_avail_splits=["test"], @@ -20434,7 +20435,7 @@ wmt15_en_de_lighteval = LightevalTaskConfig( name="wmt15:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_en-de", hf_avail_splits=["test"], @@ -20452,7 +20453,7 @@ wmt15_en_fi_lighteval = LightevalTaskConfig( name="wmt15:en-fi", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_en-fi", hf_avail_splits=["test"], @@ -20470,7 +20471,7 @@ wmt15_en_fr_lighteval = LightevalTaskConfig( name="wmt15:en-fr", suite=["lighteval", "sacrebleu"], - 
prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_en-fr", hf_avail_splits=["test"], @@ -20488,7 +20489,7 @@ wmt15_en_ru_lighteval = LightevalTaskConfig( name="wmt15:en-ru", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_en-ru", hf_avail_splits=["test"], @@ -20506,7 +20507,7 @@ wmt15_fi_en_lighteval = LightevalTaskConfig( name="wmt15:fi-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_fi-en", hf_avail_splits=["test"], @@ -20524,7 +20525,7 @@ wmt15_fr_en_lighteval = LightevalTaskConfig( name="wmt15:fr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_fr-en", hf_avail_splits=["test"], @@ -20542,7 +20543,7 @@ wmt15_ru_en_lighteval = LightevalTaskConfig( name="wmt15:ru-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt15_ru-en", hf_avail_splits=["test"], @@ -20560,7 +20561,7 @@ wmt16_cs_en_lighteval = LightevalTaskConfig( name="wmt16:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_cs-en", hf_avail_splits=["test"], @@ -20578,7 +20579,7 @@ wmt16_de_en_lighteval = LightevalTaskConfig( name="wmt16:de-en", suite=["lighteval", "gpt3_benchmarks"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="wmt16", hf_subset="de-en", hf_avail_splits=["train", "validation", "test"], @@ -20596,7 +20597,7 @@ wmt16_de_en_lighteval = LightevalTaskConfig( name="wmt16:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_de-en", hf_avail_splits=["test"], @@ -20614,7 +20615,7 @@ wmt16_en_cs_lighteval = LightevalTaskConfig( name="wmt16:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_en-cs", hf_avail_splits=["test"], @@ -20632,7 +20633,7 @@ wmt16_en_de_lighteval = LightevalTaskConfig( name="wmt16:en-de", suite=["lighteval", "gpt3_benchmarks"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="wmt16", hf_subset="de-en", hf_avail_splits=["train", "validation", "test"], @@ -20650,7 +20651,7 @@ wmt16_en_de_lighteval = LightevalTaskConfig( name="wmt16:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_en-de", hf_avail_splits=["test"], @@ -20668,7 +20669,7 @@ wmt16_en_fi_lighteval = LightevalTaskConfig( name="wmt16:en-fi", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_en-fi", hf_avail_splits=["test"], @@ -20686,7 +20687,7 @@ wmt16_en_ro_lighteval = 
LightevalTaskConfig( name="wmt16:en-ro", suite=["lighteval", "gpt3_benchmarks"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="wmt16", hf_subset="ro-en", hf_avail_splits=["train", "validation", "test"], @@ -20704,7 +20705,7 @@ wmt16_en_ro_lighteval = LightevalTaskConfig( name="wmt16:en-ro", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_en-ro", hf_avail_splits=["test"], @@ -20722,7 +20723,7 @@ wmt16_en_ru_lighteval = LightevalTaskConfig( name="wmt16:en-ru", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_en-ru", hf_avail_splits=["test"], @@ -20740,7 +20741,7 @@ wmt16_en_tr_lighteval = LightevalTaskConfig( name="wmt16:en-tr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_en-tr", hf_avail_splits=["test"], @@ -20758,7 +20759,7 @@ wmt16_fi_en_lighteval = LightevalTaskConfig( name="wmt16:fi-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_fi-en", hf_avail_splits=["test"], @@ -20776,7 +20777,7 @@ wmt16_ro_en_lighteval = LightevalTaskConfig( name="wmt16:ro-en", suite=["lighteval", "gpt3_benchmarks"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="wmt16", hf_subset="ro-en", hf_avail_splits=["train", "validation", "test"], @@ -20794,7 +20795,7 @@ wmt16_ro_en_lighteval = LightevalTaskConfig( name="wmt16:ro-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_ro-en", hf_avail_splits=["test"], @@ -20812,7 +20813,7 @@ wmt16_ru_en_lighteval = LightevalTaskConfig( name="wmt16:ru-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_ru-en", hf_avail_splits=["test"], @@ -20830,7 +20831,7 @@ wmt16_tr_en_lighteval = LightevalTaskConfig( name="wmt16:tr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt16_tr-en", hf_avail_splits=["test"], @@ -20848,7 +20849,7 @@ wmt17_cs_en_lighteval = LightevalTaskConfig( name="wmt17:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_cs-en", hf_avail_splits=["test"], @@ -20866,7 +20867,7 @@ wmt17_de_en_lighteval = LightevalTaskConfig( name="wmt17:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_de-en", hf_avail_splits=["test"], @@ -20884,7 +20885,7 @@ wmt17_en_cs_lighteval = LightevalTaskConfig( name="wmt17:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_en-cs", 
hf_avail_splits=["test"], @@ -20902,7 +20903,7 @@ wmt17_en_de_lighteval = LightevalTaskConfig( name="wmt17:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_en-de", hf_avail_splits=["test"], @@ -20920,7 +20921,7 @@ wmt17_en_fi_lighteval = LightevalTaskConfig( name="wmt17:en-fi", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_en-fi", hf_avail_splits=["test"], @@ -20938,7 +20939,7 @@ wmt17_en_lv_lighteval = LightevalTaskConfig( name="wmt17:en-lv", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_en-lv", hf_avail_splits=["test"], @@ -20956,7 +20957,7 @@ wmt17_en_ru_lighteval = LightevalTaskConfig( name="wmt17:en-ru", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_en-ru", hf_avail_splits=["test"], @@ -20974,7 +20975,7 @@ wmt17_en_tr_lighteval = LightevalTaskConfig( name="wmt17:en-tr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_en-tr", hf_avail_splits=["test"], @@ -20992,7 +20993,7 @@ wmt17_en_zh_lighteval = LightevalTaskConfig( name="wmt17:en-zh", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_en-zh", hf_avail_splits=["test"], @@ -21010,7 +21011,7 @@ wmt17_fi_en_lighteval = LightevalTaskConfig( name="wmt17:fi-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_fi-en", hf_avail_splits=["test"], @@ -21028,7 +21029,7 @@ wmt17_lv_en_lighteval = LightevalTaskConfig( name="wmt17:lv-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_lv-en", hf_avail_splits=["test"], @@ -21046,7 +21047,7 @@ wmt17_ru_en_lighteval = LightevalTaskConfig( name="wmt17:ru-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_ru-en", hf_avail_splits=["test"], @@ -21064,7 +21065,7 @@ wmt17_tr_en_lighteval = LightevalTaskConfig( name="wmt17:tr-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_tr-en", hf_avail_splits=["test"], @@ -21082,7 +21083,7 @@ wmt17_zh_en_lighteval = LightevalTaskConfig( name="wmt17:zh-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt17_zh-en", hf_avail_splits=["test"], @@ -21100,7 +21101,7 @@ wmt18_cs_en_lighteval = LightevalTaskConfig( name="wmt18:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, 
hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_cs-en", hf_avail_splits=["test"], @@ -21118,7 +21119,7 @@ wmt18_de_en_lighteval = LightevalTaskConfig( name="wmt18:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_de-en", hf_avail_splits=["test"], @@ -21136,7 +21137,7 @@ wmt18_en_cs_lighteval = LightevalTaskConfig( name="wmt18:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_en-cs", hf_avail_splits=["test"], @@ -21154,7 +21155,7 @@ wmt18_en_de_lighteval = LightevalTaskConfig( name="wmt18:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_en-de", hf_avail_splits=["test"], @@ -21172,7 +21173,7 @@ wmt18_en_et_lighteval = LightevalTaskConfig( name="wmt18:en-et", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_en-et", hf_avail_splits=["test"], @@ -21190,7 +21191,7 @@ wmt18_en_fi_lighteval = LightevalTaskConfig( name="wmt18:en-fi", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_en-fi", hf_avail_splits=["test"], @@ -21208,7 +21209,7 @@ wmt18_en_ru_lighteval = LightevalTaskConfig( name="wmt18:en-ru", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_en-ru", hf_avail_splits=["test"], @@ -21226,7 +21227,7 @@ wmt18_en_tr_lighteval = LightevalTaskConfig( name="wmt18:en-tr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_en-tr", hf_avail_splits=["test"], @@ -21244,7 +21245,7 @@ wmt18_en_zh_lighteval = LightevalTaskConfig( name="wmt18:en-zh", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_en-zh", hf_avail_splits=["test"], @@ -21262,7 +21263,7 @@ wmt18_et_en_lighteval = LightevalTaskConfig( name="wmt18:et-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_et-en", hf_avail_splits=["test"], @@ -21280,7 +21281,7 @@ wmt18_fi_en_lighteval = LightevalTaskConfig( name="wmt18:fi-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_fi-en", hf_avail_splits=["test"], @@ -21298,7 +21299,7 @@ wmt18_ru_en_lighteval = LightevalTaskConfig( name="wmt18:ru-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_ru-en", hf_avail_splits=["test"], @@ -21316,7 +21317,7 @@ wmt18_tr_en_lighteval = LightevalTaskConfig( name="wmt18:tr-en", suite=["lighteval", "sacrebleu"], - 
prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_tr-en", hf_avail_splits=["test"], @@ -21334,7 +21335,7 @@ wmt18_zh_en_lighteval = LightevalTaskConfig( name="wmt18:zh-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt18_zh-en", hf_avail_splits=["test"], @@ -21352,7 +21353,7 @@ wmt19_cs_de_lighteval = LightevalTaskConfig( name="wmt19:cs-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_cs-de", hf_avail_splits=["test"], @@ -21370,7 +21371,7 @@ wmt19_de_cs_lighteval = LightevalTaskConfig( name="wmt19:de-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_de-cs", hf_avail_splits=["test"], @@ -21388,7 +21389,7 @@ wmt19_de_en_lighteval = LightevalTaskConfig( name="wmt19:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_de-en", hf_avail_splits=["test"], @@ -21406,7 +21407,7 @@ wmt19_de_fr_lighteval = LightevalTaskConfig( name="wmt19:de-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_de-fr", hf_avail_splits=["test"], @@ -21424,7 +21425,7 @@ wmt19_en_cs_lighteval = LightevalTaskConfig( name="wmt19:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_en-cs", hf_avail_splits=["test"], @@ -21442,7 +21443,7 @@ wmt19_en_de_lighteval = LightevalTaskConfig( name="wmt19:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_en-de", hf_avail_splits=["test"], @@ -21460,7 +21461,7 @@ wmt19_en_fi_lighteval = LightevalTaskConfig( name="wmt19:en-fi", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_en-fi", hf_avail_splits=["test"], @@ -21478,7 +21479,7 @@ wmt19_en_gu_lighteval = LightevalTaskConfig( name="wmt19:en-gu", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_en-gu", hf_avail_splits=["test"], @@ -21496,7 +21497,7 @@ wmt19_en_kk_lighteval = LightevalTaskConfig( name="wmt19:en-kk", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_en-kk", hf_avail_splits=["test"], @@ -21514,7 +21515,7 @@ wmt19_en_lt_lighteval = LightevalTaskConfig( name="wmt19:en-lt", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_en-lt", hf_avail_splits=["test"], @@ -21532,7 +21533,7 @@ wmt19_en_ru_lighteval = LightevalTaskConfig( 
name="wmt19:en-ru", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_en-ru", hf_avail_splits=["test"], @@ -21550,7 +21551,7 @@ wmt19_en_zh_lighteval = LightevalTaskConfig( name="wmt19:en-zh", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_en-zh", hf_avail_splits=["test"], @@ -21568,7 +21569,7 @@ wmt19_fi_en_lighteval = LightevalTaskConfig( name="wmt19:fi-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_fi-en", hf_avail_splits=["test"], @@ -21586,7 +21587,7 @@ wmt19_fr_de_lighteval = LightevalTaskConfig( name="wmt19:fr-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_fr-de", hf_avail_splits=["test"], @@ -21604,7 +21605,7 @@ wmt19_gu_en_lighteval = LightevalTaskConfig( name="wmt19:gu-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_gu-en", hf_avail_splits=["test"], @@ -21622,7 +21623,7 @@ wmt19_kk_en_lighteval = LightevalTaskConfig( name="wmt19:kk-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_kk-en", hf_avail_splits=["test"], @@ -21640,7 +21641,7 @@ wmt19_lt_en_lighteval = LightevalTaskConfig( name="wmt19:lt-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_lt-en", hf_avail_splits=["test"], @@ -21658,7 +21659,7 @@ wmt19_ru_en_lighteval = LightevalTaskConfig( name="wmt19:ru-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_ru-en", hf_avail_splits=["test"], @@ -21676,7 +21677,7 @@ wmt19_zh_en_lighteval = LightevalTaskConfig( name="wmt19:zh-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt19_zh-en", hf_avail_splits=["test"], @@ -21694,7 +21695,7 @@ wmt20_cs_en_lighteval = LightevalTaskConfig( name="wmt20:cs-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_cs-en", hf_avail_splits=["test"], @@ -21712,7 +21713,7 @@ wmt20_de_en_lighteval = LightevalTaskConfig( name="wmt20:de-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_de-en", hf_avail_splits=["test"], @@ -21730,7 +21731,7 @@ wmt20_de_fr_lighteval = LightevalTaskConfig( name="wmt20:de-fr", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_de-fr", 
hf_avail_splits=["test"], @@ -21748,7 +21749,7 @@ wmt20_en_cs_lighteval = LightevalTaskConfig( name="wmt20:en-cs", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-cs", hf_avail_splits=["test"], @@ -21766,7 +21767,7 @@ wmt20_en_de_lighteval = LightevalTaskConfig( name="wmt20:en-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-de", hf_avail_splits=["test"], @@ -21784,7 +21785,7 @@ wmt20_en_iu_lighteval = LightevalTaskConfig( name="wmt20:en-iu", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-iu", hf_avail_splits=["test"], @@ -21802,7 +21803,7 @@ wmt20_en_ja_lighteval = LightevalTaskConfig( name="wmt20:en-ja", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-ja", hf_avail_splits=["test"], @@ -21820,7 +21821,7 @@ wmt20_en_km_lighteval = LightevalTaskConfig( name="wmt20:en-km", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-km", hf_avail_splits=["test"], @@ -21838,7 +21839,7 @@ wmt20_en_pl_lighteval = LightevalTaskConfig( name="wmt20:en-pl", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-pl", hf_avail_splits=["test"], @@ -21856,7 +21857,7 @@ wmt20_en_ps_lighteval = LightevalTaskConfig( name="wmt20:en-ps", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-ps", hf_avail_splits=["test"], @@ -21874,7 +21875,7 @@ wmt20_en_ru_lighteval = LightevalTaskConfig( name="wmt20:en-ru", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-ru", hf_avail_splits=["test"], @@ -21892,7 +21893,7 @@ wmt20_en_ta_lighteval = LightevalTaskConfig( name="wmt20:en-ta", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-ta", hf_avail_splits=["test"], @@ -21910,7 +21911,7 @@ wmt20_en_zh_lighteval = LightevalTaskConfig( name="wmt20:en-zh", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_alphabetical", + prompt_function=prompt.wmt_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_en-zh", hf_avail_splits=["test"], @@ -21928,7 +21929,7 @@ wmt20_fr_de_lighteval = LightevalTaskConfig( name="wmt20:fr-de", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_fr-de", hf_avail_splits=["test"], @@ -21946,7 +21947,7 @@ wmt20_iu_en_lighteval = LightevalTaskConfig( name="wmt20:iu-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", 
hf_subset="wmt20_iu-en", hf_avail_splits=["test"], @@ -21964,7 +21965,7 @@ wmt20_ja_en_lighteval = LightevalTaskConfig( name="wmt20:ja-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_ja-en", hf_avail_splits=["test"], @@ -21982,7 +21983,7 @@ wmt20_km_en_lighteval = LightevalTaskConfig( name="wmt20:km-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_km-en", hf_avail_splits=["test"], @@ -22000,7 +22001,7 @@ wmt20_pl_en_lighteval = LightevalTaskConfig( name="wmt20:pl-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_pl-en", hf_avail_splits=["test"], @@ -22018,7 +22019,7 @@ wmt20_ps_en_lighteval = LightevalTaskConfig( name="wmt20:ps-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_ps-en", hf_avail_splits=["test"], @@ -22036,7 +22037,7 @@ wmt20_ru_en_lighteval = LightevalTaskConfig( name="wmt20:ru-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_ru-en", hf_avail_splits=["test"], @@ -22054,7 +22055,7 @@ wmt20_ta_en_lighteval = LightevalTaskConfig( name="wmt20:ta-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_ta-en", hf_avail_splits=["test"], @@ -22072,7 +22073,7 @@ wmt20_zh_en_lighteval = LightevalTaskConfig( name="wmt20:zh-en", suite=["lighteval", "sacrebleu"], - prompt_function="wmt_reverse_alphabetical", + prompt_function=prompt.wmt_reverse_alphabetical, hf_repo="lighteval/sacrebleu_manual", hf_subset="wmt20_zh-en", hf_avail_splits=["test"], @@ -22090,7 +22091,7 @@ word_sorting_bigbench = LightevalTaskConfig( name="word_sorting", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="word_sorting", hf_avail_splits=["default", "train", "validation"], @@ -22108,7 +22109,7 @@ word_unscrambling_bigbench = LightevalTaskConfig( name="word_unscrambling", suite=["bigbench", "bigbench_json"], - prompt_function="bigbench", + prompt_function=prompt.bigbench, hf_repo="bigbench", hf_subset="word_unscrambling", hf_avail_splits=["default", "train", "validation"], @@ -22126,7 +22127,7 @@ wsc273_lighteval = LightevalTaskConfig( name="wsc273", suite=["lighteval"], - prompt_function="wsc273", + prompt_function=prompt.wsc273, hf_repo="winograd_wsc", hf_subset="wsc273", hf_avail_splits=["test"], @@ -22144,7 +22145,7 @@ xcopa_en_lighteval = LightevalTaskConfig( name="xcopa:en", suite=["lighteval"], - prompt_function="xcopa_en", + prompt_function=prompt.xcopa_en, hf_repo="xcopa", hf_subset="default", hf_avail_splits=["test", "train", "validation"], @@ -22162,7 +22163,7 @@ xcopa_et_lighteval = LightevalTaskConfig( name="xcopa:et", suite=["lighteval"], - prompt_function="xcopa_et", + prompt_function=prompt.xcopa_et, hf_repo="xcopa", hf_subset="et", hf_avail_splits=["test", "train", "validation"], @@ 
-22180,7 +22181,7 @@ xcopa_ht_lighteval = LightevalTaskConfig( name="xcopa:ht", suite=["lighteval"], - prompt_function="xcopa_ht", + prompt_function=prompt.xcopa_ht, hf_repo="xcopa", hf_subset="ht", hf_avail_splits=["test", "train", "validation"], @@ -22198,7 +22199,7 @@ xcopa_it_lighteval = LightevalTaskConfig( name="xcopa:it", suite=["lighteval"], - prompt_function="xcopa_it", + prompt_function=prompt.xcopa_it, hf_repo="xcopa", hf_subset="it", hf_avail_splits=["test", "train", "validation"], @@ -22216,7 +22217,7 @@ xcopa_id_lighteval = LightevalTaskConfig( name="xcopa:id", suite=["lighteval"], - prompt_function="xcopa_id", + prompt_function=prompt.xcopa_id, hf_repo="xcopa", hf_subset="id", hf_avail_splits=["test", "train", "validation"], @@ -22234,7 +22235,7 @@ xcopa_qu_lighteval = LightevalTaskConfig( name="xcopa:qu", suite=["lighteval"], - prompt_function="xcopa_qu", + prompt_function=prompt.xcopa_qu, hf_repo="xcopa", hf_subset="qu", hf_avail_splits=["test", "train", "validation"], @@ -22252,7 +22253,7 @@ xcopa_sw_lighteval = LightevalTaskConfig( name="xcopa:sw", suite=["lighteval"], - prompt_function="xcopa_sw", + prompt_function=prompt.xcopa_sw, hf_repo="xcopa", hf_subset="sw", hf_avail_splits=["test", "train", "validation"], @@ -22270,7 +22271,7 @@ xcopa_zh_lighteval = LightevalTaskConfig( name="xcopa:zh", suite=["lighteval"], - prompt_function="xcopa_zh", + prompt_function=prompt.xcopa_zh, hf_repo="xcopa", hf_subset="zh", hf_avail_splits=["test", "train", "validation"], @@ -22288,7 +22289,7 @@ xcopa_ta_lighteval = LightevalTaskConfig( name="xcopa:ta", suite=["lighteval"], - prompt_function="xcopa_ta", + prompt_function=prompt.xcopa_ta, hf_repo="xcopa", hf_subset="ta", hf_avail_splits=["test", "train", "validation"], @@ -22306,7 +22307,7 @@ xcopa_th_lighteval = LightevalTaskConfig( name="xcopa:th", suite=["lighteval"], - prompt_function="xcopa_th", + prompt_function=prompt.xcopa_th, hf_repo="xcopa", hf_subset="th", hf_avail_splits=["test", "train", "validation"], @@ -22324,7 +22325,7 @@ xcopa_tr_lighteval = LightevalTaskConfig( name="xcopa:tr", suite=["lighteval"], - prompt_function="xcopa_tr", + prompt_function=prompt.xcopa_tr, hf_repo="xcopa", hf_subset="tr", hf_avail_splits=["test", "train", "validation"], @@ -22342,7 +22343,7 @@ xcopa_vi_lighteval = LightevalTaskConfig( name="xcopa:vi", suite=["lighteval"], - prompt_function="xcopa_vi", + prompt_function=prompt.xcopa_vi, hf_repo="xcopa", hf_subset="vi", hf_avail_splits=["test", "train", "validation"], @@ -22360,7 +22361,7 @@ xstory_cloze_en_lighteval = LightevalTaskConfig( name="xstory_cloze:en", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="en", hf_avail_splits=["training", "eval"], @@ -22378,7 +22379,7 @@ xstory_cloze_ru_lighteval = LightevalTaskConfig( name="xstory_cloze:ru", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="ru", hf_avail_splits=["training", "eval"], @@ -22396,7 +22397,7 @@ xstory_cloze_zh_lighteval = LightevalTaskConfig( name="xstory_cloze:zh", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="zh", hf_avail_splits=["training", "eval"], @@ -22414,7 +22415,7 @@ xstory_cloze_es_lighteval = LightevalTaskConfig( name="xstory_cloze:es", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", 
hf_subset="es", hf_avail_splits=["training", "eval"], @@ -22432,7 +22433,7 @@ xstory_cloze_ar_lighteval = LightevalTaskConfig( name="xstory_cloze:ar", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="ar", hf_avail_splits=["training", "eval"], @@ -22450,7 +22451,7 @@ xstory_cloze_hi_lighteval = LightevalTaskConfig( name="xstory_cloze:hi", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="hi", hf_avail_splits=["training", "eval"], @@ -22468,7 +22469,7 @@ xstory_cloze_id_lighteval = LightevalTaskConfig( name="xstory_cloze:id", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="id", hf_avail_splits=["training", "eval"], @@ -22486,7 +22487,7 @@ xstory_cloze_te_lighteval = LightevalTaskConfig( name="xstory_cloze:te", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="te", hf_avail_splits=["training", "eval"], @@ -22504,7 +22505,7 @@ xstory_cloze_sw_lighteval = LightevalTaskConfig( name="xstory_cloze:sw", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="sw", hf_avail_splits=["training", "eval"], @@ -22522,7 +22523,7 @@ xstory_cloze_eu_lighteval = LightevalTaskConfig( name="xstory_cloze:eu", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="eu", hf_avail_splits=["training", "eval"], @@ -22540,7 +22541,7 @@ xstory_cloze_my_lighteval = LightevalTaskConfig( name="xstory_cloze:my", suite=["lighteval"], - prompt_function="storycloze", + prompt_function=prompt.storycloze, hf_repo="juletxara/xstory_cloze", hf_subset="my", hf_avail_splits=["training", "eval"], @@ -22558,7 +22559,7 @@ xwinograd_en_lighteval = LightevalTaskConfig( name="xwinograd:en", suite=["lighteval"], - prompt_function="winogrande", + prompt_function=prompt.winogrande, hf_repo="Muennighoff/xwinograd", hf_subset="en", hf_avail_splits=["test"], @@ -22576,7 +22577,7 @@ xwinograd_fr_lighteval = LightevalTaskConfig( name="xwinograd:fr", suite=["lighteval"], - prompt_function="winogrande", + prompt_function=prompt.winogrande, hf_repo="Muennighoff/xwinograd", hf_subset="fr", hf_avail_splits=["test"], @@ -22594,7 +22595,7 @@ xwinograd_jp_lighteval = LightevalTaskConfig( name="xwinograd:jp", suite=["lighteval"], - prompt_function="winogrande", + prompt_function=prompt.winogrande, hf_repo="Muennighoff/xwinograd", hf_subset="jp", hf_avail_splits=["test"], @@ -22612,7 +22613,7 @@ xwinograd_pt_lighteval = LightevalTaskConfig( name="xwinograd:pt", suite=["lighteval"], - prompt_function="winogrande", + prompt_function=prompt.winogrande, hf_repo="Muennighoff/xwinograd", hf_subset="pt", hf_avail_splits=["test"], @@ -22630,7 +22631,7 @@ xwinograd_ru_lighteval = LightevalTaskConfig( name="xwinograd:ru", suite=["lighteval"], - prompt_function="winogrande", + prompt_function=prompt.winogrande, hf_repo="Muennighoff/xwinograd", hf_subset="ru", hf_avail_splits=["test"], @@ -22648,7 +22649,7 @@ xwinograd_zh_lighteval = LightevalTaskConfig( name="xwinograd:zh", suite=["lighteval"], - prompt_function="winogrande", + prompt_function=prompt.winogrande, hf_repo="Muennighoff/xwinograd", hf_subset="zh", hf_avail_splits=["test"], diff --git 
a/src/lighteval/tasks/extended/ifeval/main.py b/src/lighteval/tasks/extended/ifeval/main.py index c7290c3f..b3274d50 100644 --- a/src/lighteval/tasks/extended/ifeval/main.py +++ b/src/lighteval/tasks/extended/ifeval/main.py @@ -34,10 +34,22 @@ from lighteval.tasks.requests import Doc +# Very specific task where there are no precise outputs but instead we test if the format obeys rules +def ifeval_prompt(line, task_name: str = None): + return Doc( + task_name=task_name, + query=line["prompt"], + choices=[""], + gold_index=0, + instruction="", + specific={"instructions_id_list": line["instruction_id_list"], "kwargs": line["kwargs"]}, + ) + + # We create the task config ifeval = LightevalTaskConfig( name="ifeval", - prompt_function="ifeval_prompt", + prompt_function=ifeval_prompt, suite=["extended"], hf_repo="wis-k/instruction-following-eval", hf_subset="default", @@ -51,18 +63,6 @@ ) -# very specific task where there are no precise outputs but instead we test if the format obeys rules -def ifeval_prompt(line, task_name: str = None): - return Doc( - task_name=task_name, - query=line["prompt"], - choices=[""], - gold_index=0, - instruction="", - specific={"instructions_id_list": line["instruction_id_list"], "kwargs": line["kwargs"]}, - ) - - submetric_names = [ "prompt_level_strict_acc", "inst_level_strict_acc", diff --git a/src/lighteval/tasks/extended/mt_bench/main.py b/src/lighteval/tasks/extended/mt_bench/main.py index 4dfdeb41..77b8f3ee 100644 --- a/src/lighteval/tasks/extended/mt_bench/main.py +++ b/src/lighteval/tasks/extended/mt_bench/main.py @@ -21,34 +21,8 @@ # SOFTWARE. # ruff: noqa: F405, F403, F401, I001 - -import numpy as np -from aenum import extend_enum -from transformers import AutoModelForCausalLM, AutoTokenizer - -from lighteval.metrics import Metrics -from lighteval.metrics.utils import MetricCategory, MetricUseCase, SampleLevelMetric, SampleLevelMetricGrouping from lighteval.tasks.lighteval_task import LightevalTaskConfig from lighteval.tasks.requests import Doc -from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES -from colorama import Fore, Style -import os - - -task = LightevalTaskConfig( - name="mt_bench", - prompt_function="mt_bench_prompt", # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py - suite=["extended"], - hf_repo="lighteval/mt-bench", - hf_subset="default", - hf_avail_splits=["train"], - evaluation_splits=["train"], - few_shots_split="", - few_shots_select="random", - metric=["llm_judge_multi_turn_openai"], - generation_size=1024, - stop_sequence=[], -) def mt_bench_prompt(line, task_name: str = None): @@ -71,6 +45,22 @@ def mt_bench_prompt(line, task_name: str = None): ) +task = LightevalTaskConfig( + name="mt_bench", + prompt_function=mt_bench_prompt, # must be defined in the file or imported from src/lighteval/tasks/tasks_prompt_formatting.py + suite=["extended"], + hf_repo="lighteval/mt-bench", + hf_subset="default", + hf_avail_splits=["train"], + evaluation_splits=["train"], + few_shots_split="", + few_shots_select="random", + metric=["llm_judge_multi_turn_openai"], + generation_size=1024, + stop_sequence=[], +) + + TASKS_TABLE = [task] if __name__ == "__main__": diff --git a/src/lighteval/tasks/extended/tiny_benchmarks/main.py b/src/lighteval/tasks/extended/tiny_benchmarks/main.py index a8ce41a3..866b5924 100644 --- a/src/lighteval/tasks/extended/tiny_benchmarks/main.py +++ b/src/lighteval/tasks/extended/tiny_benchmarks/main.py @@ -35,6 +35,7 @@ from aenum import extend_enum from scipy.optimize 
import minimize +import lighteval.tasks.tasks_prompt_formatting as prompt from lighteval.metrics import Metrics from lighteval.metrics.metrics import CorpusLevelMetricGrouping from lighteval.metrics.metrics_sample import ExactMatches, LoglikelihoodAcc @@ -186,7 +187,7 @@ def aggregate(self, y_input): "name": "winogrande", "dataset": "tinyBenchmarks/tinyWinogrande", "subset": "winogrande_xl", - "prompt": "winogrande", + "prompt": prompt.winogrande, "splits": ["train", "validation", "test"], "evaluation_split": ["validation"], }, @@ -194,7 +195,7 @@ def aggregate(self, y_input): "name": "arc", "dataset": "tinyBenchmarks/tinyAI2_arc", "subset": "ARC-Challenge", - "prompt": "arc", + "prompt": prompt.arc, "splits": ["train", "validation", "test"], "evaluation_split": ["validation"], }, @@ -202,7 +203,7 @@ def aggregate(self, y_input): "name": "hellaswag", "dataset": "tinyBenchmarks/tinyHellaswag", "subset": "default", - "prompt": "hellaswag_harness", + "prompt": prompt.hellaswag_harness, "splits": ["train", "validation", "test"], "evaluation_split": ["validation"], }, @@ -210,7 +211,7 @@ def aggregate(self, y_input): "name": "mmlu", "dataset": "tinyBenchmarks/tinyMMLU", "subset": "all", - "prompt": "mmlu_harness", + "prompt": prompt.mmlu_harness, "splits": ["validation", "dev", "test"], "evaluation_split": ["test"], }, @@ -218,7 +219,7 @@ def aggregate(self, y_input): "name": "truthfulqa", "dataset": "tinyBenchmarks/tinyTruthfulQA", "subset": "multiple_choice", - "prompt": "truthful_qa_multiple_choice", + "prompt": prompt.truthful_qa_multiple_choice, "splits": ["validation"], "evaluation_split": ["validation"], }, @@ -226,7 +227,7 @@ def aggregate(self, y_input): "name": "gsm8k", "dataset": "tinyBenchmarks/tinyGSM8k", "subset": "main", - "prompt": "gsm8k", + "prompt": prompt.gsm8k, "splits": ["train", "test"], "evaluation_split": ["test"], }, diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py index fa1b1d5a..b92cb8fa 100644 --- a/src/lighteval/tasks/lighteval_task.py +++ b/src/lighteval/tasks/lighteval_task.py @@ -26,11 +26,10 @@ from dataclasses import dataclass from multiprocessing import Pool from pathlib import Path -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union from datasets import load_dataset -import lighteval.tasks.tasks_prompt_formatting as tasks_prompt_formatting from lighteval.few_shot_manager import FewShotSampler from lighteval.logging.hierarchical_logger import hlog, hlog_warn from lighteval.metrics import ( @@ -69,7 +68,7 @@ class LightevalTaskConfig: Arguments: name (str): Short name of the evaluation task. suite (list[str]): Evaluation suites to which the task belongs. - prompt_function (str): Name of the function used to create the [`Doc`] samples from each line of the evaluation dataset. + prompt_function (Callable[[dict, str], Doc]): Function used to create the [`Doc`] samples from each line of the evaluation dataset. hf_repo (str): Path of the hub dataset repository containing the evaluation information. hf_subset (str): Subset used for the current task, will be default if none is selected. 
        hf_avail_splits (list[str]): All the available splits in the evaluation dataset
@@ -89,7 +88,7 @@ class LightevalTaskConfig:
     """

     name: str
-    prompt_function: str
+    prompt_function: Callable  # Callable[[dict, str], Doc]
     hf_repo: str
     hf_subset: str
     metric: Tuple[Union[str, Metrics]]
@@ -203,31 +202,13 @@ def __init__(  # noqa: C901
         self.num_samples = [1] + [
             int(metric.replace("maj_at_", "").split("_")[0]) for metric in self.metrics if "maj_at_" in metric
         ]
+        # Prompt functions are now passed in directly as callables rather than looked up by name
+        if not callable(cfg.prompt_function):
+            raise TypeError(
+                f"Prompt formatting function must be a callable; received {cfg.prompt_function!r} of type {type(cfg.prompt_function)}."
+            )
+        self.formatter = cfg.prompt_function
-        # Data processing
-        # to use once prompt formatting is managed as a module
-        if custom_tasks_module is None:
-            self.formatter = getattr(tasks_prompt_formatting, cfg.prompt_function)
-        else:
-            formatter = []
-            for module in custom_tasks_module:
-                if hasattr(module, cfg.prompt_function):
-                    formatter.append(getattr(module, cfg.prompt_function))
-
-            if len(formatter) == 0:  # Default version
-                self.formatter = getattr(tasks_prompt_formatting, cfg.prompt_function)
-            elif len(formatter) == 1:
-                # If we have a prompt in both the module and our tasks_prompt_formatting
-                # We take the prompt from the module
-                if hasattr(tasks_prompt_formatting, cfg.prompt_function):
-                    hlog_warn(
-                        f"Be careful you are using custom prompt function {cfg.prompt_function} and not the default one."
-                    )
-                self.formatter = formatter[0]
-            else:
-                raise Exception(
-                    f"You defined the prompt function {cfg.prompt_function} several times in the different custom modules you are loading."
-                )
         self.generation_size = cfg.generation_size
         self.stop_sequence = cfg.stop_sequence
         self.output_regex = cfg.output_regex
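
To make the new contract concrete, here is a minimal sketch of a task defined against this API. The dataset repository, column names, suite, and metric below are hypothetical placeholders; only `LightevalTaskConfig`, `Doc`, and the `prompt_function` field come from the code above.

from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc


def my_task_prompt(line, task_name: str = None):
    # Turn one dataset row (a dict of columns) into a Doc; the column names are hypothetical.
    return Doc(
        task_name=task_name,
        query=line["question"],
        choices=line["choices"],
        gold_index=line["answer"],
    )


my_task = LightevalTaskConfig(
    name="my_task",
    suite=["community"],
    prompt_function=my_task_prompt,  # the callable itself, not the string "my_task_prompt"
    hf_repo="my_org/my_dataset",  # hypothetical dataset repository
    hf_subset="default",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    metric=["loglikelihood_acc"],  # illustrative metric name
)

Passing the old string form instead of a callable now fails fast with the TypeError raised in `__init__` above.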