From 4550cb7e119971c204e000d9c176740235855c9a Mon Sep 17 00:00:00 2001 From: Nathan Habib <30601243+NathanHB@users.noreply.github.com> Date: Wed, 17 Jul 2024 11:16:13 +0200 Subject: [PATCH] launch lighteval using `lighteval --args` (#152) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --------- Co-authored-by: Nathan Habib Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com> --- README.md | 35 +++-- examples/model_configs/endpoint_model.yaml | 2 +- examples/model_configs/tgi_model.yaml | 2 +- pyproject.toml | 2 +- run_evals_accelerate.py | 89 ------------ run_evals_nanotron.py | 55 -------- src/lighteval/__main__.py | 65 +++++++++ src/lighteval/commands/lighteval_cli.py | 150 --------------------- src/lighteval/parsers.py | 128 ++++++++++++++++++ src/lighteval/tasks/registry.py | 14 ++ tests/test_main.py | 6 +- 11 files changed, 234 insertions(+), 314 deletions(-) delete mode 100644 run_evals_accelerate.py delete mode 100644 run_evals_nanotron.py create mode 100644 src/lighteval/__main__.py delete mode 100644 src/lighteval/commands/lighteval_cli.py create mode 100644 src/lighteval/parsers.py diff --git a/README.md b/README.md index 10364fe4b..a6dfbc482 100644 --- a/README.md +++ b/README.md @@ -78,8 +78,8 @@ pre-commit install We provide two main entry points to evaluate models: -* `run_evals_accelerate.py`: evaluate models on CPU or one or more GPUs using [🤗 Accelerate](https://github.com/huggingface/accelerate). -* `run_evals_nanotron.py`: evaluate models in distributed settings using [⚡️ Nanotron](https://github.com/huggingface/nanotron). +* `lighteval accelerate`: evaluate models on CPU or one or more GPUs using [🤗 Accelerate](https://github.com/huggingface/accelerate). +* `lighteval nanotron`: evaluate models in distributed settings using [⚡️ Nanotron](https://github.com/huggingface/nanotron). For most users, we recommend using the 🤗 Accelerate backend - see below for specific commands. @@ -94,7 +94,8 @@ accelerate config You can then evaluate a model using data parallelism as follows: ```shell -accelerate launch --multi_gpu --num_processes= run_evals_accelerate.py \ +accelerate launch --multi_gpu --num_processes= -m \ + lighteval accelerate \ --model_args="pretrained=" \ --tasks \ --output_dir output_dir @@ -109,7 +110,8 @@ suite|task|num_few_shot|{0 or 1 to automatically reduce `num_few_shot` if prompt or a file path like [`examples/tasks/recommended_set.txt`](./examples/tasks/recommended_set.txt) which specifies multiple task configurations. For example, to evaluate GPT-2 on the Truthful QA benchmark run: ```shell -accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \ +accelerate launch --multi_gpu --num_processes=8 -m \ + lighteval accelerate \ --model_args "pretrained=gpt2" \ --tasks "lighteval|truthfulqa:mc|0|0" \ --override_batch_size 1 \ @@ -119,7 +121,8 @@ accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \ Here, `--override_batch_size` defines the _batch size per device_, so the effective batch size will be `override_batch_size x num_gpus`. To evaluate on multiple benchmarks, separate each task configuration with a comma, e.g. ```shell -accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \ +accelerate launch --multi_gpu --num_processes=8 -m \ + lighteval accelerate \ --model_args "pretrained=gpt2" \ --tasks "leaderboard|truthfulqa:mc|0|0,leaderboard|gsm8k|0|0" \ --override_batch_size 1 \ @@ -133,7 +136,8 @@ See the [`examples/tasks/recommended_set.txt`](./examples/tasks/recommended_set. If you want to evaluate a model by spinning up inference endpoints, use adapter/delta weights, or more complex configuration options, you can load models using a configuration file. This is done as follows: ```shell -accelerate launch --multi_gpu --num_processes= run_evals_accelerate.py \ +accelerate launch --multi_gpu --num_processes= -m \ + lighteval accelerate \ --model_config_path="" \ --tasks \ --output_dir output_dir @@ -147,13 +151,15 @@ To evaluate models larger that ~40B parameters in 16-bit precision, you will nee ```shell # PP=2, DP=4 - good for models < 70B params -accelerate launch --multi_gpu --num_processes=4 run_evals_accelerate.py \ +accelerate launch --multi_gpu --num_processes=4 -m \ + lighteval accelerate \ --model_args="pretrained=,model_parallel=True" \ --tasks \ --output_dir output_dir # PP=4, DP=2 - good for huge models >= 70B params -accelerate launch --multi_gpu --num_processes=2 run_evals_accelerate.py \ +accelerate launch --multi_gpu --num_processes=2 -m \ + lighteval accelerate \ --model_args="pretrained=,model_parallel=True" \ --tasks \ --output_dir output_dir @@ -164,7 +170,8 @@ accelerate launch --multi_gpu --num_processes=2 run_evals_accelerate.py \ To evaluate a model on all the benchmarks of the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) using a single node of 8 GPUs, run: ```shell -accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \ +accelerate launch --multi_gpu --num_processes=8 -m \ + lighteval accelerate \ --model_args "pretrained=" \ --tasks examples/tasks/open_llm_leaderboard_tasks.txt \ --override_batch_size 1 \ @@ -176,7 +183,7 @@ accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \ You can also use `lighteval` to evaluate models on CPU, although note this will typically be very slow for large models. To do so, run: ```shell -python run_evals_accelerate.py \ +lighteval accelerate \ --model_args="pretrained="\ --tasks \ --output_dir output_dir @@ -211,7 +218,7 @@ Independently of the default tasks provided in `lighteval` that you will find in For example, to run an extended task like `ifeval`, you can run: ```shell -python run_evals_accelerate.py \ +lighteval accelerate \ --model_args "pretrained=HuggingFaceH4/zephyr-7b-beta" \ --use_chat_template \ # optional, if you want to run the evaluation with the chat template --tasks "extended|ifeval|0|0" \ @@ -221,7 +228,7 @@ python run_evals_accelerate.py \ To run a community or custom task, you can use (note the custom_tasks flag): ```shell -python run_evals_accelerate.py \ +lighteval accelerate \ --model_args="pretrained="\ --tasks \ --custom_tasks \ @@ -231,7 +238,7 @@ python run_evals_accelerate.py \ For example, to launch `lighteval` on `arabic_mmlu:abstract_algebra` for `HuggingFaceH4/zephyr-7b-beta`, run: ```shell -python run_evals_accelerate.py \ +lighteval accelerate \ --model_args "pretrained=HuggingFaceH4/zephyr-7b-beta" \ --use_chat_template \ # optional, if you want to run the evaluation with the chat template --tasks "community|arabic_mmlu:abstract_algebra|5|1" \ @@ -464,7 +471,7 @@ source /activate #or conda activate yourenv cd /lighteval export CUDA_LAUNCH_BLOCKING=1 -srun accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py --model_args "pretrained=your model name" --tasks examples/tasks/open_llm_leaderboard_tasks.txt --override_batch_size 1 --save_details --output_dir=your output dir +srun accelerate launch --multi_gpu --num_processes=8 -m lighteval accelerate --model_args "pretrained=your model name" --tasks examples/tasks/open_llm_leaderboard_tasks.txt --override_batch_size 1 --save_details --output_dir=your output dir ``` ## Releases diff --git a/examples/model_configs/endpoint_model.yaml b/examples/model_configs/endpoint_model.yaml index 9e0db4374..2834cdd28 100644 --- a/examples/model_configs/endpoint_model.yaml +++ b/examples/model_configs/endpoint_model.yaml @@ -16,7 +16,7 @@ model: endpoint_type: "protected" namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models. - env_vars: + env_vars: null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048` generation: add_special_tokens: true diff --git a/examples/model_configs/tgi_model.yaml b/examples/model_configs/tgi_model.yaml index 5e45641f9..82ac50a79 100644 --- a/examples/model_configs/tgi_model.yaml +++ b/examples/model_configs/tgi_model.yaml @@ -3,4 +3,4 @@ model: instance: inference_server_address: "" inference_server_auth: null - model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory \ No newline at end of file + model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory diff --git a/pyproject.toml b/pyproject.toml index b771942d3..95f74147b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,4 +102,4 @@ Issues = "https://github.com/huggingface/lighteval/issues" # Changelog = "https://github.com/huggingface/lighteval/blob/master/CHANGELOG.md" [project.scripts] -lighteval = "lighteval.commands.lighteval_cli:main" +lighteval = "lighteval.__main__:cli_evaluate" diff --git a/run_evals_accelerate.py b/run_evals_accelerate.py deleted file mode 100644 index d623de256..000000000 --- a/run_evals_accelerate.py +++ /dev/null @@ -1,89 +0,0 @@ -# MIT License - -# Copyright (c) 2024 The HuggingFace Team - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -"""Example run command: -accelerate config -accelerate launch run_evals_accelerate.py --tasks="leaderboard|hellaswag|5|1" --output_dir "/scratch/evals" --model_args "pretrained=gpt2" -""" - -import argparse - -from lighteval.main_accelerate import CACHE_DIR, main - - -def get_parser(): - parser = argparse.ArgumentParser() - group = parser.add_mutually_exclusive_group(required=True) - task_type_group = parser.add_mutually_exclusive_group(required=True) - - # Model type: either use a config file or simply the model name - task_type_group.add_argument("--model_config_path") - task_type_group.add_argument("--model_args") - - # Debug - parser.add_argument("--max_samples", type=int, default=None) - parser.add_argument("--override_batch_size", type=int, default=-1) - parser.add_argument("--job_id", type=str, help="Optional Job ID for future reference", default="") - # Saving - parser.add_argument("--output_dir", required=True) - parser.add_argument("--push_results_to_hub", default=False, action="store_true") - parser.add_argument("--save_details", action="store_true") - parser.add_argument("--push_details_to_hub", default=False, action="store_true") - parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true") - parser.add_argument( - "--public_run", default=False, action="store_true", help="Push results and details to a public repo" - ) - parser.add_argument( - "--cache_dir", - type=str, - default=CACHE_DIR, - help="Cache directory for downloaded datasets & model, defaults to `HF_HOME` environment variable", - ) - parser.add_argument( - "--results_org", - type=str, - help="Hub organisation where you want to store the results. Your current token must have write access to it", - ) - # Common parameters - parser.add_argument("--use_chat_template", default=False, action="store_true") - parser.add_argument("--system_prompt", type=str, default=None) - parser.add_argument("--dataset_loading_processes", type=int, default=1) - parser.add_argument( - "--custom_tasks", - type=str, - default=None, - help="Path to a file with custom tasks (a TASK list of dict and potentially prompt formatting functions)", - ) - group.add_argument( - "--tasks", - type=str, - default=None, - help="Comma-separated ids of tasks, e.g. 'original|mmlu:abstract_algebra|5' or path to a text file with a list of tasks", - ) - parser.add_argument("--num_fewshot_seeds", type=int, default=1, help="Number of trials the few shots") - return parser - - -if __name__ == "__main__": - parser = get_parser() - args, unknowns = parser.parse_known_args() - main(args) diff --git a/run_evals_nanotron.py b/run_evals_nanotron.py deleted file mode 100644 index 3a4a2a421..000000000 --- a/run_evals_nanotron.py +++ /dev/null @@ -1,55 +0,0 @@ -# MIT License - -# Copyright (c) 2024 The HuggingFace Team - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# flake8: noqa: C901 -import argparse - -from lighteval.main_nanotron import main - - -def get_parser(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--checkpoint-config-path", - type=str, - required=True, - help="Path to the brr checkpoint YAML or python config file, potentially on S3", - ) - parser.add_argument( - "--lighteval-override", - type=str, - help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config", - ) - parser.add_argument( - "--cache-dir", - type=str, - default=None, - help="Cache directory", - ) - - return parser - - -if __name__ == "__main__": - parser = get_parser() - args, unknowns = parser.parse_known_args() - main(args.checkpoint_config_path, args.lighteval_override, args.cache_dir) diff --git a/src/lighteval/__main__.py b/src/lighteval/__main__.py new file mode 100644 index 000000000..9deb09251 --- /dev/null +++ b/src/lighteval/__main__.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +# MIT License + +# Copyright (c) 2024 Taratra D. RAHARISON and The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import argparse + +from lighteval.parsers import parser_accelerate, parser_nanotron +from lighteval.tasks.registry import Registry + + +def cli_evaluate(): + parser = argparse.ArgumentParser(description="CLI tool for lighteval, a lightweight framework for LLM evaluation") + subparsers = parser.add_subparsers(help="help for subcommand", dest="subcommand") + + # create the parser for the "accelerate" command + parser_a = subparsers.add_parser("accelerate", help="use accelerate and transformers as backend for evaluation.") + parser_accelerate(parser_a) + + # create the parser for the "nanotron" command + parser_b = subparsers.add_parser("nanotron", help="use nanotron as backend for evaluation.") + parser_nanotron(parser_b) + + parser.add_argument("--list-tasks", action="store_true", help="List available tasks") + + args = parser.parse_args() + + if args.subcommand == "accelerate": + from lighteval.main_accelerate import main as main_accelerate + + main_accelerate(args) + return + + if args.subcommand == "nanotron": + from lighteval.main_nanotron import main as main_nanotron + + main_nanotron(args.checkpoint_config_path, args.lighteval_override, args.cache_dir) + return + + if args.list_tasks: + Registry(cache_dir="").print_all_tasks() + return + + +if __name__ == "__main__": + cli_evaluate() diff --git a/src/lighteval/commands/lighteval_cli.py b/src/lighteval/commands/lighteval_cli.py deleted file mode 100644 index 618663342..000000000 --- a/src/lighteval/commands/lighteval_cli.py +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/env python - -# MIT License - -# Copyright (c) 2024 Taratra D. RAHARISON and The HuggingFace Team - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import argparse -import importlib -import json -import os - -import pkg_resources - - -def load_tasks_table_extended(module_name: any) -> list: - """ - load the module module_name - - Args: - - module_name the name of the module we want to load - Returns: - - TASKS_TABLE: a list of the task in the module - """ - module_path = f"lighteval.tasks.extended.{module_name}.main" - module_loaded = importlib.import_module(module_path) - tasks_list = None - try: - tasks_list = module_loaded.TASKS_TABLE - except Exception as e: - print(e) - return tasks_list if tasks_list is not None else [] - - -def get_tasks_table_json() -> list: - """ - Fetch tasks/tasks_table.jsonl - Returns - - a list of all the tasks in tasks/tasks_table.jsonl - """ - tasks = [] - # Handling tasks_table.jsonl - # Get the path to the resource file - tasks_table_path = pkg_resources.resource_filename("lighteval", "tasks/tasks_table.jsonl") - with open(tasks_table_path) as jsonl_tasks_table: - jsonl_tasks_table_content = jsonl_tasks_table.read() - for jline in jsonl_tasks_table_content.splitlines(): - tasks.append(json.loads(jline)) - return tasks - - -def get_extended_tasks() -> list: - """ - Fetch all the tasks in the extended suite - Returns - - a list of all the extended tasks - """ - tasks_extended = [] - extended_tasks_dir = pkg_resources.resource_filename("lighteval", "tasks/extended") - for root, dirs, files in os.walk(extended_tasks_dir): - for file in files: - if file == "main.py": - module_name = os.path.basename(root) - tasks_table = load_tasks_table_extended(module_name) - tasks_extended += tasks_table - return tasks_extended - - -def group_by_suite(tasks: list, tasks_extended: list) -> dict: - """ - Group tasks by suite and sort them alphabetically - Args: - - tasks: list of tasks in tasks/tasks_table.jsonl - - tasks_extended: list of extended tasks - Returns: - - a dict of tasks grouped by suite - """ - grouped_by_suite = {} - for task in tasks: - for suite in task["suite"]: - if suite not in grouped_by_suite.keys(): - grouped_by_suite[suite] = [task["name"]] - else: - grouped_by_suite[suite].append(task["name"]) - grouped_by_suite[suite].sort() - - grouped_by_suite["extended"] = [] - # Adding extended suite - for task in tasks_extended: - grouped_by_suite["extended"].append(task["name"]) - grouped_by_suite["extended"].sort() - return grouped_by_suite - - -def list_tasks_command(): - """ - List all the available tasks in tasks_table.jsonl and the extended directory - Assumes the existence of TASKS_TABLE in the main.py file for each extended - tasks in tasks/extended - """ - try: - # Handling tasks_table.jsonl - tasks = get_tasks_table_json() - - # Handling extended tasks - tasks_extended = get_extended_tasks() - - # Grouping by suite the tasks - grouped_by_suite = group_by_suite(tasks, tasks_extended) - - # Print tasks - print("Available tasks: (Grouped by suite)\n") - for suite, task_list in grouped_by_suite.items(): - print("- " + suite) - for task in task_list: - print("\t - " + task) - except Exception as e: - print("Error: ", e) - - -def main(): - parser = argparse.ArgumentParser(description="CLI tool for lighteval, a lightweight framework for LLM evaluation") - parser.add_argument("--list-tasks", action="store_true", help="List available tasks") - args = parser.parse_args() - - if args.list_tasks: - list_tasks_command() - else: - parser.print_help() - - -if __name__ == "__main__": - main() diff --git a/src/lighteval/parsers.py b/src/lighteval/parsers.py new file mode 100644 index 000000000..d05ba312f --- /dev/null +++ b/src/lighteval/parsers.py @@ -0,0 +1,128 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +import argparse +import os + + +TOKEN = os.getenv("HF_TOKEN") +CACHE_DIR = os.getenv("HF_HOME") + + +def parser_accelerate(parser=None): + if parser is None: + parser = argparse.ArgumentParser( + description="CLI tool for lighteval, a lightweight framework for LLM evaluation" + ) + + group = parser.add_mutually_exclusive_group(required=True) + task_type_group = parser.add_mutually_exclusive_group(required=True) + + # Model type: either use a config file or simply the model name + task_type_group.add_argument( + "--model_config_path", + type=str, + help="Path to the model config file, e.g. 'examples/model_configs/base_model.yaml'", + ) + task_type_group.add_argument( + "--model_args", + type=str, + help="Model arguments to pass to the model class, e.g. 'pretrained=gpt2,dtype=float16'", + ) + + # Debug + parser.add_argument("--max_samples", type=int, default=None, help="Maximum number of samples to evaluate on") + parser.add_argument("--override_batch_size", type=int, default=-1) + parser.add_argument("--job_id", type=str, help="Optional Job ID for future reference", default="") + + # Saving + parser.add_argument("--output_dir", required=True, type=str, help="Directory to save the results") + parser.add_argument( + "--push_results_to_hub", default=False, action="store_true", help="Set to push the results to the hub" + ) + parser.add_argument("--save_details", action="store_true", help="Save the details of the run in the output_dir") + parser.add_argument( + "--push_details_to_hub", default=False, action="store_true", help="Set to push the details to the hub" + ) + parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true") + parser.add_argument( + "--public_run", default=False, action="store_true", help="Push results and details to a public repo" + ) + parser.add_argument( + "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models" + ) + parser.add_argument( + "--results_org", + type=str, + help="Hub organisation where you want to store the results. Your current token must have write access to it", + ) + # Common parameters + parser.add_argument( + "--use_chat_template", + default=False, + action="store_true", + help="Use the chat template (from the model's tokenizer) for the prompt", + ) + parser.add_argument( + "--system_prompt", type=str, default=None, help="System prompt to use, e.g. 'You are a helpful assistant.'" + ) + parser.add_argument( + "--dataset_loading_processes", type=int, default=1, help="Number of processes to use for loading the datasets" + ) + parser.add_argument( + "--custom_tasks", + type=str, + default=None, + help="Path to a file with custom tasks (a TASK list of dict and potentially prompt formating functions)", + ) + group.add_argument( + "--tasks", + type=str, + default=None, + help="Id of a task, e.g. 'original|mmlu:abstract_algebra|5' or path to a texte file with a list of tasks", + ) + parser.add_argument("--num_fewshot_seeds", type=int, default=1, help="Number of trials the few shots") + return parser + + +def parser_nanotron(parser=None): + if parser is None: + parser = argparse.ArgumentParser( + description="CLI tool for lighteval, a lightweight framework for LLM evaluation" + ) + + parser.add_argument( + "--checkpoint-config-path", + type=str, + required=True, + help="Path to the brr checkpoint YAML or python config file, potentially on S3", + ) + parser.add_argument( + "--lighteval-override", + type=str, + help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config", + ) + parser.add_argument( + "--cache-dir", + type=str, + default=None, + help="Cache directory", + ) diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py index ef575b7e1..28597763f 100644 --- a/src/lighteval/tasks/registry.py +++ b/src/lighteval/tasks/registry.py @@ -23,6 +23,7 @@ import collections import importlib import os +from itertools import groupby from pathlib import Path from pprint import pformat from types import ModuleType @@ -152,6 +153,19 @@ def get_task_dict( return tasks_dict + def print_all_tasks(self): + """ + Print all the tasks in the task registry. + """ + tasks_names = list(self.TASK_REGISTRY.keys()) + tasks_names.sort() + for suite, g in groupby(tasks_names, lambda x: x.split("|")[0]): + tasks_names = list(g) + tasks_names.sort() + print(f"\n- {suite}:") + for task_name in tasks_names: + print(f" - {task_name}") + def create_custom_tasks_module(custom_tasks: Union[str, Path, ModuleType]) -> ModuleType: """Creates a custom task module to load tasks defined by the user in their own file. diff --git a/tests/test_main.py b/tests/test_main.py index 00798cb4f..27816c8b2 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -27,7 +27,7 @@ from pytest import approx from lighteval.main_accelerate import main # noqa: E402 -from run_evals_accelerate import get_parser +from lighteval.parsers import parser_accelerate from tests.reference_scores.reference_task_scores import RESULTS_FULL, RESULTS_LITE # noqa: E402 from tests.reference_scores.reference_tasks import ALL_SUBSETS @@ -58,7 +58,7 @@ def run_model_predictions_full(model: str, tasks: list): "1", "--save_details", ] - parser = get_parser() + parser = parser_accelerate() args = parser.parse_args(lighteval_args) results = main(args) return results @@ -77,7 +77,7 @@ def run_model_predictions_lite(model: str, tasks: list): "--save_details", ] lighteval_args += ["--max_samples", "10"] - parser = get_parser() + parser = parser_accelerate() args = parser.parse_args(lighteval_args) results = main(args) return results