From 39befd2b89649a33775e365ae192c1337d4f7bdc Mon Sep 17 00:00:00 2001 From: Thomas Wolf Date: Wed, 7 Feb 2024 09:43:06 +0000 Subject: [PATCH] update organization --- run_evals_accelerate.py | 78 +++++++++++++++++++ run_evals_nanotron.py | 33 ++++++++ src/lighteval/{main.py => main_accelerate.py} | 76 ------------------ src/lighteval/main_nanotron.py | 76 ++++-------------- src/lighteval/utils.py | 2 +- 5 files changed, 127 insertions(+), 138 deletions(-) create mode 100644 run_evals_accelerate.py create mode 100644 run_evals_nanotron.py rename src/lighteval/{main.py => main_accelerate.py} (61%) diff --git a/run_evals_accelerate.py b/run_evals_accelerate.py new file mode 100644 index 000000000..337a1c2f7 --- /dev/null +++ b/run_evals_accelerate.py @@ -0,0 +1,78 @@ +import argparse + +from lighteval.main_accelerate import CACHE_DIR, main + + +def get_parser(): + parser = argparse.ArgumentParser() + group = parser.add_mutually_exclusive_group(required=True) + weight_type_group = parser.add_mutually_exclusive_group() + + weight_type_group.add_argument( + "--delta_weights", + action="store_true", + default=False, + help="set to True of your model should be merged with a base model, also need to provide the base model name", + ) + weight_type_group.add_argument( + "--adapter_weights", + action="store_true", + default=False, + help="set to True of your model has been trained with peft, also need to provide the base model name", + ) + parser.add_argument( + "--base_model", type=str, default=None, help="name of the base model to be used for delta or adapter weights" + ) + + parser.add_argument("--model_args", required=True) + parser.add_argument("--output_dir", required=True) + parser.add_argument("--model_dtype", type=str, default=None) + parser.add_argument( + "--multichoice_continuations_start_space", + action="store_true", + help="Whether to force multiple choice continuations starts with a space", + ) + parser.add_argument( + 
"--no_multichoice_continuations_start_space", + action="store_true", + help="Whether to force multiple choice continuations do not start with a space", + ) + parser.add_argument("--push_results_to_hub", default=False, action="store_true") + parser.add_argument("--save_details", action="store_true") + parser.add_argument("--push_details_to_hub", default=False, action="store_true") + parser.add_argument( + "--public_run", default=False, action="store_true", help="Push results and details to a public repo" + ) + parser.add_argument("--max_samples", type=int, default=None) + parser.add_argument("--override_batch_size", type=int, default=-1) + parser.add_argument("--dataset_loading_processes", type=int, default=1) + parser.add_argument("--inference_server_address", type=str, default=None) + parser.add_argument("--inference_server_auth", type=str, default=None) + parser.add_argument("--num_fewshot_seeds", type=int, default=1, help="Number of trials for the few shots") + parser.add_argument("--cache_dir", type=str, default=CACHE_DIR) + parser.add_argument( + "--results_org", + type=str, + help="Hub organisation where you want to store the results. Your current token must have write access to it", + ) + parser.add_argument("--job_id", type=str, help="Optional Job ID for future reference", default="") + parser.add_argument("--use_chat_template", default=False, action="store_true") + parser.add_argument( + "--custom_tasks_file", + type=str, + default=None, + help="Path to a file with custom tasks (a TASK list of dict and potentially prompt formatting functions)", + ) + group.add_argument( + "--tasks", + type=str, + default=None, + help="Id of a task, e.g. 
'original|mmlu:abstract_algebra|5' or path to a texte file with a list of tasks", + ) + return parser + + +if __name__ == "__main__": + parser = get_parser() + args, unknowns = parser.parse_known_args() + main(args) diff --git a/run_evals_nanotron.py b/run_evals_nanotron.py new file mode 100644 index 000000000..9b98d0057 --- /dev/null +++ b/run_evals_nanotron.py @@ -0,0 +1,33 @@ +# flake8: noqa: C901 +import argparse + +from lighteval.main_nanotron import main + + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--checkpoint-config-path", + type=str, + required=True, + help="Path to the brr checkpoint YAML or python config file, potentially on S3", + ) + parser.add_argument( + "--lighteval-override", + type=str, + help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config", + ) + parser.add_argument( + "--cache-dir", + type=str, + default="", + help="Cache directory", + ) + + return parser + + +if __name__ == "__main__": + parser = get_parser() + args, unknowns = parser.parse_known_args() + main(args.checkpoint_config_path, args.lighteval_override, args.cache_dir) diff --git a/src/lighteval/main.py b/src/lighteval/main_accelerate.py similarity index 61% rename from src/lighteval/main.py rename to src/lighteval/main_accelerate.py index bfb8615fb..349c8fc27 100644 --- a/src/lighteval/main.py +++ b/src/lighteval/main_accelerate.py @@ -1,4 +1,3 @@ -import argparse import os import random import shutil @@ -32,75 +31,6 @@ accelerator = None -def get_parser(): - parser = argparse.ArgumentParser() - group = parser.add_mutually_exclusive_group(required=True) - weight_type_group = parser.add_mutually_exclusive_group() - - weight_type_group.add_argument( - "--delta_weights", - action="store_true", - default=False, - help="set to True of your model should be merged with a base model, also need to provide the base model name", - ) - weight_type_group.add_argument( - "--adapter_weights", - 
action="store_true", - default=False, - help="set to True of your model has been trained with peft, also need to provide the base model name", - ) - parser.add_argument( - "--base_model", type=str, default=None, help="name of the base model to be used for delta or adapter weights" - ) - - parser.add_argument("--model_args", required=True) - parser.add_argument("--output_dir", required=True) - parser.add_argument("--model_dtype", type=str, default=None) - parser.add_argument( - "--multichoice_continuations_start_space", - action="store_true", - help="Whether to force multiple choice continuations starts with a space", - ) - parser.add_argument( - "--no_multichoice_continuations_start_space", - action="store_true", - help="Whether to force multiple choice continuations do not starts with a space", - ) - parser.add_argument("--push_results_to_hub", default=False, action="store_true") - parser.add_argument("--save_details", action="store_true") - parser.add_argument("--push_details_to_hub", default=False, action="store_true") - parser.add_argument( - "--public_run", default=False, action="store_true", help="Push results and details to a public repo" - ) - parser.add_argument("--max_samples", type=int, default=None) - parser.add_argument("--override_batch_size", type=int, default=-1) - parser.add_argument("--dataset_loading_processes", type=int, default=1) - parser.add_argument("--inference_server_address", type=str, default=None) - parser.add_argument("--inference_server_auth", type=str, default=None) - parser.add_argument("--num_fewshot_seeds", type=int, default=1, help="Number of trials the few shots") - parser.add_argument("--cache_dir", type=str, default=CACHE_DIR) - parser.add_argument( - "--results_org", - type=str, - help="Hub organisation where you want to store the results. 
Your current token must have write access to it", - ) - parser.add_argument("--job_id", type=str, help="Optional Job ID for future reference", default="") - parser.add_argument("--use_chat_template", default=False, action="store_true") - parser.add_argument( - "--custom_tasks_file", - type=str, - default=None, - help="Path to a file with custom tasks (a TASK list of dict and potentially prompt formating functions)", - ) - group.add_argument( - "--tasks", - type=str, - default=None, - help="Id of a task, e.g. 'original|mmlu:abstract_algebra|5' or path to a texte file with a list of tasks", - ) - return parser - - @htrack() def main(args): env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir) @@ -192,9 +122,3 @@ def main(args): print(make_results_table(final_dict)) return final_dict - - -if __name__ == "__main__": - parser = get_parser() - args, unknowns = parser.parse_known_args() - main(args) diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index dcdf92657..9597f2734 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -1,17 +1,10 @@ # flake8: noqa: C901 -import argparse import os import random from typing import Optional, Type import numpy as np import torch -from nanotron import distributed as dist -from nanotron import logging -from nanotron.config import Config, get_config_from_file -from nanotron.logging import get_logger, log_rank -from nanotron.parallel.context import ParallelContext -from nanotron.utils import local_ranks_zero_first from lighteval.evaluator import evaluate, make_results_table from lighteval.logging.evaluation_tracker import EvaluationTracker @@ -20,6 +13,19 @@ from lighteval.models.nanotron_model import NanotronLightevalModel from lighteval.tasks.lighteval_task import LightevalTask, create_requests_from_tasks from lighteval.tasks.registry import Registry, get_custom_tasks, taskinfo_selector +from lighteval.utils import is_nanotron_available + + +if is_nanotron_available(): + 
from nanotron import distributed as dist + from nanotron import logging + from nanotron.config import Config, get_config_from_file + from nanotron.logging import get_logger, log_rank + from nanotron.parallel.context import ParallelContext + from nanotron.utils import local_ranks_zero_first + +else: + dist = None logger = get_logger(__name__) @@ -29,54 +35,8 @@ CACHE_DIR = os.getenv("HF_HOME", "/scratch") -def get_parser(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--checkpoint-config-path", - type=str, - required=True, - help="Path to the brr checkpoint YAML or python config file, potentially on S3", - ) - parser.add_argument( - "--lighteval-override", - type=str, - help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config", - ) - parser.add_argument( - "--tokenizer", - type=str, - help="Local or hub path of an optional tokenizer (if not indicated in the checkpoint)", - ) - parser.add_argument( - "--s5cmd-path", - type=str, - default="/admin/home/thomwolf/miniconda3/envs/b4r/bin/s5cmd", - help="Path to s5cmd install", - ) - parser.add_argument( - "--s5cmd-numworkers", - type=int, - default=64, - help="s5cmd num workers (optional)", - ) - parser.add_argument( - "--s5cmd-concurrency", - type=int, - default=10, - help="s5cmd concurrency (optional)", - ) - parser.add_argument( - "--cache-dir", - type=str, - default="", - help="Cache directory", - ) - - return parser - - @htrack() -def eval( +def main( local_config_path: str, lighteval_config_path: Optional[str] = None, cache_dir: str = None, @@ -90,7 +50,7 @@ def eval( dist.initialize_torch_distributed() with htrack_block("get config"): - if not args.checkpoint_config_path.endswith(".yaml"): + if not local_config_path.endswith(".yaml"): raise ValueError("The checkpoint path should point to a YAML file") nanotron_config: config_cls = get_config_from_file( @@ -228,9 +188,3 @@ def eval( hlog(make_results_table(final_dict)) return final_dict - - -if 
__name__ == "__main__": - parser = get_parser() - args, unknowns = parser.parse_known_args() - eval(args.checkpoint_config_path, args.lighteval_override, args.cache_dir) diff --git a/src/lighteval/utils.py b/src/lighteval/utils.py index 21a6a135d..60d523999 100644 --- a/src/lighteval/utils.py +++ b/src/lighteval/utils.py @@ -139,7 +139,7 @@ def is_nanotron_available() -> bool: return importlib.util.find_spec("nanotron") is not None -NO_NANOTRON_ERROR_MSG = "YYou requested the use of nanotron for this evaluation, but it is not available in your current environement. Please install it using pip." +NO_NANOTRON_ERROR_MSG = "You requested the use of nanotron for this evaluation, but it is not available in your current environment. Please install it using pip." def is_optimum_available() -> bool: