Merge branch 'huggingface:main' into main

huggingface · Mar 5, 2024 · 29c9cc2 · 29c9cc2
2 parents a078e8c + 458d50b
commit 29c9cc2
Show file tree

Hide file tree

Showing 9 changed files with 1,216 additions and 1,174 deletions.
diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml
@@ -24,10 +24,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python setup.py egg_info
-          sed '/^$/q' src/lighteval.egg-info/requires.txt > src/lighteval.egg-info/requires_lite.txt
-          python -m pip install ruff -c src/lighteval.egg-info/requires_lite.txt
-          rm -rf src/lighteval.egg-info
+          python -m pip install ".[quality]"
       - name: Code quality
         run: |
           make quality
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -26,7 +26,7 @@ jobs:
          cache: 'pip'
      - name: Install lighteval in editable mode
        run: |
-         pip install -e .[accelerate]
+         pip install -e .[dev]
      - name: Get cached files
        uses: actions/cache@v2
        id: get-cache

diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Hugging Face
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -62,7 +62,7 @@ and pasting your access token.
 Lastly, if you intend to push to the code base, you'll need to install the precommit hook for styling tests:
 
 ```bash
-pip install pre-commit
+pip install .[dev]
 pre-commit install
 ```
 
@@ -237,6 +237,7 @@ Summary: create a **line summary** of your evaluation, in `src/lighteval/tasks/t
 - `metric` (list), the metrics you want to use for your evaluation (see next section for a detailed explanation)
 - `output_regex` (str), A regex string that will be used to filter your generation. (Generative metrics will only select tokens that are between the first and the second sequence matched by the regex. For example, for a regex matching `\n` and a generation `\nModel generation output\nSome other text` the metric will only be fed with `Model generation output`)
 - `frozen` (bool), for now is set to False, but we will steadily pass all stable tasks to True.
+- `trust_dataset` (bool), set to True if you trust the dataset; the value is passed to `load_dataset` as `trust_remote_code`.
 
 Make sure you can launch your model with your new task using `--tasks lighteval|yournewtask|2|0`.
 

diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
@@ -49,6 +49,7 @@ def __init__(
             stop_sequence=None,
             output_regex=None,
             frozen=False,
+            trust_dataset=True,
         )
 
 
@@ -115,6 +116,7 @@ def __init__(
             stop_sequence=None,
             output_regex=None,
             frozen=False,
+            trust_dataset=True,
         )
 
 
@@ -145,6 +147,7 @@ def acva(line, task_name: str = None):
     few_shots_split="validation",
     few_shots_select="sequential",
     metric=["loglikelihood_acc"],
+    trust_dataset=True,
 )
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,13 +1,15 @@
 # Code style
 [tool.ruff]
+line-length = 119
+
+[tool.ruff.lint]
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
 # Never enforce `E501` (line length violations).
 ignore = ["E501"]
 select = ["C", "E", "F", "I", "W"]
-line-length = 119
 fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
 
-[tool.ruff.isort]
+[tool.ruff.lint.isort]
 lines-after-imports = 2
 known-first-party = ["lighteval"]
 
@@ -50,12 +52,10 @@ keywords = ["evaluation", "nlp", "llm"]
 dependencies = [
     # Base dependencies
     "transformers>=4.38.0",
-    "huggingface_hub==0.20.3",
+    "huggingface_hub>=0.21.2",
     "torch>=2.0",
-    "GitPython==3.1.31", # for logging
+    "GitPython>=3.1.41", # for logging
     "datasets>=2.14.0",
-    # Test
-    "pytest==7.4.0",
     # Prettiness
     "termcolor==2.3.0",
     "pytablewriter",
@@ -64,16 +64,13 @@ dependencies = [
     "aenum==3.1.15",
     # Base metrics
     "nltk==3.8.1",
-    "numpy",
     "scikit-learn",
     "spacy==3.7.2",
     "sacrebleu",
     "rouge_score==0.1.2",
     "sentencepiece>=0.1.99",
     "protobuf==3.20.*", # pinned for sentencepiece compat
     "pycountry",
-    # Code style
-    "ruff==v0.2.2",
 ]
 
 [project.optional-dependencies]
@@ -87,6 +84,9 @@ nanotron = [
   "nanotron",
   "tensorboardX"
 ]
+quality = ["ruff==v0.2.2","pre-commit"]
+tests = ["pytest==7.4.0"]
+dev = ["lighteval[accelerate,quality,tests]"]
 
 
 [project.urls]

diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py
@@ -5,7 +5,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 
-from datasets import load_dataset
+from datasets import DownloadMode, load_dataset
 
 from lighteval.few_shot_manager import FewShotSampler
 from lighteval.logging.hierarchical_logger import hlog, hlog_warn
@@ -62,7 +62,7 @@ class LightevalTaskConfig:
         truncated_num_docs (bool): Whether less than the total number of documents were used
         output_regex (str)
         frozen (bool)
-
+        trust_dataset (bool): Whether to trust the dataset when loading it (passed to `load_dataset` as `trust_remote_code`)
     """
 
     name: str
@@ -84,6 +84,8 @@ class LightevalTaskConfig:
     original_num_docs: int = -1
     effective_num_docs: int = -1
 
+    trust_dataset: bool = None
+
     def as_dict(self):
         return {
             "name": self.name,
@@ -144,6 +146,7 @@ def __init__(self, name: str, cfg: LightevalTaskConfig, cache_dir: Optional[str]
         self.dataset_path = self.hf_repo
         self.dataset_config_name = self.hf_subset
         self.dataset = None  # Delayed download
+        self.trust_dataset = cfg.trust_dataset
         hlog(f"{self.dataset_path} {self.dataset_config_name}")
         self._fewshot_docs = None
         self._docs = None
@@ -521,14 +524,10 @@ def load_datasets(tasks: list["LightevalTask"], dataset_loading_processes: int =
         """
 
         if dataset_loading_processes <= 1:
-            datasets = [
-                download_dataset_worker((task.dataset_path, task.dataset_config_name)) for task in tasks
-            ]  # Also help us with gdb
+            datasets = [download_dataset_worker(task) for task in tasks]  # Also help us with gdb
         else:
             with Pool(processes=dataset_loading_processes) as pool:
-                datasets = pool.map(
-                    download_dataset_worker, [(task.dataset_path, task.dataset_config_name) for task in tasks]
-                )
+                datasets = pool.map(download_dataset_worker, tasks)
 
         for task, dataset in zip(tasks, datasets):
             task.dataset = dataset
@@ -539,13 +538,14 @@ def download_dataset_worker(args):
     Worker function to download a dataset from the HuggingFace Hub.
     Used for parallel dataset loading.
     """
-    dataset_path, dataset_config_name = args
+    task: LightevalTask = args
     dataset = load_dataset(
-        path=dataset_path,
-        name=dataset_config_name,
+        path=task.dataset_path,
+        name=task.dataset_config_name,
         data_dir=None,
         cache_dir=None,
-        download_mode=None,
+        download_mode=DownloadMode.FORCE_REDOWNLOAD,  # None
+        trust_remote_code=task.trust_dataset,
     )
     return dataset