Skip to content

Commit

Permalink
Merge branch 'huggingface:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
alielfilali01 authored Mar 5, 2024
2 parents a078e8c + 458d50b commit 29c9cc2
Show file tree
Hide file tree
Showing 9 changed files with 1,216 additions and 1,174 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/quality.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python setup.py egg_info
sed '/^$/q' src/lighteval.egg-info/requires.txt > src/lighteval.egg-info/requires_lite.txt
python -m pip install ruff -c src/lighteval.egg-info/requires_lite.txt
rm -rf src/lighteval.egg-info
python -m pip install ".[quality]"
- name: Code quality
run: |
make quality
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
cache: 'pip'
- name: Install lighteval in editable mode
run: |
pip install -e .[accelerate]
pip install -e .[dev]
- name: Get cached files
uses: actions/cache@v2
id: get-cache
Expand Down
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Hugging Face

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ and pasting your access token.
Lastly, if you intend to push to the code base, you'll need to install the pre-commit hook for styling tests:

```bash
pip install pre-commit
pip install .[dev]
pre-commit install
```

Expand Down Expand Up @@ -237,6 +237,7 @@ Summary: create a **line summary** of your evaluation, in `src/lighteval/tasks/t
- `metric` (list), the metrics you want to use for your evaluation (see next section for a detailed explanation)
- `output_regex` (str), a regex string that will be used to filter your generation. (Generative metrics will only select tokens that are between the first and the second sequence matched by the regex. For example, for a regex matching `\n` and a generation `\nModel generation output\nSome other text`, the metric will only be fed with `Model generation output`.)
- `frozen` (bool), currently set to False; stable tasks will progressively be switched to True.
- `trust_dataset` (bool), set to True if you trust the dataset; the value is passed as `trust_remote_code` when the dataset is loaded.

Make sure you can launch your model with your new task using `--tasks lighteval|yournewtask|2|0`.

Expand Down
3 changes: 3 additions & 0 deletions community_tasks/arabic_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
stop_sequence=None,
output_regex=None,
frozen=False,
trust_dataset=True,
)


Expand Down Expand Up @@ -115,6 +116,7 @@ def __init__(
stop_sequence=None,
output_regex=None,
frozen=False,
trust_dataset=True,
)


Expand Down Expand Up @@ -145,6 +147,7 @@ def acva(line, task_name: str = None):
few_shots_split="validation",
few_shots_select="sequential",
metric=["loglikelihood_acc"],
trust_dataset=True,
)


Expand Down
18 changes: 9 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# Code style
[tool.ruff]
line-length = 119

[tool.ruff.lint]
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
# Never enforce `E501` (line length violations).
ignore = ["E501"]
select = ["C", "E", "F", "I", "W"]
line-length = 119
fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]

[tool.ruff.isort]
[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ["lighteval"]

Expand Down Expand Up @@ -50,12 +52,10 @@ keywords = ["evaluation", "nlp", "llm"]
dependencies = [
# Base dependencies
"transformers>=4.38.0",
"huggingface_hub==0.20.3",
"huggingface_hub>=0.21.2",
"torch>=2.0",
"GitPython==3.1.31", # for logging
"GitPython>=3.1.41", # for logging
"datasets>=2.14.0",
# Test
"pytest==7.4.0",
# Prettiness
"termcolor==2.3.0",
"pytablewriter",
Expand All @@ -64,16 +64,13 @@ dependencies = [
"aenum==3.1.15",
# Base metrics
"nltk==3.8.1",
"numpy",
"scikit-learn",
"spacy==3.7.2",
"sacrebleu",
"rouge_score==0.1.2",
"sentencepiece>=0.1.99",
"protobuf==3.20.*", # pinned for sentencepiece compat
"pycountry",
# Code style
"ruff==v0.2.2",
]

[project.optional-dependencies]
Expand All @@ -87,6 +84,9 @@ nanotron = [
"nanotron",
"tensorboardX"
]
quality = ["ruff==v0.2.2","pre-commit"]
tests = ["pytest==7.4.0"]
dev = ["lighteval[accelerate,quality,tests]"]


[project.urls]
Expand Down
24 changes: 12 additions & 12 deletions src/lighteval/tasks/lighteval_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

from datasets import load_dataset
from datasets import DownloadMode, load_dataset

from lighteval.few_shot_manager import FewShotSampler
from lighteval.logging.hierarchical_logger import hlog, hlog_warn
Expand Down Expand Up @@ -62,7 +62,7 @@ class LightevalTaskConfig:
truncated_num_docs (bool): Whether less than the total number of documents were used
output_regex (str)
frozen (bool)
trust_dataset (bool): Whether to trust the dataset at execution or not
"""

name: str
Expand All @@ -84,6 +84,8 @@ class LightevalTaskConfig:
original_num_docs: int = -1
effective_num_docs: int = -1

trust_dataset: bool = None

def as_dict(self):
return {
"name": self.name,
Expand Down Expand Up @@ -144,6 +146,7 @@ def __init__(self, name: str, cfg: LightevalTaskConfig, cache_dir: Optional[str]
self.dataset_path = self.hf_repo
self.dataset_config_name = self.hf_subset
self.dataset = None # Delayed download
self.trust_dataset = cfg.trust_dataset
hlog(f"{self.dataset_path} {self.dataset_config_name}")
self._fewshot_docs = None
self._docs = None
Expand Down Expand Up @@ -521,14 +524,10 @@ def load_datasets(tasks: list["LightevalTask"], dataset_loading_processes: int =
"""

if dataset_loading_processes <= 1:
datasets = [
download_dataset_worker((task.dataset_path, task.dataset_config_name)) for task in tasks
] # Also help us with gdb
datasets = [download_dataset_worker(task) for task in tasks] # Also help us with gdb
else:
with Pool(processes=dataset_loading_processes) as pool:
datasets = pool.map(
download_dataset_worker, [(task.dataset_path, task.dataset_config_name) for task in tasks]
)
datasets = pool.map(download_dataset_worker, tasks)

for task, dataset in zip(tasks, datasets):
task.dataset = dataset
Expand All @@ -539,13 +538,14 @@ def download_dataset_worker(args):
Worker function to download a dataset from the HuggingFace Hub.
Used for parallel dataset loading.
"""
dataset_path, dataset_config_name = args
task: LightevalTask = args
dataset = load_dataset(
path=dataset_path,
name=dataset_config_name,
path=task.dataset_path,
name=task.dataset_config_name,
data_dir=None,
cache_dir=None,
download_mode=None,
download_mode=DownloadMode.FORCE_REDOWNLOAD, # None
trust_remote_code=task.trust_dataset,
)
return dataset

Expand Down
Loading

0 comments on commit 29c9cc2

Please sign in to comment.