Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
clefourrier committed Mar 5, 2024
1 parent 458d50b commit 715909a
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions src/lighteval/tasks/lighteval_task.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -524,10 +524,16 @@ def load_datasets(tasks: list["LightevalTask"], dataset_loading_processes: int =
"""

if dataset_loading_processes <= 1:
datasets = [download_dataset_worker(task) for task in tasks] # Also help us with gdb
datasets = [
download_dataset_worker(task.dataset_path, task.dataset_config_name, task.trust_dataset)
for task in tasks
]
else:
with Pool(processes=dataset_loading_processes) as pool:
datasets = pool.map(download_dataset_worker, tasks)
datasets = pool.map(
download_dataset_worker,
[(task.dataset_path, task.dataset_config_name, task.trust_dataset) for task in tasks],
)

for task, dataset in zip(tasks, datasets):
task.dataset = dataset
Expand All @@ -538,14 +544,14 @@ def download_dataset_worker(args):
Worker function to download a dataset from the HuggingFace Hub.
Used for parallel dataset loading.
"""
task: LightevalTask = args
dataset_path, dataset_config_name, trust_dataset = args
dataset = load_dataset(
path=task.dataset_path,
name=task.dataset_config_name,
path=dataset_path,
name=dataset_config_name,
data_dir=None,
cache_dir=None,
download_mode=DownloadMode.FORCE_REDOWNLOAD, # None
trust_remote_code=task.trust_dataset,
trust_remote_code=trust_dataset,
)
return dataset

Expand Down

0 comments on commit 715909a

Please sign in to comment.