Support for nanotron #11

Merged · 16 commits · Feb 7, 2024
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -37,4 +37,5 @@ repos:
rev: 'v0.1.6'
hooks:
- id: ruff
args: ['--fix']
clefourrier marked this conversation as resolved.
- id: ruff-format
8 changes: 7 additions & 1 deletion README.md
@@ -11,7 +11,7 @@ It is still an early, internal version - it should be nice to use but don't expe
In case of problems or questions, feel free to open an issue!

## How to install and use
### Requirements
### Installation
0) Create your virtual environment using virtualenv or conda depending on your preferences. We require Python 3.10.

1) Clone the package using `git clone`, then `cd lighteval-harness` and run `pip install -e .`. Once the dependencies are installed, `cd src`.
@@ -22,6 +22,12 @@ Optional:

2) Add your user token to the environment variable `HUGGING_FACE_HUB_TOKEN` if you want to push your results to the Hub.

For the linting:
```bash
pre-commit install
pre-commit run --config .pre-commit-config.yaml --all-files
```


### Usage
- Launching on CPU
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -82,8 +82,7 @@ optimum = ["optimum==1.12.0"]
quantization = ["bitsandbytes>=0.41.0", "auto-gptq>=0.4.2"]
adapters = ["peft==0.3.0"]
nanotron = [
"nanotron@git+https://github.com/huggingface/nanotron@8c1a49588d0745a6404644a86547c2dd6a63640e",
"brrr@git+https://github.com/huggingface/brrr@e8a503e2ec08b34eed7522d331aec3bee8cdd29b",
"nanotron@git+https://github.com/huggingface/nanotron",
clefourrier marked this conversation as resolved.
"tensorboardX"
]

92 changes: 92 additions & 0 deletions run_evals_accelerate.py
@@ -0,0 +1,92 @@
import argparse

from lighteval.main_accelerate import CACHE_DIR, main


def get_parser():
clefourrier marked this conversation as resolved.
parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group(required=True)
task_type_group = parser.add_mutually_exclusive_group(required=True)

# Model type 1) Base model
weight_type_group = parser.add_mutually_exclusive_group()
weight_type_group.add_argument(
"--delta_weights",
action="store_true",
default=False,
help="set to True of your model should be merged with a base model, also need to provide the base model name",
)
weight_type_group.add_argument(
"--adapter_weights",
action="store_true",
default=False,
help="set to True of your model has been trained with peft, also need to provide the base model name",
)
parser.add_argument(
"--base_model", type=str, default=None, help="name of the base model to be used for delta or adapter weights"
)

task_type_group.add_argument("--model_args")
parser.add_argument("--model_dtype", type=str, default=None)
parser.add_argument(
"--multichoice_continuations_start_space",
action="store_true",
help="Whether to force multiple choice continuations to start with a space",
)
parser.add_argument(
"--no_multichoice_continuations_start_space",
action="store_true",
help="Whether to force multiple choice continuations to not start with a space",
)
parser.add_argument("--use_chat_template", default=False, action="store_true")
# Model type 2) TGI
task_type_group.add_argument("--inference_server_address", type=str)
parser.add_argument("--inference_server_auth", type=str, default=None)
# Model type 3) Inference endpoints
task_type_group.add_argument("--endpoint_model_name", type=str)
parser.add_argument("--accelerator", type=str, default=None)
parser.add_argument("--vendor", type=str, default=None)
parser.add_argument("--region", type=str, default=None)
parser.add_argument("--instance_size", type=str, default=None)
parser.add_argument("--instance_type", type=str, default=None)
parser.add_argument("--reuse_existing", default=False, action="store_true")
# Debug
parser.add_argument("--max_samples", type=int, default=None)
parser.add_argument("--job_id", type=str, help="Optional Job ID for future reference", default="")
# Saving
parser.add_argument("--push_results_to_hub", default=False, action="store_true")
parser.add_argument("--save_details", action="store_true")
parser.add_argument("--push_details_to_hub", default=False, action="store_true")
parser.add_argument(
"--public_run", default=False, action="store_true", help="Push results and details to a public repo"
)
parser.add_argument("--cache_dir", type=str, default=CACHE_DIR)
parser.add_argument(
"--results_org",
type=str,
help="Hub organisation where you want to store the results. Your current token must have write access to it",
)
# Common parameters
parser.add_argument("--output_dir", required=True)
parser.add_argument("--override_batch_size", type=int, default=-1)
parser.add_argument("--dataset_loading_processes", type=int, default=1)
parser.add_argument(
"--custom_tasks_file",
type=str,
default=None,
help="Path to a file with custom tasks (a TASK list of dict and potentially prompt formating functions)",
)
group.add_argument(
"--tasks",
type=str,
default=None,
help="Id of a task, e.g. 'original|mmlu:abstract_algebra|5' or path to a texte file with a list of tasks",
)
parser.add_argument("--num_fewshot_seeds", type=int, default=1, help="Number of trials the few shots")
return parser


if __name__ == "__main__":
parser = get_parser()
args, unknowns = parser.parse_known_args()
main(args)
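
For reference, a minimal usage sketch of this new entry point, driving the parser above programmatically. The flag names come from the parser; the `pretrained=gpt2` model-args format and the task id are illustrative assumptions rather than anything prescribed by this PR, and the sketch assumes it is run from the repository root so the script is importable.

```python
# Hypothetical usage sketch for run_evals_accelerate.py; values are placeholders.
from lighteval.main_accelerate import main

from run_evals_accelerate import get_parser

args = get_parser().parse_args(
    [
        "--model_args", "pretrained=gpt2",  # one of the mutually exclusive backends (format assumed)
        "--tasks", "original|mmlu:abstract_algebra|5",  # task id, as in the --tasks help string
        "--output_dir", "./evals",
        "--max_samples", "10",  # debug option: only evaluate 10 samples
    ]
)
main(args)
```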
33 changes: 33 additions & 0 deletions run_evals_nanotron.py
@@ -0,0 +1,33 @@
# flake8: noqa: C901
import argparse

from lighteval.main_nanotron import main


def get_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--checkpoint-config-path",
type=str,
required=True,
help="Path to the brr checkpoint YAML or python config file, potentially on S3",
)
parser.add_argument(
"--lighteval-override",
type=str,
help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config",
)
parser.add_argument(
"--cache-dir",
type=str,
default="",
help="Cache directory",
)

return parser


if __name__ == "__main__":
parser = get_parser()
args, unknowns = parser.parse_known_args()
main(args.checkpoint_config_path, args.lighteval_override, args.cache_dir)
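
Similarly, a hedged invocation sketch for the nanotron entry point; the checkpoint config path is a placeholder, and the override and cache-dir arguments are left at their defaults.

```python
# Hypothetical usage sketch for run_evals_nanotron.py; the path is a placeholder.
from lighteval.main_nanotron import main

from run_evals_nanotron import get_parser

args = get_parser().parse_args(
    ["--checkpoint-config-path", "checkpoints/10000/config.yaml"]
)
main(args.checkpoint_config_path, args.lighteval_override, args.cache_dir)
```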
31 changes: 31 additions & 0 deletions src/lighteval/data.py
@@ -198,6 +198,37 @@ def _sorting_criteria(self, request: GreedyUntilRequest | GreedyUntilWithLogitsR
return -(len(toks) + gen_length)


class GenerativeTaskDatasetNanotron(DynamicBatchDataset):
def __getitem__(self, index) -> Request:
Review thread:
Member: Why do you need your own class? (Is it only to return the index with the item?)
Member (author): Nathan's requirement.
Member: base_model does not use the index for each sample, which means we need to accommodate the dataset to nanotron.
Member: Yes, but I'm unsure why we need to grab the index for brr.

"""
Get an item from the dataset depending on the split we are currently in.
For instance, if we are in split 0, we will get the item at index 0, if
we are in split 1, we will get the item at index self.split_size, etc.
Used for dynamic batching.

Args:
index (int): The index of the item.

Returns:
tuple[int, Any]: The index and the item at the specified index.
"""
return index, self.sorted_data[index + self.split_start]

def _sorting_criteria(self, request) -> int:
"""
Collate function for generating batches.

Args:
x (Any): The input data.

Returns:
Any: The collated data.
"""
toks = request.tokenized_context
gen_length = request.generation_size
return -(len(toks) + gen_length)
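
To make the review discussion above concrete, here is a minimal sketch, using simplified stand-ins rather than the real lighteval classes, of the behavioural difference: the nanotron variant returns the original index alongside the item (presumably so outputs can be mapped back to their samples on the nanotron side), while the base dataset returns only the item.

```python
# Simplified stand-ins for illustration only; the real classes live in
# src/lighteval/data.py and carry the full dynamic-batching split logic.
class BaseGenerativeDataset:
    def __init__(self, sorted_data, split_start=0):
        self.sorted_data = sorted_data
        self.split_start = split_start

    def __getitem__(self, index):
        return self.sorted_data[index + self.split_start]


class NanotronGenerativeDataset(BaseGenerativeDataset):
    def __getitem__(self, index):
        # Also return the index so the caller can map outputs back to inputs.
        return index, self.sorted_data[index + self.split_start]


requests = ["request_a", "request_b", "request_c"]
print(BaseGenerativeDataset(requests)[1])      # request_b
print(NanotronGenerativeDataset(requests)[1])  # (1, 'request_b')
```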


class GenDistributedSampler(DistributedSampler):
"""A distributed sampler that copy the last element only when drop_last is False so we keep a small padding in the batches
as our samples are sorted by length.
4 changes: 2 additions & 2 deletions src/lighteval/evaluator.py
@@ -5,6 +5,8 @@
import copy
from typing import Dict, Union

from pytablewriter import LatexTableWriter, MarkdownTableWriter

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.logging.hierarchical_logger import hlog
from lighteval.models.base_model import BaseModel
@@ -99,8 +101,6 @@ def evaluate( # noqa: C901

def make_results_table(result_dict):
"""Generate table of results."""
from pytablewriter import LatexTableWriter, MarkdownTableWriter

md_writer = MarkdownTableWriter()
latex_writer = LatexTableWriter()
md_writer.headers = ["Task", "Version", "Metric", "Value", "", "Stderr"]
150 changes: 74 additions & 76 deletions src/lighteval/logging/evaluation_tracker.py
@@ -18,13 +18,11 @@
TaskConfigLogger,
VersionsLogger,
)
from lighteval.utils import is_nanotron_available
from lighteval.utils import is_nanotron_available, obj_to_markdown


if is_nanotron_available():
from brrr.config import BrrrConfig
from brrr.experiment_loggers import obj_to_markdown
from nanotron.config import get_config_from_dict
from nanotron.config import Config, get_config_from_dict


class EnhancedJSONEncoder(json.JSONEncoder):
@@ -104,81 +102,81 @@ def save(

"""
hlog("Saving experiment tracker")
try:
date_id = datetime.now().isoformat().replace(":", "-")

output_dir_results = Path(output_dir) / "results" / self.general_config_logger.model_name
output_dir_details = Path(output_dir) / "details" / self.general_config_logger.model_name
output_dir_details_sub_folder = output_dir_details / date_id
output_dir_results.mkdir(parents=True, exist_ok=True)
output_dir_details_sub_folder.mkdir(parents=True, exist_ok=True)

output_results_file = output_dir_results / f"results_{date_id}.json"
output_results_in_details_file = output_dir_details / f"results_{date_id}.json"

hlog(f"Saving results to {output_results_file} and {output_results_in_details_file}")

to_dump = {
"config_general": asdict(self.general_config_logger),
"results": self.metrics_logger.metric_aggregated,
"versions": self.versions_logger.versions,
"config_tasks": self.task_config_logger.tasks_configs,
"summary_tasks": self.details_logger.compiled_details,
"summary_general": asdict(self.details_logger.compiled_details_over_all_tasks),
}
dumped = json.dumps(to_dump, cls=EnhancedJSONEncoder, indent=2)

with open(output_results_file, "w") as f:
f.write(dumped)

with open(output_results_in_details_file, "w") as f:
f.write(dumped)

for task_name, task_details in self.details_logger.details.items():
output_file_details = output_dir_details_sub_folder / f"details_{task_name}_{date_id}.parquet"
# Create a dataset from the dictionary
try:
dataset = Dataset.from_list([asdict(detail) for detail in task_details])
except Exception:
# We force cast to str to avoid formatting problems for nested objects
dataset = Dataset.from_list(
[{k: str(v) for k, v in asdict(detail).items()} for detail in task_details]
)
# try:
Review thread:
Member: If you remove the high-level try/catch, please add other try/catches to prevent the other possible failures.
Member (author): Are we sure we want to silently catch mistakes, or should we rather let the run fail?
Member: No, because we still want the results to be saved locally. That way we can upload them by hand instead of having to redo the whole eval.

date_id = datetime.now().isoformat().replace(":", "-")

# We don't keep 'id' around if it's there
column_names = dataset.column_names
if "id" in dataset.column_names:
column_names = [t for t in dataset.column_names if t != "id"]

# Sort column names to make it easier later
dataset = dataset.select_columns(sorted(column_names))
# Save the dataset to a Parquet file
dataset.to_parquet(output_file_details.as_posix())

if push_results_to_hub:
self.api.upload_folder(
repo_id=self.hub_results_repo if public else self.hub_private_results_repo,
folder_path=output_dir_results,
path_in_repo=self.general_config_logger.model_name,
repo_type="dataset",
commit_message=f"Updating model {self.general_config_logger.model_name}",
)
output_dir_results = Path(output_dir) / "results" / self.general_config_logger.model_name
output_dir_details = Path(output_dir) / "details" / self.general_config_logger.model_name
output_dir_details_sub_folder = output_dir_details / date_id
output_dir_results.mkdir(parents=True, exist_ok=True)
output_dir_details_sub_folder.mkdir(parents=True, exist_ok=True)

if push_details_to_hub:
self.details_to_hub(
model_name=self.general_config_logger.model_name,
results_file_path=output_results_in_details_file,
details_folder_path=output_dir_details_sub_folder,
push_as_public=public,
)
output_results_file = output_dir_results / f"results_{date_id}.json"
output_results_in_details_file = output_dir_details / f"results_{date_id}.json"

hlog(f"Saving results to {output_results_file} and {output_results_in_details_file}")

if push_results_to_tensorboard:
self.push_results_to_tensorboard(
results=self.metrics_logger.metric_aggregated, details=self.details_logger.details
to_dump = {
"config_general": asdict(self.general_config_logger),
"results": self.metrics_logger.metric_aggregated,
"versions": self.versions_logger.versions,
"config_tasks": self.task_config_logger.tasks_configs,
"summary_tasks": self.details_logger.compiled_details,
"summary_general": asdict(self.details_logger.compiled_details_over_all_tasks),
}
dumped = json.dumps(to_dump, cls=EnhancedJSONEncoder, indent=2)

with open(output_results_file, "w") as f:
f.write(dumped)

with open(output_results_in_details_file, "w") as f:
f.write(dumped)

for task_name, task_details in self.details_logger.details.items():
output_file_details = output_dir_details_sub_folder / f"details_{task_name}_{date_id}.parquet"
# Create a dataset from the dictionary
try:
dataset = Dataset.from_list([asdict(detail) for detail in task_details])
except Exception:
# We force cast to str to avoid formatting problems for nested objects
dataset = Dataset.from_list(
[{k: str(v) for k, v in asdict(detail).items()} for detail in task_details]
)
except Exception as e:
hlog("WARNING: Could not save results")
hlog(repr(e))

# We don't keep 'id' around if it's there
column_names = dataset.column_names
if "id" in dataset.column_names:
column_names = [t for t in dataset.column_names if t != "id"]

# Sort column names to make it easier later
dataset = dataset.select_columns(sorted(column_names))
# Save the dataset to a Parquet file
dataset.to_parquet(output_file_details.as_posix())

if push_results_to_hub:
self.api.upload_folder(
repo_id=self.hub_results_repo if public else self.hub_private_results_repo,
folder_path=output_dir_results,
path_in_repo=self.general_config_logger.model_name,
repo_type="dataset",
commit_message=f"Updating model {self.general_config_logger.model_name}",
)

if push_details_to_hub:
self.details_to_hub(
model_name=self.general_config_logger.model_name,
results_file_path=output_results_in_details_file,
details_folder_path=output_dir_details_sub_folder,
push_as_public=public,
)

if push_results_to_tensorboard:
self.push_results_to_tensorboard(
results=self.metrics_logger.metric_aggregated, details=self.details_logger.details
)
# except Exception as e:
# hlog("WARNING: Could not save results")
# hlog(repr(e))
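
The thread above debates replacing the removed top-level try/except. Below is a hedged sketch of the pattern being discussed, with hypothetical helper names and a placeholder payload, not lighteval code: the local save stays unguarded so failures are loud, while the push to the Hub is wrapped so a network error does not lose results that are already on disk.

```python
# Hypothetical sketch of the narrower error handling discussed in the review;
# helper names and the results payload are placeholders, not the tracker's API.
import json
from pathlib import Path


def save_locally(results: dict, output_dir: str = "./evals") -> Path:
    # Unguarded on purpose: if the local save fails, the run should fail loudly.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / "results.json"
    out_file.write_text(json.dumps(results, indent=2))
    return out_file


def push_to_hub(results: dict) -> None:
    # Stand-in for the Hub upload (self.api.upload_folder(...) in the tracker).
    raise RuntimeError("simulated network failure")


def save_and_push(results: dict) -> None:
    out_file = save_locally(results)
    try:
        push_to_hub(results)
    except Exception as exc:
        # The local copy at `out_file` survives and can be uploaded by hand later.
        print(f"WARNING: could not push results to the Hub: {exc!r} (kept {out_file})")


save_and_push({"task": "dummy", "metric": "acc", "value": 0.0})
```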

def generate_final_dict(self) -> dict:
"""Aggregates and returns all the logger's experiment information in a dictionary.
@@ -487,7 +485,7 @@ def push_results_to_tensorboard( # noqa: C901
if not is_nanotron_available():
hlog_warn("You cannot push results to tensorboard with having nanotron installed. Skipping")
return
config: BrrrConfig = get_config_from_dict(self.general_config_logger.config, config_class=BrrrConfig)
config: Config = get_config_from_dict(self.general_config_logger.config, config_class=Config)
lighteval_config = config.lighteval
try:
global_step = config.general.step