Skip to content

Commit

Permalink
Merge branch 'main' into hynek_function
Browse files Browse the repository at this point in the history
  • Loading branch information
clefourrier committed Jul 9, 2024
2 parents 8f98337 + 3aaec22 commit 7d2afa4
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 134 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ Install the dependencies. For the default installation, you just need:
pip install .
```

If you want to evaluate models with frameworks like `accelerate` or `peft`, you will need to specify the optional dependencies group that fits your use case (`accelerate`,`tgi`,`optimum`,`quantization`,`adapters`,`nanotron`):
If you want to evaluate models with frameworks like `accelerate` or `peft`, you will need to specify the optional dependency group(s) that fit your use case (`accelerate`, `tgi`, `optimum`, `quantization`, `adapters`, `nanotron`, `tensorboardX`):

```bash
pip install '.[optional1,optional2]'
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ keywords = ["evaluation", "nlp", "llm"]
dependencies = [
# Base dependencies
"transformers>=4.38.0",
"huggingface_hub>=0.22.0",
"huggingface_hub>=0.23.0",
"torch>=2.0",
"GitPython>=3.1.41", # for logging
"datasets>=2.14.0",
Expand Down Expand Up @@ -86,6 +86,7 @@ nanotron = [
"nanotron",
"tensorboardX"
]
tensorboardX = ["tensorboardX"]
quality = ["ruff==v0.2.2","pre-commit"]
tests = ["pytest==7.4.0"]
dev = ["lighteval[accelerate,quality,tests]"]
Expand Down
1 change: 1 addition & 0 deletions run_evals_accelerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def get_parser():
parser.add_argument("--push_results_to_hub", default=False, action="store_true")
parser.add_argument("--save_details", action="store_true")
parser.add_argument("--push_details_to_hub", default=False, action="store_true")
parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true")
parser.add_argument(
"--public_run", default=False, action="store_true", help="Push results and details to a public repo"
)
Expand Down
230 changes: 106 additions & 124 deletions src/lighteval/logging/evaluation_tracker.py

Large diffs are not rendered by default.

14 changes: 10 additions & 4 deletions src/lighteval/main_accelerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,15 @@
@htrack()
def main(args):
env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir)
evaluation_tracker = EvaluationTracker(hub_results_org=args.results_org, token=TOKEN)
evaluation_tracker = EvaluationTracker(
output_dir=args.output_dir,
hub_results_org=args.results_org,
push_results_to_hub=args.push_results_to_hub,
push_details_to_hub=args.push_details_to_hub,
push_results_to_tensorboard=args.push_results_to_tensorboard,
public=args.public_run,
token=TOKEN,
)
evaluation_tracker.general_config_logger.log_args_info(
args.num_fewshot_seeds, args.override_batch_size, args.max_samples, args.job_id
)
Expand Down Expand Up @@ -124,9 +132,7 @@ def main(args):
evaluation_tracker.details_logger.aggregate()

if args.output_dir:
evaluation_tracker.save(
args.output_dir, args.push_results_to_hub, args.push_details_to_hub, args.public_run
)
evaluation_tracker.save()

final_dict = evaluation_tracker.generate_final_dict()

Expand Down
8 changes: 7 additions & 1 deletion src/lighteval/main_nanotron.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,13 @@ def main(
data_parallel_size=lighteval_config.parallelism.dp,
)

evaluation_tracker = EvaluationTracker(token=TOKEN)
evaluation_tracker = EvaluationTracker(
token=TOKEN,
output_dir=lighteval_config.logging.local_output_path,
hub_results_org=lighteval_config.logging.hub_repo_tensorboard,
tensorboard_metric_prefix=lighteval_config.logging.tensorboard_metric_prefix,
nanotron_run_info=nanotron_config.general,
)
evaluation_tracker.general_config_logger.log_args_info(
num_fewshot_seeds=1,
override_batch_size=None,
Expand Down
6 changes: 3 additions & 3 deletions src/lighteval/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,15 +301,15 @@ class Metrics(Enum):
sample_level_fn=LoglikelihoodPreparator().prepare,
category=MetricCategory.MULTICHOICE,
use_case=MetricUseCase.ACCURACY,
corpus_level_fn=CorpusLevelF1Score(None),
corpus_level_fn=CorpusLevelF1Score(None).compute,
higher_is_better=True,
)
loglikelihood_f1_single_token = CorpusLevelMetric(
metric="loglikelihood_f1",
sample_level_fn=LoglikelihoodPreparator(is_single_token=True).prepare,
category=MetricCategory.MULTICHOICE_ONE_TOKEN,
use_case=MetricUseCase.ACCURACY,
corpus_level_fn=CorpusLevelF1Score(None),
corpus_level_fn=CorpusLevelF1Score(None).compute,
higher_is_better=True,
)
mcc = CorpusLevelMetric(
Expand Down Expand Up @@ -385,7 +385,7 @@ class Metrics(Enum):
sample_level_fn=LoglikelihoodPreparator(is_single_token=True).prepare,
category=MetricCategory.MULTICHOICE_ONE_TOKEN,
use_case=MetricUseCase.ACCURACY,
corpus_level_fn=CorpusLevelF1Score(average=None, num_classes=3),
corpus_level_fn=CorpusLevelF1Score(average=None, num_classes=3).compute,
higher_is_better=True,
)
perfect_exact_match = SampleLevelMetric(
Expand Down
9 changes: 9 additions & 0 deletions src/lighteval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,15 @@ def is_peft_available() -> bool:
# Error text for when adapter-weight models are requested but `peft` is not installed.
NO_PEFT_ERROR_MSG = "You are trying to use adapter weights models, for which you need `peft`, which is not available in your environment. Please install it using pip."


def is_tensorboardX_available() -> bool:
    """Return whether the optional `tensorboardX` package can be imported.

    The check only looks the package up; it does not import it.

    Returns:
        bool: True if a module spec for `tensorboardX` is found (or the module
        is already loaded), False otherwise.
    """
    try:
        return importlib.util.find_spec("tensorboardX") is not None
    except ValueError:
        # `find_spec` raises ValueError when the module already sits in
        # `sys.modules` but its `__spec__` is unset or None (e.g. a stubbed
        # module). The module is loaded in that case, so report it as available
        # instead of letting an availability probe crash.
        return True


# Warning text for when tensorboardX logging is requested but the package is
# not installed; the message states that the logging step is skipped.
NO_TENSORBOARDX_WARN_MSG = (
    "You are trying to log using tensorboardX, which is not installed. Please install it using pip. Skipping."
)


def is_openai_available() -> bool:
    """Return whether the optional `openai` package can be imported.

    The check only looks the package up; it does not import it.

    Returns:
        bool: True if a module spec for `openai` is found (or the module is
        already loaded), False otherwise.
    """
    try:
        return importlib.util.find_spec("openai") is not None
    except ValueError:
        # `find_spec` raises ValueError when the module already sits in
        # `sys.modules` but its `__spec__` is unset or None (e.g. a stubbed
        # module). The module is loaded in that case, so report it as available
        # instead of letting an availability probe crash.
        return True

Expand Down

0 comments on commit 7d2afa4

Please sign in to comment.