Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…rness into winogender
  • Loading branch information
oskarvanderwal committed Jan 9, 2024
2 parents a2d17b3 + ecb1df2 commit 6010d8f
Show file tree
Hide file tree
Showing 37 changed files with 422 additions and 137 deletions.
14 changes: 5 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,10 @@ The best way to get support is to open an issue on this repo or join the [Eleuth
## Cite as

```
@misc{eval-harness,
author = {Gao, Leo and Tow, Jonathan and Abbasi, Baber and Biderman, Stella and Black, Sid and DiPofi, Anthony and Foster, Charles and Golding, Laurence and Hsu, Jeffrey and Le Noac'h, Alain and Li, Haonan and McDonell, Kyle and Muennighoff, Niklas and Ociepa, Chris and Phang, Jason and Reynolds, Laria and Schoelkopf, Hailey and Skowron, Aviya and Sutawika, Lintang and Tang, Eric and Thite, Anish and Wang, Ben and Wang, Kevin and Zou, Andy},
title = {A framework for few-shot language model evaluation},
month = 12,
year = 2023,
publisher = {Zenodo},
version = {v0.4.0},
doi = {10.5281/zenodo.10256836},
url = {https://zenodo.org/records/10256836}
@article{gao2021framework,
title={A framework for few-shot language model evaluation},
author={Gao, Leo and Tow, Jonathan and Biderman, Stella and Black, Sid and DiPofi, Anthony and Foster, Charles and Golding, Laurence and Hsu, Jeffrey and McDonell, Kyle and Muennighoff, Niklas and others},
journal={Version v0.0.1. Sept},
year={2021}
}
```
9 changes: 7 additions & 2 deletions lm_eval/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,9 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
if results is not None:
if args.log_samples:
samples = results.pop("samples")
dumped = json.dumps(results, indent=2, default=_handle_non_serializable)
dumped = json.dumps(
results, indent=2, default=_handle_non_serializable, ensure_ascii=False
)
if args.show_config:
print(dumped)

Expand All @@ -264,7 +266,10 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
)
filename = path.joinpath(f"{output_name}.jsonl")
samples_dumped = json.dumps(
samples[task_name], indent=2, default=_handle_non_serializable
samples[task_name],
indent=2,
default=_handle_non_serializable,
ensure_ascii=False,
)
filename.open("w").write(samples_dumped)

Expand Down
40 changes: 19 additions & 21 deletions lm_eval/api/task.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import abc
import ast
import logging
import os
import random
import re
from collections.abc import Callable
Expand Down Expand Up @@ -87,12 +86,6 @@ class TaskConfig(dict):
] = None # by default, not used in the code; allows users to pass arbitrary info to tasks

def __post_init__(self) -> None:
if self.dataset_path and os.path.exists(os.path.dirname(self.dataset_path)):
import inspect
from importlib import import_module

self.dataset_path = inspect.getfile(import_module(self.dataset_path))

if self.generation_kwargs is not None:
if self.output_type != "generate_until":
eval_logger.warning(
Expand Down Expand Up @@ -705,11 +698,11 @@ def __init__(
)

if delimiter_has_whitespace and choice_has_whitespace:
eval_logger.warning(
f'Both target_delimiter and target choice: "{choice}" have whitespace'
eval_logger.debug(
f'Both target_delimiter "{self.config.target_delimiter}" and target choice: "{choice}" have whitespace'
)
elif (not delimiter_has_whitespace) and (not choice_has_whitespace):
eval_logger.warning(
eval_logger.debug(
f'Both target_delimiter "{self.config.target_delimiter}" and target choice: "{choice}" do not have whitespace, ignore if the language you are evaluating on does not require/use whitespace'
)

Expand Down Expand Up @@ -794,16 +787,19 @@ def fewshot_context(self, doc, num_fewshot):
)

example = self.doc_to_text(doc)
if isinstance(example, str):
return labeled_examples + example
elif isinstance(example, list):
return [labeled_examples + ex for ex in example]
elif isinstance(example, int):
if self.config.doc_to_choice is not None:
choices = self.doc_to_choice(doc)
return labeled_examples + choices[example]
else:
return labeled_examples + str(example)
if self.multiple_input:
return labeled_examples
else:
if isinstance(example, str):
return labeled_examples + example
elif isinstance(example, list):
return [labeled_examples + ex for ex in example]
elif isinstance(example, int):
if self.config.doc_to_choice is not None:
choices = self.doc_to_choice(doc)
return labeled_examples + choices[example]
else:
return labeled_examples + str(example)

def apply_filters(self):
if hasattr(self, "_filters"):
Expand Down Expand Up @@ -959,7 +955,9 @@ def construct_requests(
if self.multiple_input:
# If there are multiple inputs, choices are placed in the ctx
cont = self.doc_to_target(doc)
arguments = [(ctx, f"{target_delimiter}{cont}") for ctx in choices]
arguments = [
(ctx + choice, f"{target_delimiter}{cont}") for choice in choices
]
else:
# Otherwise they are placed in the continuation
arguments = [(ctx, f"{target_delimiter}{cont}") for cont in choices]
Expand Down
Loading

0 comments on commit 6010d8f

Please sign in to comment.