Commit
Merge branch 'main' into clem_homogeneize_logging
clefourrier authored Jul 9, 2024
2 parents ba4a8c4 + 70f7fc6 commit 8a246e5
Showing 5 changed files with 141 additions and 7 deletions.
11 changes: 11 additions & 0 deletions README.md
@@ -239,6 +239,17 @@ python run_evals_accelerate.py \
--output_dir "./evals"
```

### Using the dummy model
To debug or obtain random baseline scores for a given set of tasks, you can use the `dummy` model:
```shell
python run_evals_accelerate.py \
--model_args "dummy" \
--tasks <task parameters> \
--output_dir output_dir
```
This "model" randomly generates logprobs (for selection/accuracy tasks) and the string "random baseline" for generation tasks.
You can also select a specific seed for the random logprob values generated by the dummy model: `--model_args "dummy,seed=123"`.

## Deep thanks
`lighteval` was originally built on top of the great [Eleuther AI Harness](https://github.com/EleutherAI/lm-evaluation-harness) (we use the latter to power the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)). We also took a lot of inspiration from the amazing [HELM](https://crfm.stanford.edu/helm/latest/), notably for metrics.

89 changes: 89 additions & 0 deletions src/lighteval/models/dummy_model.py
@@ -0,0 +1,89 @@
# MIT License
#
# Copyright (c) 2024 The HuggingFace Team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# inspired by https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/models/dummy.py

import random
from typing import Optional

from transformers import AutoTokenizer

from lighteval.models.abstract_model import LightevalModel
from lighteval.models.model_config import DummyModelConfig, EnvConfig
from lighteval.models.model_output import GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn
from lighteval.tasks.requests import (
    GreedyUntilRequest,
    LoglikelihoodRequest,
    LoglikelihoodRollingRequest,
    LoglikelihoodSingleTokenRequest,
)


class DummyModel(LightevalModel):
    """Dummy model to generate random baselines."""

    def __init__(
        self,
        config: DummyModelConfig,
        env_config: EnvConfig,
    ):
        self.config = config
        self.env_config = env_config
        # Seeded RNG so baseline scores are reproducible across runs.
        self._random = random.Random(self.config.seed)
        self._tokenizer = None

    @property
    def tokenizer(self):
        # Lazily load a GPT-2 tokenizer; any tokenizer works since outputs are random anyway.
        if not self._tokenizer:
            self._tokenizer = AutoTokenizer.from_pretrained("gpt2")
        return self._tokenizer

    @property
    def add_special_tokens(self):
        return False

    @property
    def max_length(self) -> int:
        return 2048

    def greedy_until(
        self, requests: list[GreedyUntilRequest], override_bs: Optional[int] = None
    ) -> list[GenerateReturn]:
        return [GenerateReturn(result="random baseline") for _ in range(len(requests))]

    def loglikelihood(
        self, requests: list[LoglikelihoodRequest], override_bs: Optional[int] = None
    ) -> list[LoglikelihoodReturn]:
        # Negative random values stand in for log-probabilities (logprobs are <= 0).
        return [LoglikelihoodReturn((-self._random.random(), False)) for _ in requests]

    def loglikelihood_rolling(
        self, requests: list[LoglikelihoodRollingRequest], override_bs: Optional[int] = None
    ) -> list[LoglikelihoodReturn]:
        return [LoglikelihoodReturn((-self._random.random(), False)) for _ in requests]

    def loglikelihood_single_token(
        self, requests: list[LoglikelihoodSingleTokenRequest], override_bs: Optional[int] = None
    ) -> list[LoglikelihoodSingleTokenReturn]:
        return [
            LoglikelihoodSingleTokenReturn(result=[-self._random.random() for _ in req.tokenized_continuation])
            for req in requests
        ]
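Since every stochastic draw goes through the seeded `random.Random` above, two dummy models built from the same config produce identical baselines. A minimal sketch of that property, assuming `env_config` and a prepared list of `LoglikelihoodRequest` objects named `requests` come from the usual lighteval pipeline:

```python
from lighteval.models.dummy_model import DummyModel
from lighteval.models.model_config import DummyModelConfig

# `env_config` and `requests` are assumed to come from lighteval's standard
# configuration and task-preparation steps; they are not constructed here.
config = DummyModelConfig(seed=123)
model_a = DummyModel(config=config, env_config=env_config)
model_b = DummyModel(config=config, env_config=env_config)

scores_a = [r.result for r in model_a.loglikelihood(requests)]
scores_b = [r.result for r in model_b.loglikelihood(requests)]
assert scores_a == scores_b  # same seed -> identical random logprobs
```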
24 changes: 21 additions & 3 deletions src/lighteval/models/model_config.py
@@ -203,6 +203,11 @@ class TGIModelConfig:
    model_id: str


@dataclass
class DummyModelConfig:
    seed: int = 42


@dataclass
class InferenceModelConfig:
    model: str
@@ -253,7 +258,16 @@ def nullable_keys() -> list[str]:
        return ["namespace", "env_vars", "image_url"]


def create_model_config(args: Namespace, accelerator: Union["Accelerator", None]) -> BaseModelConfig:  # noqa: C901
def create_model_config(  # noqa: C901
    args: Namespace, accelerator: Union["Accelerator", None]
) -> Union[
    BaseModelConfig,
    AdapterModelConfig,
    DeltaModelConfig,
    TGIModelConfig,
    InferenceEndpointModelConfig,
    DummyModelConfig,
]:
    """
    Create a model configuration based on the provided arguments.
@@ -262,7 +276,7 @@ def create_model_config(args: Namespace, accelerator: Union["Accelerator", None]
        accelerator (Union[Accelerator, None]): accelerator to use for model training.
    Returns:
        BaseModelConfig: model configuration.
        Union[BaseModelConfig, AdapterModelConfig, DeltaModelConfig, TGIModelConfig, InferenceEndpointModelConfig, DummyModelConfig]: model configuration.
    Raises:
        ValueError: If both an inference server address and model arguments are provided.
@@ -271,7 +285,11 @@ def create_model_config(args: Namespace, accelerator: Union["Accelerator", None]
        ValueError: If a base model is specified when not using delta weights or adapter weights.
    """
    if args.model_args:
        args_dict = {k.split("=")[0]: k.split("=")[1] for k in args.model_args.split(",")}
        args_dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in args.model_args.split(",")}

        if args_dict.pop("dummy", False):
            return DummyModelConfig(**args_dict)

        args_dict["accelerator"] = accelerator
        args_dict["use_chat_template"] = args.use_chat_template

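For reference, the new comprehension above parses bare flags to `True` and keeps `key=value` pairs as strings; a standalone sketch of what `--model_args "dummy,seed=123"` produces:

```python
model_args = "dummy,seed=123"
args_dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}
print(args_dict)  # {'dummy': True, 'seed': '123'}

# pop("dummy") consumes the flag, leaving only the config kwargs:
if args_dict.pop("dummy", False):
    print(args_dict)  # {'seed': '123'} -> DummyModelConfig(seed="123")
```

Note that the seed reaches `DummyModelConfig` as the string `"123"`; `random.Random` accepts string seeds, so runs stay deterministic either way.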
20 changes: 18 additions & 2 deletions src/lighteval/models/model_loader.py
@@ -27,11 +27,13 @@
from lighteval.models.adapter_model import AdapterModel
from lighteval.models.base_model import BaseModel
from lighteval.models.delta_model import DeltaModel
from lighteval.models.dummy_model import DummyModel
from lighteval.models.endpoint_model import InferenceEndpointModel
from lighteval.models.model_config import (
    AdapterModelConfig,
    BaseModelConfig,
    DeltaModelConfig,
    DummyModelConfig,
    EnvConfig,
    InferenceEndpointModelConfig,
    InferenceModelConfig,
@@ -54,9 +56,16 @@ class ModelInfo:


def load_model(  # noqa: C901
    config: Union[BaseModelConfig, AdapterModelConfig, DeltaModelConfig, TGIModelConfig, InferenceEndpointModelConfig],
    config: Union[
        BaseModelConfig,
        AdapterModelConfig,
        DeltaModelConfig,
        TGIModelConfig,
        InferenceEndpointModelConfig,
        DummyModelConfig,
    ],
    env_config: EnvConfig,
) -> Tuple[Union[BaseModel, AdapterModel, DeltaModel, ModelClient], ModelInfo]:
) -> Tuple[Union[BaseModel, AdapterModel, DeltaModel, ModelClient, DummyModel], ModelInfo]:
    """Will load either a model from an inference server or a model from a checkpoint, depending
    on the config type.
@@ -82,6 +91,9 @@ def load_model(  # noqa: C901
    if isinstance(config, BaseModelConfig):
        return load_model_with_accelerate_or_default(config=config, env_config=env_config)

    if isinstance(config, DummyModelConfig):
        return load_dummy_model(config=config, env_config=env_config)


def load_model_with_tgi(config: TGIModelConfig):
    if not is_tgi_available():
@@ -143,3 +155,7 @@ def load_model_with_accelerate_or_default(
    hlog(f"Model info: {model_info}")

    return model, model_info


def load_dummy_model(config: DummyModelConfig, env_config: EnvConfig):
    return DummyModel(config=config, env_config=env_config), ModelInfo(model_name="dummy", model_sha=str(config.seed))
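A short sketch of the new dispatch path, assuming an `EnvConfig` built with its defaults is sufficient here:

```python
from lighteval.models.model_config import DummyModelConfig, EnvConfig
from lighteval.models.model_loader import load_model

config = DummyModelConfig(seed=123)
model, model_info = load_model(config=config, env_config=EnvConfig())  # default EnvConfig assumed acceptable
# The seed doubles as the reported "sha", so dummy runs stay traceable:
assert model_info.model_name == "dummy" and model_info.model_sha == "123"
```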
4 changes: 2 additions & 2 deletions src/lighteval/models/model_output.py
@@ -31,8 +31,8 @@ class ModelReturn:
    result: Union[tuple, list, str]
    input_tokens: list[int] = field(default_factory=list)  # model inputs
    generated_tokens: list[int] = field(default_factory=list)  # model generations
    truncated_tokens_count: Optional[int] = None  # How many tokens truncated
    padded_tokens_count: Optional[int] = None  # How many tokens of padding
    truncated_tokens_count: Optional[int] = 0  # How many tokens truncated
    padded_tokens_count: Optional[int] = 0  # How many tokens of padding

    def get_result_for_eval(self):
        raise NotImplementedError()
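With the defaults switched from `None` to `0`, fresh returns expose integer token counts, so downstream aggregation can sum them without `None` guards; a small sketch:

```python
from lighteval.models.model_output import GenerateReturn

returns = [GenerateReturn(result="random baseline") for _ in range(4)]
total_truncated = sum(r.truncated_tokens_count for r in returns)  # 0, no None handling needed
total_padded = sum(r.padded_tokens_count for r in returns)  # 0
```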
