Skip to content

Commit

Permalink
Fix gguf loading via Transformers (#2596)
Browse files Browse the repository at this point in the history
* HF: support loading GGUF files

* code review

* code review

* code clean up

* add note about `use_fast` compatibility with GGUF

---------

Co-authored-by: Qubitium-ModelCloud <[email protected]>
  • Loading branch information
CL-ModelCloud and Qubitium authored Jan 7, 2025
1 parent 888ac29 commit 16cfe46
Showing 1 changed file with 21 additions and 8 deletions.
29 changes: 21 additions & 8 deletions lm_eval/models/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def __init__(
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
gguf_file: Optional[str] = None,
**kwargs,
) -> None:
super().__init__()
Expand Down Expand Up @@ -164,6 +165,7 @@ def __init__(
pretrained,
revision=revision,
trust_remote_code=trust_remote_code,
gguf_file=gguf_file,
)

# determine which of 'causal' and 'seq2seq' backends to use for HF models
Expand All @@ -178,6 +180,7 @@ def __init__(
revision=revision,
trust_remote_code=trust_remote_code,
use_fast_tokenizer=use_fast_tokenizer,
gguf_file=gguf_file,
)

# if we passed `pretrained` as a string, initialize our model now
Expand All @@ -196,6 +199,7 @@ def __init__(
delta=delta,
autogptq=autogptq,
gptqmodel=gptqmodel,
gguf_file=gguf_file,
**kwargs,
)

Expand Down Expand Up @@ -508,12 +512,14 @@ def _get_config(
pretrained: str,
revision: str = "main",
trust_remote_code: bool = False,
gguf_file: Optional[str] = None,
) -> None:
"""Return the model config for HuggingFace models"""
self._config = transformers.AutoConfig.from_pretrained(
pretrained,
revision=revision,
trust_remote_code=trust_remote_code,
gguf_file=gguf_file,
)

def _create_model(
Expand All @@ -535,6 +541,7 @@ def _create_model(
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
gguf_file: Optional[str] = None,
**kwargs,
) -> None:
"""
Expand Down Expand Up @@ -579,6 +586,7 @@ def _create_model(
revision=revision,
torch_dtype=get_dtype(dtype),
trust_remote_code=trust_remote_code,
gguf_file=gguf_file,
**model_kwargs,
)
else:
Expand Down Expand Up @@ -676,21 +684,29 @@ def _create_tokenizer(
revision: Optional[str] = "main",
trust_remote_code: Optional[bool] = False,
use_fast_tokenizer: Optional[bool] = True,
gguf_file: Optional[str] = None,
) -> None:
"""
Helper method during initialization.
Create a tokenizer object corresponding to the correct
tokenizer for value of `pretrained`, or use the pre-initialized tokenizer passed.
"""
kwargs = {
"revision": revision,
"trust_remote_code": trust_remote_code,
}

# GGUF files embed their own tokenizer, which is not compatible with the HF tokenizer `use_fast` param, so pass only one of `gguf_file` / `use_fast`
if gguf_file is not None:
kwargs["gguf_file"] = gguf_file
else:
kwargs["use_fast"] = use_fast_tokenizer

if tokenizer:
if isinstance(tokenizer, str):
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
tokenizer,
revision=revision,
trust_remote_code=trust_remote_code,
use_fast=use_fast_tokenizer,
tokenizer, **kwargs
)
else:
assert isinstance(
Expand All @@ -705,10 +721,7 @@ def _create_tokenizer(
# get the HF hub name via accessor on model
model_name = self.model.name_or_path
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
model_name,
revision=revision,
trust_remote_code=trust_remote_code,
use_fast=use_fast_tokenizer,
model_name, **kwargs
)
return None

Expand Down

0 comments on commit 16cfe46

Please sign in to comment.