From 16cfe464a11807ff9a6a4cbdafb6e137826464e4 Mon Sep 17 00:00:00 2001
From: CL-ModelCloud
Date: Tue, 7 Jan 2025 21:55:45 +0800
Subject: [PATCH] Fix gguf loading via Transformers (#2596)

* hf support load gguf file

* code review

* code review

* code clean up

* note about use_fast compat with gguf

---------

Co-authored-by: Qubitium-ModelCloud
---
 lm_eval/models/huggingface.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/lm_eval/models/huggingface.py b/lm_eval/models/huggingface.py
index 40e65f5d55..819a486991 100644
--- a/lm_eval/models/huggingface.py
+++ b/lm_eval/models/huggingface.py
@@ -90,6 +90,7 @@ def __init__(
         delta: Optional[str] = None,
         autogptq: Optional[Union[bool, str]] = False,
         gptqmodel: Optional[bool] = False,
+        gguf_file: Optional[str] = None,
         **kwargs,
     ) -> None:
         super().__init__()
@@ -164,6 +165,7 @@ def __init__(
             pretrained,
             revision=revision,
             trust_remote_code=trust_remote_code,
+            gguf_file=gguf_file,
         )
 
         # determine which of 'causal' and 'seq2seq' backends to use for HF models
@@ -178,6 +180,7 @@ def __init__(
             revision=revision,
             trust_remote_code=trust_remote_code,
             use_fast_tokenizer=use_fast_tokenizer,
+            gguf_file=gguf_file,
         )
 
         # if we passed `pretrained` as a string, initialize our model now
@@ -196,6 +199,7 @@ def __init__(
                 delta=delta,
                 autogptq=autogptq,
                 gptqmodel=gptqmodel,
+                gguf_file=gguf_file,
                 **kwargs,
             )
 
@@ -508,12 +512,14 @@ def _get_config(
         pretrained: str,
         revision: str = "main",
         trust_remote_code: bool = False,
+        gguf_file: Optional[str] = None,
     ) -> None:
         """Return the model config for HuggingFace models"""
         self._config = transformers.AutoConfig.from_pretrained(
             pretrained,
             revision=revision,
             trust_remote_code=trust_remote_code,
+            gguf_file=gguf_file,
         )
 
     def _create_model(
@@ -535,6 +541,7 @@ def _create_model(
         delta: Optional[str] = None,
         autogptq: Optional[Union[bool, str]] = False,
         gptqmodel: Optional[bool] = False,
+        gguf_file: Optional[str] = None,
         **kwargs,
     ) -> None:
         """
@@ -579,6 +586,7 @@
                 revision=revision,
                 torch_dtype=get_dtype(dtype),
                 trust_remote_code=trust_remote_code,
+                gguf_file=gguf_file,
                 **model_kwargs,
             )
         else:
@@ -676,6 +684,7 @@
         revision: Optional[str] = "main",
         trust_remote_code: Optional[bool] = False,
         use_fast_tokenizer: Optional[bool] = True,
+        gguf_file: Optional[str] = None,
     ) -> None:
         """
         Helper method during initialization.
@@ -683,14 +692,21 @@
         Create a tokenizer object corresponding to the correct
         tokenizer for value of `pretrained`, or use the pre-initialized tokenizer passed.
         """
+        kwargs = {
+            "revision": revision,
+            "trust_remote_code": trust_remote_code,
+        }
+
+        # gguf format embeds tokenizer and is not compatible with hf tokenizer `use_fast` param
+        if gguf_file is not None:
+            kwargs["gguf_file"] = gguf_file
+        else:
+            kwargs["use_fast"] = use_fast_tokenizer
 
         if tokenizer:
             if isinstance(tokenizer, str):
                 self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-                    tokenizer,
-                    revision=revision,
-                    trust_remote_code=trust_remote_code,
-                    use_fast=use_fast_tokenizer,
+                    tokenizer, **kwargs
                 )
             else:
                 assert isinstance(
@@ -705,10 +721,7 @@
             # get the HF hub name via accessor on model
             model_name = self.model.name_or_path
             self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-                model_name,
-                revision=revision,
-                trust_remote_code=trust_remote_code,
-                use_fast=use_fast_tokenizer,
+                model_name, **kwargs
             )
 
         return None
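
A minimal usage sketch, not part of the patch: exercising the new gguf_file argument through lm-eval's Python API. The repository id, GGUF file name, and task below are placeholders, and the sketch assumes an HF repo that ships a GGUF checkpoint supported by transformers' GGUF loader (which requires the gguf package). The GGUF file embeds its own tokenizer, which is why the patch does not forward use_fast when gguf_file is set.

# Sketch only: repo id, file name, and task are illustrative placeholders.
import lm_eval
from lm_eval.models.huggingface import HFLM

lm = HFLM(
    pretrained="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",  # placeholder repo id
    gguf_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",     # placeholder file name
    # Note: when gguf_file is set, the tokenizer is read from the GGUF file
    # itself and `use_fast` is intentionally not passed to AutoTokenizer.
)

results = lm_eval.simple_evaluate(model=lm, tasks=["lambada_openai"], limit=10)
print(results["results"])

The same arguments should also be reachable from the command line, since --model_args key=value pairs are forwarded to HFLM.__init__, e.g. lm_eval --model hf --model_args pretrained=<placeholder repo>,gguf_file=<placeholder file> --tasks lambada_openai.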