Adding New Tokens, then Saving & Re-loading Model Adapter #1343

Open
laura-burdick-sil opened this issue Nov 26, 2024 · 0 comments

Hello, I am trying to add new tokens to the tokenizer, and then save the model adapter and re-load it later. Here is my code:

from unsloth import FastLanguageModel
from unsloth import add_new_tokens

# Load the 4-bit base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length = 1024,
    dtype = None,
    load_in_4bit = True,
    device_map = {"": 0},
)

# Add four new tokens; this resizes the embedding matrix and lm_head.
add_new_tokens(model, tokenizer, ["eng_", "Latn", "rro_", "mek_"])
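
# Sanity check (illustrative, not part of my original run; uses the
# standard transformers embedding accessor): after add_new_tokens the
# vocabulary should have grown from 128256 to 128260 rows, matching the
# saved lm_head shape in the error below.
assert len(tokenizer) == 128260
assert model.get_input_embeddings().weight.shape[0] == 128260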

# Wrap the model with a LoRA adapter.
model = FastLanguageModel.get_peft_model(
    model,
    r = 128,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

# Save the adapter and tokenizer, then try to load them back.
path = "/root/test3"
model.save_pretrained(path, save_adapter=True)
tokenizer.save_pretrained(path)

model, tokenizer = FastLanguageModel.from_pretrained(path)

When I load the model adapter, I get the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[5], line 1
----> 1 model, tokenizer = FastLanguageModel.from_pretrained(path)

File ~/.clearml/venvs-builds/3.10/lib/python3.10/site-packages/unsloth/models/loader.py:401, in FastLanguageModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, *args, **kwargs)
    397 if is_peft:
    398     # From https://github.com/huggingface/peft/issues/184
    399     # Now add PEFT adapters
    400     model.enable_input_require_grads()
--> 401     model = PeftModel.from_pretrained(
    402         model,
    403         old_model_name,
    404         token = token,
    405         revision = revision,
    406         is_trainable = True,
    407         trust_remote_code = trust_remote_code,
    408     )
    409     # Patch it as well!
    410     model = dispatch_model.patch_peft_model(model, use_gradient_checkpointing)

File ~/.clearml/venvs-builds/3.10/lib/python3.10/site-packages/peft/peft_model.py:586, in PeftModel.from_pretrained(cls, model, model_id, adapter_name, is_trainable, config, autocast_adapter_dtype, ephemeral_gpu_offload, low_cpu_mem_usage, **kwargs)
    577 else:
    578     model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](
    579         model,
    580         config,
   (...)
    583         low_cpu_mem_usage=low_cpu_mem_usage,
    584     )
--> 586 model.load_adapter(
    587     model_id,
    588     adapter_name,
    589     is_trainable=is_trainable,
    590     autocast_adapter_dtype=autocast_adapter_dtype,
    591     low_cpu_mem_usage=low_cpu_mem_usage,
    592     **kwargs,
    593 )
    595 return model

File ~/.clearml/venvs-builds/3.10/lib/python3.10/site-packages/peft/peft_model.py:1181, in PeftModel.load_adapter(self, model_id, adapter_name, is_trainable, torch_device, autocast_adapter_dtype, ephemeral_gpu_offload, low_cpu_mem_usage, **kwargs)
   1179 # load the weights into the model
   1180 ignore_mismatched_sizes = kwargs.get("ignore_mismatched_sizes", False)
-> 1181 load_result = set_peft_model_state_dict(
   1182     self,
   1183     adapters_weights,
   1184     adapter_name=adapter_name,
   1185     ignore_mismatched_sizes=ignore_mismatched_sizes,
   1186     low_cpu_mem_usage=low_cpu_mem_usage,
   1187 )
   1188 if (
   1189     (getattr(self, "hf_device_map", None) is not None)
   1190     and (len(set(self.hf_device_map.values()).intersection({"cpu", "disk"})) > 0)
   1191     and len(self.peft_config) == 1
   1192 ):
   1193     device_map = kwargs.get("device_map", "auto")

File ~/.clearml/venvs-builds/3.10/lib/python3.10/site-packages/peft/utils/save_and_load.py:464, in set_peft_model_state_dict(model, peft_model_state_dict, adapter_name, ignore_mismatched_sizes, low_cpu_mem_usage)
    462             module._move_adapter_to_device_of_base_layer(adapter_name)
    463 else:
--> 464     load_result = model.load_state_dict(peft_model_state_dict, strict=False)
    466 if config.is_prompt_learning:
    467     model.prompt_encoder[adapter_name].embedding.load_state_dict(
    468         {"weight": peft_model_state_dict["prompt_embeddings"]}, strict=True
    469     )

File ~/.clearml/venvs-builds/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py:2584, in Module.load_state_dict(self, state_dict, strict, assign)
   2576         error_msgs.insert(
   2577             0,
   2578             "Missing key(s) in state_dict: {}. ".format(
   2579                 ", ".join(f'"{k}"' for k in missing_keys)
   2580             ),
   2581         )
   2583 if len(error_msgs) > 0:
-> 2584     raise RuntimeError(
   2585         "Error(s) in loading state_dict for {}:\n\t{}".format(
   2586             self.__class__.__name__, "\n\t".join(error_msgs)
   2587         )
   2588     )
   2589 return _IncompatibleKeys(missing_keys, unexpected_keys)

RuntimeError: Error(s) in loading state_dict for PeftModelForCausalLM:
	size mismatch for base_model.model.lm_head.modules_to_save.default.weight: copying a param with shape torch.Size([128260, 4096]) from checkpoint, the shape in current model is torch.Size([128256, 4096]).

How do I handle saving & re-loading an adapter when I have added new tokens to the tokenizer? From the error, the saved lm_head has 128260 rows (the original 128256 plus my 4 new tokens), while the freshly loaded base model still has 128256. Thanks for your help.
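
For reference, here is a minimal sketch of two workarounds I am considering (both untested): resize_model_vocab appears in the FastLanguageModel.from_pretrained signature in the traceback above, though I am assuming its exact semantics, and the second option assumes that re-running add_new_tokens with the same token list reproduces the same vocabulary.

# Untested sketch: grow the base model's vocab to match the checkpoint
# (128256 original + 4 new tokens = 128260) before the adapter loads.

# Option 1: let unsloth resize the vocab while loading the adapter
# directory (resize_model_vocab is visible in the loader signature
# above; its exact behavior is an assumption on my part).
model, tokenizer = FastLanguageModel.from_pretrained(
    "/root/test3",
    max_seq_length = 1024,
    load_in_4bit = True,
    resize_model_vocab = 128260,
)

# Option 2: rebuild the base model, re-add the same tokens, then attach
# the saved adapter with PeftModel directly.
from peft import PeftModel
base, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length = 1024,
    load_in_4bit = True,
)
add_new_tokens(base, tokenizer, ["eng_", "Latn", "rro_", "mek_"])
model = PeftModel.from_pretrained(base, "/root/test3")

Either way, the key step seems to be growing the freshly loaded base model to 128260 rows before the adapter's modules_to_save weights are copied in.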
