From 59dbf6afa42e26ef726fa07a7deefd09500efd69 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Thu, 28 Mar 2024 18:41:03 -0400 Subject: [PATCH] fixes for larger models --- src/axolotl/utils/config/models/input/v0_4_1/__init__.py | 1 + src/axolotl/utils/models.py | 7 ++++++- src/axolotl/utils/trainer.py | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py index cce0cbc76a..c07c0ff75a 100644 --- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py +++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py @@ -533,6 +533,7 @@ class Config: Dict[Union[int, Literal["cpu", "disk"]], Union[int, str]] ] = None gpu_memory_limit: Optional[Union[int, str]] = None + low_cpu_mem_usage: Optional[bool] = None chat_template: Optional[ChatTemplate] = None default_system_message: Optional[str] = None diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index e2d33036b0..3cbbad6f5f 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -402,7 +402,9 @@ def load_model( from accelerate import infer_auto_device_map with init_empty_weights(): - model_canvas = AutoModelForCausalLM.from_config(model_config) + model_canvas = AutoModelForCausalLM.from_config( + model_config, trust_remote_code=cfg.trust_remote_code or False + ) model_canvas.tie_weights() device_map = infer_auto_device_map( model_canvas, @@ -502,6 +504,9 @@ def load_model( model_kwargs["attn_implementation"] = "eager" model_config._attn_implementation = "eager" # pylint: disable=protected-access + if cfg.low_cpu_mem_usage: + model_kwargs["low_cpu_mem_usage"] = True + qlora_fsdp = cfg.fsdp and cfg.adapter == "qlora" try: diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index da9f071c08..dc995fda8e 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -312,6 +312,8 @@ def setup_fsdp_envs(cfg): os.environ["FSDP_USE_ORIG_PARAMS"] = "true" if cfg.fsdp_config.fsdp_state_dict_type: os.environ["FSDP_STATE_DICT_TYPE"] = cfg.fsdp_config.fsdp_state_dict_type + if cfg.fsdp_config.fsdp_auto_wrap_policy: + os.environ["FSDP_AUTO_WRAP_POLICY"] = cfg.fsdp_config.fsdp_auto_wrap_policy if cfg.fsdp_config.fsdp_transformer_layer_cls_to_wrap: os.environ[ "FSDP_TRANSFORMER_CLS_TO_WRAP"