Skip to content

Commit

Permalink
Jamba (#1451)
Browse files Browse the repository at this point in the history
* fixes for larger models

* add qlora example for deepspeed

* add readme for jamba
  • Loading branch information
winglian authored Mar 29, 2024
1 parent c63c913 commit 64cbc5d
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 1 deletion.
5 changes: 5 additions & 0 deletions examples/jamba/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Jamba

QLoRA with DeepSpeed needs at least 2 GPUs, each with 35 GiB of VRAM.

QLoRA on a single GPU: training will start, but the loss is off by an order of magnitude.
62 changes: 62 additions & 0 deletions examples/jamba/qlora_deepspeed.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
base_model: ai21labs/Jamba-v0.1
trust_remote_code: true

load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
  - path: mhenrichsen/alpaca_2k_test
    type: alpaca
dataset_prepared_path:
val_set_size: 0.0
output_dir: ./out

sequence_len: 4096
sample_packing: false
pad_to_sequence_len: false
eval_sample_packing: false

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

adapter: qlora
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true

low_cpu_mem_usage: true
gradient_accumulation_steps: 4
micro_batch_size: 1
num_epochs: 2
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 0.00001

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 10
evals_per_epoch:
saves_per_epoch: 1
debug:
deepspeed: deepspeed_configs/zero2.json
weight_decay: 0.0
special_tokens:
1 change: 1 addition & 0 deletions src/axolotl/utils/config/models/input/v0_4_1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ class Config:
Dict[Union[int, Literal["cpu", "disk"]], Union[int, str]]
] = None
gpu_memory_limit: Optional[Union[int, str]] = None
low_cpu_mem_usage: Optional[bool] = None

chat_template: Optional[ChatTemplate] = None
default_system_message: Optional[str] = None
Expand Down
7 changes: 6 additions & 1 deletion src/axolotl/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,9 @@ def load_model(
from accelerate import infer_auto_device_map

with init_empty_weights():
model_canvas = AutoModelForCausalLM.from_config(model_config)
model_canvas = AutoModelForCausalLM.from_config(
model_config, trust_remote_code=cfg.trust_remote_code or False
)
model_canvas.tie_weights()
device_map = infer_auto_device_map(
model_canvas,
Expand Down Expand Up @@ -502,6 +504,9 @@ def load_model(
model_kwargs["attn_implementation"] = "eager"
model_config._attn_implementation = "eager" # pylint: disable=protected-access

if cfg.low_cpu_mem_usage:
model_kwargs["low_cpu_mem_usage"] = True

qlora_fsdp = cfg.fsdp and cfg.adapter == "qlora"

try:
Expand Down
2 changes: 2 additions & 0 deletions src/axolotl/utils/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ def setup_fsdp_envs(cfg):
os.environ["FSDP_USE_ORIG_PARAMS"] = "true"
if cfg.fsdp_config.fsdp_state_dict_type:
os.environ["FSDP_STATE_DICT_TYPE"] = cfg.fsdp_config.fsdp_state_dict_type
if cfg.fsdp_config.fsdp_auto_wrap_policy:
os.environ["FSDP_AUTO_WRAP_POLICY"] = cfg.fsdp_config.fsdp_auto_wrap_policy
if cfg.fsdp_config.fsdp_transformer_layer_cls_to_wrap:
os.environ[
"FSDP_TRANSFORMER_CLS_TO_WRAP"
Expand Down

0 comments on commit 64cbc5d

Please sign in to comment.