diff --git a/Dockerfile b/Dockerfile index d40e24c..b96291e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,9 @@ RUN mkdir -p /packages && \ cd /packages && \ git clone https://github.com/truefoundry/axolotl && \ cd axolotl/ && \ - git checkout e16f637d079ef5d56321a240ef0547a50c37b708 && \ + git checkout 285193c1933dac665ae08b9eef95a355117bf8a2 && \ cd /packages/axolotl/ && \ - MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-build-isolation --no-cache-dir -e .[flash-attn,mamba-ssm,fused-dense-lib,optimizers,lion-pytorch,galore] && \ + MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-build-isolation --no-cache-dir -e .[flash-attn,mamba-ssm,optimizers,lion-pytorch,galore] && \ rm -rf /root/.cache/pip # Install axolotl_truefoundry plugin with our requirements overrides over axolotl diff --git a/Dockerfile-notebook b/Dockerfile-notebook index d43fc46..0f1b065 100644 --- a/Dockerfile-notebook +++ b/Dockerfile-notebook @@ -29,9 +29,9 @@ USER jovyan RUN cd /packages && \ git clone https://github.com/truefoundry/axolotl && \ cd axolotl/ && \ - git checkout e16f637d079ef5d56321a240ef0547a50c37b708 && \ + git checkout 285193c1933dac665ae08b9eef95a355117bf8a2 && \ cd /packages/axolotl/ && \ - MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-build-isolation --no-cache-dir -e .[flash-attn,mamba-ssm,fused-dense-lib,optimizers,lion-pytorch,galore] + MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-build-isolation --no-cache-dir -e .[flash-attn,mamba-ssm,optimizers,lion-pytorch,galore] # Install axolotl_truefoundry plugin with our requirements overrides over axolotl COPY --chown=jovyan:users plugins/axolotl_truefoundry /packages/axolotl_truefoundry diff --git a/config-base.yaml b/config-base.yaml index fd00ff8..bd5f013 100644 --- a/config-base.yaml +++ b/config-base.yaml @@ -86,8 +86,7 @@ optimizer: adamw_torch_fused output_dir: ./outputs plugins: - axolotl_truefoundry.TrueFoundryMLPlugin - # Liger is disabled till it is updated with Gradient Accumulation Loss fixes - # - axolotl.integrations.liger.LigerPlugin + - axolotl.integrations.liger.LigerPlugin pad_to_sequence_len: True remove_unused_columns: True report_to: tensorboard @@ -127,6 +126,8 @@ truefoundry_ml_repo: null val_data_uri: null ## Liger -liger_rms_norm: True -liger_swiglu: True -liger_fused_linear_cross_entropy: True +liger_rope: true +liger_rms_norm: true +liger_glu_activation: true +liger_layer_norm: true +liger_fused_linear_cross_entropy: true diff --git a/requirements.txt b/requirements.txt index 1aea925..e8856b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/cu121 cloud-files==4.26.0 -deepspeed==0.15.3 +deepspeed==0.15.4 transformers==4.46.2 pyarrow==15.0.0 rich>=13.0.0,<14 diff --git a/sample_run.sh b/sample_run.sh index 20496a9..774d57a 100755 --- a/sample_run.sh +++ b/sample_run.sh @@ -31,16 +31,16 @@ accelerate launch \ train.py \ config-base.yaml \ --deepspeed ./deepspeed_configs/3_ds_z2_config.json \ ---base_model HuggingFaceTB/SmolLM2-135M-Instruct \ +--base_model Qwen/Qwen2.5-7B-Instruct \ --train_data_uri ./sample_data/chatalpaca-openai-1k.jsonl \ --val_data_uri None \ --val_set_size 0.2 \ --dataset_type chat \ ---sequence_len 2048 \ +--sequence_len 4096 \ --max_steps 0 \ ---micro_batch_size 2 \ ---eval_batch_size 2 \ ---num_epochs 3 \ +--micro_batch_size 1 \ +--eval_batch_size 1 \ +--num_epochs 5 \ --gradient_accumulation_steps 4 \ --gradient_checkpointing unsloth \ --learning_rate 0.00001 \