Upgrade to torch 2.5.0 (#20)

* Upgrade to torch 2.5.0
* Drop `mamba-ssm` for now
* Fix transformers broadcast issue
truefoundry · Dec 23, 2024 · 9675011 · 9675011
1 parent 46298e8
commit 9675011
Show file tree

Hide file tree

Showing 9 changed files with 37 additions and 21 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,13 +1,15 @@
-# https://hub.docker.com/layers/winglian/axolotl/main-20241111-py3.11-cu121-2.3.1/images/sha256-67c35533cf8e7a399de19cdaf3852be093b9e184b9554ea38801a482da5d7231?context=explore
-FROM winglian/axolotl@sha256:1f892444717a6781ad0e6e02b3548cd76be14d65a7162f2d82eab5c809936bc5
+# https://hub.docker.com/layers/winglian/axolotl/main-20241217/images/sha256-5ed6e068d193ac35d092f8d6ccb56b1750779415cd07047edbbfb8d4edd87ae2
+FROM winglian/axolotl@sha256:0966ba0bdfda0a317016614a6eb9f599325d0e42109544f95f5540d144ddeebd
 SHELL ["/bin/bash", "-c"]
 USER root
+RUN [ "$(/usr/local/cuda/bin/nvcc --version | egrep -o "V[0-9]+\.[0-9]+" | cut -c2-)" = "12.1" ] || (echo "Error: CUDA version is not 12.1" && exit 1)
 
 # Install torch and axolotl requirements
-COPY torch-requirements.txt base-requirements.txt requirements.txt /tmp/llm-finetune/
+COPY base-requirements.txt requirements.txt /tmp/llm-finetune/
 RUN pip install -U pip wheel setuptools && \
-    pip uninstall -y axolotl && \
-    MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-cache-dir --no-build-isolation --use-pep517 -r /tmp/llm-finetune/requirements.txt && \
+    pip uninstall -y axolotl torch && \
+    pip install -U --no-cache-dir --use-pep517 -r /tmp/llm-finetune/base-requirements.txt && \
+    MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install --no-cache-dir --no-build-isolation --use-pep517 -r /tmp/llm-finetune/requirements.txt && \
     rm -rf /root/.cache/pip
 
 # Install axolotl_truefoundry plugin

diff --git a/Dockerfile-notebook b/Dockerfile-notebook
@@ -1,10 +1,10 @@
 FROM tfy.jfrog.io/tfy-images/jupyter:0.3.8-cu121-py3.11.10-sudo
 SHELL ["/bin/bash", "-c"]
+USER root
+RUN [ "$(/usr/local/cuda/bin/nvcc --version | egrep -o "V[0-9]+\.[0-9]+" | cut -c2-)" = "12.1" ] || (echo "Error: CUDA version is not 12.1" && exit 1)
 ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 ENV DEBIAN_FRONTEND=noninteractive
-
 # upgrade libnccl
-USER root
 RUN apt update && \
     apt install -y --no-install-recommends git curl wget && \
     wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb -O /tmp/cuda-keyring_1.1-1_all.deb && \
@@ -14,11 +14,12 @@ RUN apt update && \
 
 # Install torch and axolotl
 USER jovyan
-COPY torch-requirements.txt base-requirements.txt requirements.txt notebook-requirements.txt /tmp/llm-finetune/
+COPY base-requirements.txt requirements.txt notebook-requirements.txt /tmp/llm-finetune/
 RUN pip install -U pip wheel setuptools && \
-    pip uninstall -y axolotl && \
-    pip install --no-cache-dir -U -r /tmp/llm-finetune/torch-requirements.txt && \
-    MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-cache-dir --no-build-isolation --use-pep517 -r /tmp/llm-finetune/notebook-requirements.txt
+    pip uninstall -y axolotl torch && \
+    pip install -U --no-cache-dir --use-pep517 -r /tmp/llm-finetune/base-requirements.txt && \
+    MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install --no-cache-dir --no-build-isolation --use-pep517 -r /tmp/llm-finetune/requirements.txt && \
+    pip install --no-cache-dir --use-pep517 -r /tmp/llm-finetune/notebook-requirements.txt
 
 # Setup editable packages
 USER root

diff --git a/base-requirements.txt b/base-requirements.txt
@@ -1,12 +1,13 @@
 --extra-index-url https://download.pytorch.org/whl/cu121
 cloud-files==4.29.0
+cut-cross-entropy[transformers] @ git+https://github.com/apple/ml-cross-entropy.git@1f3ebdb20653a26598a2722acd21e9b1528608c3
 fsspec==2024.9.0
 hf-transfer<0.2.0
 pyarrow>=15.0.0,<19.0.0
 rich>=13.0.0,<14
 s3fs==2024.9.0
 snowflake-connector-python[pandas]==3.12.3
-torch==2.3.1+cu121
-torchao==0.6.1+cu121
-truefoundry==0.5.2rc1
-unsloth[cu121-ampere-torch230] @ git+https://github.com/unslothai/unsloth.git@9dc399a6b6625ee40835c5eab361426d3c5d4abb
+torch==2.5.0+cu121
+torchao==0.7.0+cu121
+transformers @ git+https://github.com/truefoundry/transformers.git@09a21295f3a13bf81b4fc22057bb2fc9ae063891
+truefoundry==0.5.3rc2
diff --git a/config-base.yaml b/config-base.yaml
@@ -13,6 +13,7 @@ val_set_size: 0.1
 data_dir: auto # type: string
 datasets: auto # type: list
 test_datasets: auto # type: list
+batch_flattening: auto # type: bool
 bf16: auto # type: bool
 bfloat16: auto # type: bool
 flash_attention: auto # type: bool
@@ -90,6 +91,7 @@ peft_use_rslora: True
 plugins:
   - axolotl_truefoundry.TrueFoundryMLPlugin
   - axolotl.integrations.liger.LigerPlugin
+  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 pad_to_sequence_len: True
 remove_unused_columns: True
 report_to: tensorboard
@@ -140,3 +142,6 @@ liger_rms_norm: True
 liger_glu_activation: True
 liger_layer_norm: True
 liger_fused_linear_cross_entropy: True
+
+## CutCrossEntropy
+cut_cross_entropy: False
diff --git a/notebook-requirements.txt b/notebook-requirements.txt
@@ -1,2 +1 @@
--r requirements.txt
 jupyter-app-launcher==0.3.1
diff --git a/plugins/axolotl_truefoundry/pyproject.toml b/plugins/axolotl_truefoundry/pyproject.toml
@@ -11,7 +11,7 @@ dependencies = [
     "transformers>=4.0.0,<5",
     "truefoundry>=0.5.1,<0.6.0",
     "pynvml>=11.0.0,<12",
-    "torch>=2.3.0,<2.4.0",
+    "torch>=2.0.0,<3.0.0",
     "pydantic>=2.0.0,<3",
     "orjson",
 ]

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu121
 -r base-requirements.txt
-axolotl[deepspeed,flash-attn,mamba-ssm,optimizers,lion-pytorch,galore] @ git+https://github.com/truefoundry/axolotl@c7fc338e67c4313ec82fcca304733c9ececae5c0
+unsloth[cu121-ampere-torch250] @ git+https://github.com/unslothai/unsloth.git@9dc399a6b6625ee40835c5eab361426d3c5d4abb
+axolotl[deepspeed,flash-attn,optimizers,lion-pytorch,galore] @ git+https://github.com/truefoundry/axolotl@5878daa3beec58bf4f4d21a6abd6dba3c40e74f4
diff --git a/torch-requirements.txt b/torch-requirements.txt
diff --git a/train.py b/train.py
@@ -81,7 +81,13 @@ def make_axolotl_config(config_base, kwargs, timestamp=None):
             else:
                 cfg[k] = kwargs[k]
     if not cfg.output_dir:
-        raise ValueError("`output_dir` must be set in config base")
+        raise ValueError("`output_dir` must be set")
+
+    if cfg.dataset_type == "chat" and cfg.long_sequences_strategy == "truncate":
+        raise ValueError(
+            "Chat datasets cannot be truncated. Please set `long_sequences_strategy` either to "
+            "`drop` to drop sequences longer than `sequence_len` or `error` to raise an error."
+        )
 
     if is_main_process():
         if cfg.cleanup_output_dir_on_start is True:
@@ -181,6 +187,9 @@ def make_axolotl_config(config_base, kwargs, timestamp=None):
         set_cfg_option_if_auto(cfg, "flash_attn_fuse_mlp", cfg.adapter not in {"qlora", "lora"})
         set_cfg_option_if_auto(cfg, "flash_attn_fuse_qkv", cfg.adapter not in {"qlora", "lora"})
 
+        set_cfg_option_if_auto(
+            cfg, "batch_flattening", not cfg.sample_packing and cfg.flash_attention and cfg.micro_batch_size > 1
+        )
         set_cfg_option_if_auto(cfg, "optimizer", "adamw_torch_fused" if cfg.adapter == "qlora" else "adamw_torch")
 
         if cfg.datasets == "auto":