-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Upgrading TRTLLM to v13 (#320)
Signed-off-by: Terry Kong <[email protected]> Signed-off-by: NeMo-Aligner CI <[email protected]> Co-authored-by: oliver könig <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
77af2a8
commit c86c63c
Showing
39 changed files
with
1,337 additions
and
346 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
# | ||
# To update NeMo-Aligner from a pre-built NeMo-Framework container: | ||
# | ||
# docker buildx build --target=aligner-bump --build-arg=BASE_IMAGE=nvcr.io/nvidia/nemo:24.07 -t aligner:latest . | ||
# docker buildx build --target=aligner-bump -t aligner:latest . | ||
# | ||
|
||
# Number of parallel threads for compute heavy build jobs | ||
|
@@ -13,13 +13,12 @@ ARG MAX_JOBS=8 | |
# Git refs for dependencies | ||
ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea | ||
ARG PYTRITON_VERSION=0.5.10 | ||
ARG NEMO_TAG=e033481e26e6ae32764d3e2b3f16afed00dc7218 # On: r2.0.0rc1 | ||
ARG MLM_TAG=a3fe0c75df82218901fa2c3a7c9e389aa5f53182 # On: core_r0.8.0 | ||
ARG NEMO_TAG=19668e5320a2e2af0199b6d5e0b841993be3a634 # On: main | ||
ARG MLM_TAG=25059d3bbf68be0751800f3644731df12a88f3f3 # On: main | ||
ARG ALIGNER_COMMIT=main | ||
ARG TRTLLM_VERSION=v0.10.0 | ||
ARG TRTLLM_VERSION=v0.13.0 | ||
ARG PROTOBUF_VERSION=4.24.4 | ||
|
||
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.03-py3 | ||
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.07-py3 | ||
|
||
FROM ${BASE_IMAGE} AS aligner-bump | ||
ARG ALIGNER_COMMIT | ||
|
@@ -36,13 +35,40 @@ git checkout -f $ALIGNER_COMMIT | |
# case 2: ALIGNER_COMMIT is a commit, so git-pull is expected to fail | ||
git pull --rebase || true | ||
|
||
pip install --no-deps -e . | ||
pip install --no-cache-dir --no-deps -e . | ||
EOF | ||
|
||
FROM ${BASE_IMAGE} as final | ||
WORKDIR /opt | ||
# Needed in case git complains that it can't detect a valid email; this email is fake but works. | ||
RUN git config --global user.email "[email protected]" | ||
# Install apex from source, first uninstalling any apex that is already installed. | ||
# APEX_TAG is an optional git ref: when it is non-empty it is fetched from origin | ||
# and checked out (via FETCH_HEAD); when it is empty the clone's default checkout | ||
# is built as-is. | ||
# The build passes the cpp_ext, cuda_ext, fast_layer_norm, distributed_adam and | ||
# deprecated_fused_adam build options and does not use pip's build isolation or cache. | ||
ARG APEX_TAG | ||
RUN pip uninstall -y apex && \ | ||
git clone https://github.com/NVIDIA/apex && \ | ||
cd apex && \ | ||
if [ ! -z $APEX_TAG ]; then \ | ||
git fetch origin $APEX_TAG && \ | ||
git checkout FETCH_HEAD; \ | ||
fi && \ | ||
pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./ | ||
|
||
# Git LFS: pipe the packagecloud git-lfs setup script into bash, install the | ||
# git-lfs package, run `git lfs install`, and clean the apt cache in the same layer. | ||
# NOTE(review): the setup script presumably registers the git-lfs apt repository; | ||
# its contents are not visible here - confirm. | ||
# NOTE(review): `apt-get install` is invoked without `-y`, and the `curl | bash` | ||
# pipe is not guarded by pipefail unless a SHELL instruction outside this view | ||
# sets it - confirm both are intended. | ||
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \ | ||
apt-get install git-lfs && \ | ||
git lfs install && \ | ||
apt-get clean | ||
|
||
# TRTLLM: build and install TensorRT-LLM from source at the ref given by TRTLLM_VERSION. | ||
# Steps: clone the repository, check out ${TRTLLM_VERSION}, source the repo's | ||
# docker/common/install_tensorrt.sh in the current shell, run scripts/build_wheel.py | ||
# with one job per CPU reported by nproc (TensorRT root /usr/local/tensorrt, with | ||
# the --python_bindings and --benchmarks flags), then install the checkout in | ||
# editable mode. | ||
ARG TRTLLM_VERSION | ||
RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \ | ||
cd TensorRT-LLM && \ | ||
git checkout ${TRTLLM_VERSION} && \ | ||
. docker/common/install_tensorrt.sh && \ | ||
python3 ./scripts/build_wheel.py --job_count $(nproc) --trt_root /usr/local/tensorrt --python_bindings --benchmarks && \ | ||
pip install -e . | ||
# Append the CUDA 12 compat library directory to the runtime library search path. | ||
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/ | ||
|
||
# install TransformerEngine | ||
ARG MAX_JOBS | ||
ARG TE_TAG | ||
|
@@ -56,17 +82,6 @@ RUN pip uninstall -y transformer-engine && \ | |
git submodule init && git submodule update && \ | ||
NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install . | ||
|
||
# install latest apex | ||
ARG APEX_TAG | ||
RUN pip uninstall -y apex && \ | ||
git clone https://github.com/NVIDIA/apex && \ | ||
cd apex && \ | ||
if [ ! -z $APEX_TAG ]; then \ | ||
git fetch origin $APEX_TAG && \ | ||
git checkout FETCH_HEAD; \ | ||
fi && \ | ||
pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./ | ||
|
||
# place any util pkgs here | ||
ARG PYTRITON_VERSION | ||
RUN pip install --upgrade-strategy only-if-needed nvidia-pytriton==$PYTRITON_VERSION | ||
|
@@ -99,29 +114,32 @@ RUN pip uninstall -y megatron-core && \ | |
fi && \ | ||
pip install -e . | ||
|
||
# Git LFS | ||
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \ | ||
apt-get install git-lfs && \ | ||
git lfs install | ||
|
||
COPY --from=aligner-bump /opt/NeMo-Aligner /opt/NeMo-Aligner | ||
RUN cd /opt/NeMo-Aligner && \ | ||
pip install --no-deps -e . | ||
|
||
# TRTLLM | ||
ARG TRTLLM_VERSION | ||
RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \ | ||
cd TensorRT-LLM && \ | ||
git checkout ${TRTLLM_VERSION} && \ | ||
patch -p1 < ../NeMo-Aligner/setup/trtllm.patch && \ | ||
. docker/common/install_tensorrt.sh && \ | ||
python3 ./scripts/build_wheel.py --trt_root /usr/local/tensorrt | ||
|
||
RUN cd TensorRT-LLM && \ | ||
pip install ./build/tensorrt_llm*.whl | ||
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/ | ||
RUN cd TensorRT-LLM && patch -p1 < ../NeMo-Aligner/setup/trtllm.patch | ||
|
||
# WAR(0.4.0): The pin of NeMo requires a higher nvidia-modelopt version than | ||
# TRT-LLM allows. This installation must follow TRT-LLM and is | ||
# only necessary when NeMo 2.0.0rc1 is installed with TRT-LLM v10. | ||
RUN pip install --upgrade-strategy only-if-needed nvidia-modelopt==0.13.0 | ||
# Temporary layer: cherry-picks a fixed list of NeMo PRs onto the NeMo checkout. | ||
# The script runs under `bash -exu`, so it traces each command and aborts on the | ||
# first failing command or unset variable. | ||
# TODO(terryk): This layer should be deleted ASAP after NeMo is bumped to include all of these PRs | ||
RUN <<"EOF" bash -exu | ||
cd NeMo | ||
# Ensures we don't cherry-pick "future" origin/main commits | ||
git fetch -a | ||
# 0c92fe17df4642ffc33d5d8c0c83fda729e3910c: [fix] Ensures disabling exp_manager with exp_manager=null does not error NeMo#10651 | ||
# 60e677423667c029dd05875da72bf0719774f844: [feat] Update get_model_parallel_src_rank to support tp-pp-dp ordering NeMo#10652 | ||
# 0deaf6716cb4f20766c995ce25d129795f1ae200: fix[export]: update API for disabling device reassignment in TRTLLM for Aligner NeMo#10863 | ||
# (superseded by 10863) 148543d6e9c66ff1f8562e84484448202249811d: feat: Migrate GPTSession refit path in Nemo export to ModelRunner for Aligner NeMo#10654 | ||
# Each entry below is "<PR number> <head commit of that PR>". | ||
for pr_and_commit in \ | ||
"10651 0c92fe17df4642ffc33d5d8c0c83fda729e3910c" \ | ||
"10652 60e677423667c029dd05875da72bf0719774f844" \ | ||
"10863 0deaf6716cb4f20766c995ce25d129795f1ae200" \ | ||
; do | ||
# Split the entry on the space: field 1 is the PR number, field 2 the commit. | ||
pr=$(cut -f1 -d' ' <<<"$pr_and_commit") | ||
head_pr_commit=$(cut -f2 -d' ' <<<"$pr_and_commit") | ||
# Fetch the PR's head commit into a local branch named PR-<pr>. | ||
git fetch origin $head_pr_commit:PR-${pr} | ||
# cherry-picks all commits between main and the top of the PR | ||
git cherry-pick --allow-empty $(git merge-base origin/main PR-${pr})..PR-${pr} | ||
# Tag the result of this PR's cherry-picks as cherry-pick-PR-<pr> to help identify it later | ||
git tag cherry-pick-PR-${pr} | ||
done | ||
EOF |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.