diff --git a/Dockerfile.base b/Dockerfile.base new file mode 100644 index 0000000000000..6d735af43a332 --- /dev/null +++ b/Dockerfile.base @@ -0,0 +1,107 @@ +ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:6.2.4-complete +FROM ${BASE_IMAGE} AS base + +ENV PATH=/opt/rocm/llvm/bin:$PATH +ENV ROCM_PATH=/opt/rocm +ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib: +ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942 +ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} + +ARG PYTHON_VERSION=3.12 + +RUN mkdir -p /app +WORKDIR /app +ENV DEBIAN_FRONTEND=noninteractive + +# Install Python and other dependencies +RUN apt-get update -y \ + && apt-get install -y software-properties-common git curl sudo vim less \ + && add-apt-repository ppa:deadsnakes/ppa \ + && apt-get update -y \ + && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ + python${PYTHON_VERSION}-lib2to3 python-is-python3 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ + && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ + && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ + && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \ + && python3 --version && python3 -m pip --version + +RUN pip install -U packaging cmake ninja wheel setuptools Cython + +FROM base AS build_hipblaslt +ARG HIPBLASLT_BRANCH="507a649" +# Unset the following on ROCm6.3+ +ARG LEGACY_HIPBLASLT_OPTION="--legacy_hipblas_direct" +RUN git clone https://github.com/ROCm/hipBLASLt +RUN cd hipBLASLt \ + && git checkout ${HIPBLASLT_BRANCH} \ + && ./install.sh -d --architecture ${PYTORCH_ROCM_ARCH} ${LEGACY_HIPBLASLT_OPTION} \ + && cd build/release \ + && make package +RUN mkdir -p /app/install && cp /app/hipBLASLt/build/release/*.deb /app/install + +FROM base AS build_rccl +ARG RCCL_BRANCH="dfe4a3e" +ARG RCCL_REPO="https://github.com/ROCm/rccl" +RUN git clone ${RCCL_REPO} +RUN cd rccl \ + && git checkout ${RCCL_BRANCH} 
\ + && ./install.sh -p --amdgpu_targets ${PYTORCH_ROCM_ARCH} +RUN mkdir -p /app/install && cp /app/rccl/build/release/*.deb /app/install + +FROM base AS build_triton +ARG TRITON_BRANCH="release/3.1.x" +ARG TRITON_REPO="https://github.com/triton-lang/triton.git" +RUN git clone ${TRITON_REPO} +RUN cd triton \ + && git checkout ${TRITON_BRANCH} \ + && cd python \ + && python3 setup.py bdist_wheel --dist-dir=dist +RUN mkdir -p /app/install && cp /app/triton/python/dist/*.whl /app/install + +FROM base AS build_amdsmi +RUN cd /opt/rocm/share/amd_smi \ + && pip wheel . --wheel-dir=dist +RUN mkdir -p /app/install && cp /opt/rocm/share/amd_smi/dist/*.whl /app/install + +FROM base AS build_pytorch +ARG PYTORCH_BRANCH="8bc4033" +ARG PYTORCH_VISION_BRANCH="v0.19.1" +ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git" +ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git" +ARG FA_BRANCH="c555642" +ARG FA_REPO="https://github.com/ROCm/flash-attention.git" +RUN git clone ${PYTORCH_REPO} pytorch +RUN cd pytorch && git checkout ${PYTORCH_BRANCH} && \ + pip install -r requirements.txt && git submodule update --init --recursive \ + && python3 tools/amd_build/build_amd.py \ + && CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \ + && pip install dist/*.whl +RUN git clone ${PYTORCH_VISION_REPO} vision +RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \ + && python3 setup.py bdist_wheel --dist-dir=dist \ + && pip install dist/*.whl +RUN git clone ${FA_REPO} +RUN cd flash-attention \ + && git checkout ${FA_BRANCH} \ + && git submodule update --init \ + && MAX_JOBS=64 GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist +RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \ + && cp /app/vision/dist/*.whl /app/install \ + && cp /app/flash-attention/dist/*.whl /app/install + +FROM base AS final +RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install 
\ + dpkg -i /install/*deb \ + && sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \ + && sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status +RUN --mount=type=bind,from=build_rccl,src=/app/install/,target=/install \ + dpkg -i /install/*deb \ + && sed -i 's/, rccl-dev \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status \ + && sed -i 's/, rccl \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status +RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \ + pip install /install/*.whl +RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \ + pip install /install/*.whl +RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \ + pip install /install/*.whl \ No newline at end of file diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 66cbe61905f84..a9594833c3c99 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -1,151 +1,21 @@ # default base image -ARG BASE_IMAGE="rocm/pytorch:rocm6.2.2_ubuntu20.04_py3.9_pytorch_release_2.1.2" - -ARG COMMON_WORKDIR=/app - -# The following ARGs should be "0" or "1". If "1", the respective component will be built and installed on top of the base image -ARG BUILD_HIPBLASLT="0" -ARG BUILD_RCCL="0" -ARG BUILD_FA="1" -ARG BUILD_TRITON="1" -ARG BUILD_PYTORCH="1" -# This ARG should also be "0" or "1". If "1", the vLLM development directory is obtained via git clone. -# If "0", it is copied in from the local working directory. 
ARG REMOTE_VLLM="0" ARG USE_CYTHON="0" ARG BUILD_RPD="1" +ARG COMMON_WORKDIR=/app +ARG BASE_IMAGE=rocm/vllm-dev:base_ubuntu22.04_py3.12_ROCm6.2.4_hipblaslt0.11_torch2.6 -# ----------------------- -# vLLM base image -FROM $BASE_IMAGE AS base -USER root - -# Import BASE_IMAGE arg from pre-FROM -ARG BASE_IMAGE -ARG COMMON_WORKDIR -# Used as ARCHes for all components -ARG ARG_PYTORCH_ROCM_ARCH="gfx90a;gfx942" -ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH} +FROM ${BASE_IMAGE} AS base # Install some basic utilities -RUN apt-get update -q -y && apt-get install -q -y python3 python3-pip RUN apt-get update -q -y && apt-get install -q -y \ sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev # Remove sccache RUN python3 -m pip install --upgrade pip RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)" - - -ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer -ENV PATH=$PATH:/opt/rocm/bin:/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/bin: -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/lib: -ENV CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/include:/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/include/torch/csrc/api/include/:/opt/rocm/include/: - -WORKDIR ${COMMON_WORKDIR} - -# ----------------------- -# hipBLASLt build stages -FROM base AS build_hipblaslt -ARG HIPBLASLT_BRANCH="e6da924" -RUN apt-get purge -y hipblaslt \ - && git clone https://github.com/ROCm/hipBLASLt.git \ - && cd hipBLASLt \ - && git checkout ${HIPBLASLT_BRANCH} \ - && ./install.sh --architecture ${PYTORCH_ROCM_ARCH} --legacy_hipblas_direct \ - && cd build/release \ - && make package -FROM scratch AS export_hipblaslt_1 -ARG COMMON_WORKDIR -COPY --from=build_hipblaslt ${COMMON_WORKDIR}/hipBLASLt/build/release/*.deb / -FROM scratch AS export_hipblaslt_0 -FROM export_hipblaslt_${BUILD_HIPBLASLT} AS export_hipblaslt - 
-# ----------------------- -# RCCL build stages -FROM base AS build_rccl -ARG RCCL_BRANCH="rocm-6.2.0" -RUN git clone https://github.com/ROCm/rccl \ - && cd rccl \ - && git checkout ${RCCL_BRANCH} \ - && ./install.sh -p --amdgpu_targets ${PYTORCH_ROCM_ARCH} -FROM scratch AS export_rccl_1 -ARG COMMON_WORKDIR -COPY --from=build_rccl ${COMMON_WORKDIR}/rccl/build/release/*.deb / -FROM scratch AS export_rccl_0 -FROM export_rccl_${BUILD_RCCL} AS export_rccl -# ----------------------- -# Triton build stages -FROM base AS build_triton -ARG TRITON_BRANCH="e192dba" -ARG TRITON_REPO="https://github.com/triton-lang/triton.git" -RUN python3 -m pip install ninja cmake wheel pybind11 && git clone ${TRITON_REPO} \ - && cd triton \ - && git checkout ${TRITON_BRANCH} \ - && cd python \ - && python3 setup.py bdist_wheel --dist-dir=dist -FROM scratch AS export_triton_1 ARG COMMON_WORKDIR -COPY --from=build_triton ${COMMON_WORKDIR}/triton/python/dist/*.whl / -FROM scratch AS export_triton_0 -FROM export_triton_${BUILD_TRITON} AS export_triton +WORKDIR ${COMMON_WORKDIR} -# AMD-SMI build stages -FROM base AS build_amdsmi -RUN cd /opt/rocm/share/amd_smi \ - && pip wheel . 
--wheel-dir=dist -FROM scratch AS export_amdsmi -COPY --from=build_amdsmi /opt/rocm/share/amd_smi/dist/*.whl / - -FROM base as build_pytorch -# A commit to fix the output scaling factor issue in _scaled_mm -# Not yet in 2.5.0-rc1 -ARG PYTORCH_BRANCH="cedc116" -ARG PYTORCH_VISION_BRANCH="v0.19.1" -ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git" -ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git" -#RUN --mount=type=bind,from=export_hipblaslt,src=/,target=/install \ -#if ls /install/*.deb; then \ -# apt-get purge -y hipblaslt \ -# && dpkg -i /install/*.deb \ -# && sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \ -# && sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status; \ -#fi -RUN git clone ${PYTORCH_REPO} pytorch \ - && cd pytorch && git checkout ${PYTORCH_BRANCH} && git submodule update --init --recursive \ - && python tools/amd_build/build_amd.py \ - && CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \ - && pip install dist/*.whl \ - && cd .. 
\ - && git clone ${PYTORCH_VISION_REPO} vision \ - && cd vision && git checkout ${PYTORCH_VISION_BRANCH} \ - && python3 setup.py bdist_wheel --dist-dir=dist -FROM scratch as export_pytorch_1 -ARG COMMON_WORKDIR -COPY --from=build_pytorch ${COMMON_WORKDIR}/pytorch/dist/*.whl / -COPY --from=build_pytorch ${COMMON_WORKDIR}/vision/dist/*.whl / -FROM scratch as export_pytorch_0 -from export_pytorch_${BUILD_PYTORCH} as export_pytorch - -# ----------------------- -# flash attn build stages -FROM base AS build_flash_attn -ARG FA_BRANCH="3cea2fb" -ARG FA_REPO="https://github.com/ROCm/flash-attention.git" -RUN --mount=type=bind,from=export_pytorch,src=/,target=/install \ -if ls /install/*.whl; then \ - pip uninstall -y torch torchvision \ - && pip install /install/*.whl; \ -fi -RUN git clone ${FA_REPO} \ - && cd flash-attention \ - && git checkout ${FA_BRANCH} \ - && git submodule update --init \ - && MAX_JOBS=64 GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist -FROM scratch AS export_flash_attn_1 -ARG COMMON_WORKDIR -COPY --from=build_flash_attn ${COMMON_WORKDIR}/flash-attention/dist/*.whl / -FROM scratch AS export_flash_attn_0 -FROM export_flash_attn_${BUILD_FA} AS export_flash_attn # ----------------------- # vLLM fetch stages @@ -162,22 +32,7 @@ FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm # ----------------------- # vLLM build stages FROM fetch_vllm AS build_vllm -ARG COMMON_WORKDIR ARG USE_CYTHON -# Install hipblaslt -RUN --mount=type=bind,from=export_hipblaslt,src=/,target=/install \ -if ls /install/*.deb; then \ - apt-get purge -y hipblaslt \ - && dpkg -i /install/*.deb \ - && sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \ - && sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status; \ -fi -# Install pytorch -RUN --mount=type=bind,from=export_pytorch,src=/,target=/install \ -if ls /install/*.whl; then \ - pip install /install/*.whl; \ -fi - # Build vLLM RUN cd vllm \ && python3 -m pip 
install -r requirements-rocm.txt \ @@ -198,8 +53,6 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite # ----------------------- # Final vLLM image FROM base AS final -ARG COMMON_WORKDIR -ARG BUILD_FA RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/* # Error related to odd state for numpy 1.20.3 where there is no METADATA etc, but an extra LICENSES_bundled.txt. @@ -209,49 +62,6 @@ RUN case "$(which python3)" in \ rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/;; \ *) ;; esac -RUN --mount=type=bind,from=export_hipblaslt,src=/,target=/install \ - if ls /install/*.deb; then \ - apt-get purge -y hipblaslt \ - && dpkg -i /install/*.deb \ - && sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \ - && sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status; \ - fi - -RUN --mount=type=bind,from=export_rccl,src=/,target=/install \ - if ls /install/*.deb; then \ - dpkg -i /install/*.deb \ - # RCCL needs to be installed twice - && dpkg -i /install/*.deb \ - && sed -i 's/, rccl-dev \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status \ - && sed -i 's/, rccl \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status; \ - fi - -RUN --mount=type=bind,from=export_flash_attn,src=/,target=/install \ - if ls /install/*.whl; then \ - # Preemptively uninstall to prevent pip same-version no-installs - pip uninstall -y flash-attn \ - && pip install /install/*.whl; \ - fi - -RUN --mount=type=bind,from=export_triton,src=/,target=/install \ - if ls /install/*.whl; then \ - # Preemptively uninstall to prevent pip same-version no-installs - pip uninstall -y triton \ - && pip install /install/*.whl; \ - fi - -RUN --mount=type=bind,from=export_amdsmi,src=/,target=/install \ - # Preemptively uninstall to prevent pip same-version no-installs - pip uninstall -y amdsmi \ - && pip install /install/*.whl; - -RUN --mount=type=bind,from=export_pytorch,src=/,target=/install \ - if ls 
/install/*.whl; then \ - # Preemptively uninstall to prevent pip same-version no-installs - pip uninstall -y torch torchvision \ - && pip install /install/*.whl; \ - fi - RUN python3 -m pip install --upgrade huggingface-hub[cli] ARG BUILD_RPD RUN if [ ${BUILD_RPD} -eq "1" ]; then \ @@ -259,7 +69,7 @@ RUN if [ ${BUILD_RPD} -eq "1" ]; then \ && cd rocmProfileData/rpd_tracer \ && pip install -r requirements.txt && cd ../ \ && make && make install \ - && cd hipMarker && python setup.py install ; fi + && cd hipMarker && python3 setup.py install ; fi # Install vLLM # Make sure punica kernels are built (for LoRA) @@ -276,6 +86,8 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \ && pip uninstall -y vllm \ && pip install *.whl +ARG COMMON_WORKDIR + # Copy over the benchmark scripts as well COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks COPY --from=export_vllm /tests ${COMMON_WORKDIR}/vllm/tests diff --git a/benchmarks/P3L.py b/benchmarks/P3L.py index 6c9ffd9ebd599..5fcb8b5a78dbb 100755 --- a/benchmarks/P3L.py +++ b/benchmarks/P3L.py @@ -42,13 +42,16 @@ """ import argparse +import dataclasses import datetime +import json import math import os from huggingface_hub import hf_hub_download from vllm import LLM, SamplingParams +from vllm.engine.arg_utils import EngineArgs from vllm.logger import init_logger logger = init_logger(__name__) @@ -69,16 +72,8 @@ def get_wikitext2_text(tokenizer): def vllm_init(args): - - llm = LLM(model=args.model, - tensor_parallel_size=args.tensor_parallel_size, - trust_remote_code=args.trust_remote_code, - dtype=args.dtype, - quantization=args.quantization, - kv_cache_dtype=args.kv_cache_dtype, - quantization_param_path=args.kv_cache_scales_path - if args.kv_cache_scales_path != '' else None, - enforce_eager=args.enforce_eager) + engine_args = EngineArgs.from_cli_args(args) + llm = LLM(**dataclasses.asdict(engine_args)) sampling_params = SamplingParams(n=1, temperature=0.0, @@ -186,6 +181,15 @@ def main(args: 
argparse.Namespace): f"{my_ppl/num_tokens_generated}" \ f"\n\tPPL={math.exp(my_ppl/num_tokens_generated)}") + if args.output_json: + results = { + "integral_cross_entropy": my_ppl, + "average_cross_entropy": my_ppl / num_tokens_generated, + "ppl": math.exp(my_ppl / num_tokens_generated), + } + with open(args.output_json, "w") as f: + json.dump(results, f, indent=4) + logger.info(MESSAGE) print(MESSAGE) return @@ -193,41 +197,21 @@ def main(args: argparse.Namespace): if __name__ == "__main__": parser = argparse.ArgumentParser( - description='Benchmark the latency of processing a single batch of ' - 'requests till completion.') - parser.add_argument('--model', type=str, default='facebook/opt-125m') + description='Measure the PPPL (P3L) score of a given model.') parser.add_argument( '--data', type=str, default='./wikitext/wikitext-2-v1/test-00000-of-00001.parquet') parser.add_argument('--context-size', type=int, default=4096) - parser.add_argument('--kv-cache-scales-path', type=str, default='') - parser.add_argument('--tensor-parallel-size', '-tp', type=int, default=1) - parser.add_argument('--quantization', type=str, default=None) - parser.add_argument('--trust-remote-code', - action='store_true', - help='trust remote code from huggingface') - parser.add_argument( - '--dtype', - type=str, - default='auto', - choices=['auto', 'half', 'float16', 'bfloat16', 'float', 'float32'], - help='data type for model weights and activations. ' - 'The "auto" option will use FP16 precision ' - 'for FP32 and FP16 models, and BF16 precision ' - 'for BF16 models.') parser.add_argument('--sample-size', type=int, default=512) parser.add_argument('--patch-size', type=int, default=None) - parser.add_argument('--enforce-eager', - action='store_true', - help='enforce eager mode and disable CUDA graph') parser.add_argument( - "--kv-cache-dtype", + '--output-json', type=str, - choices=['auto', 'fp8_e5m2', 'fp8'], - default='auto', - help= - 'Data type for kv cache storage. 
If "auto", will use model data type.') +        default=None, +        help='Path to save the perplexity results in JSON format.') + +    parser = EngineArgs.add_cli_args(parser) args = parser.parse_args() main(args)