From 4987a503c529f8b54184428925b7f5554231ffad Mon Sep 17 00:00:00 2001
From: Hongxia Yang
Date: Tue, 16 Jul 2024 16:39:51 +0000
Subject: [PATCH 1/3] [ROCm] Cleanup Dockerfile and remove outdated patch

---
 Dockerfile.rocm                          | 27 +---------------
 .../getting_started/amd-installation.rst | 32 +++++--------------
 rocm_patch/rocm_bf16.patch               | 15 ---------
 3 files changed, 9 insertions(+), 65 deletions(-)
 delete mode 100644 rocm_patch/rocm_bf16.patch

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
index befb0499f2e68..85dfda8dbb532 100644
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -1,11 +1,6 @@
 # Default ROCm 6.1 base image
 ARG BASE_IMAGE="rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging"
 
-# Tested and supported base rocm/pytorch images
-ARG ROCm_5_7_BASE="rocm/pytorch:rocm5.7_ubuntu20.04_py3.9_pytorch_2.0.1" \
-    ROCm_6_0_BASE="rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1" \
-    ROCM_6_1_BASE="rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging"
-
 # Default ROCm ARCHes to build vLLM for.
 ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
 
@@ -54,18 +49,6 @@ RUN pip install --upgrade pip
 RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
 # Install torch == 2.5.0 on ROCm
 RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
-        *"rocm-5.7"*) \
-            pip uninstall -y torch torchaudio torchvision \
-            && pip install --no-cache-dir --pre \
-                torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \
-                torchvision==0.20.0.dev20240710 \
-                --index-url https://download.pytorch.org/whl/nightly/rocm5.7;; \
-        *"rocm-6.0"*) \
-            pip uninstall -y torch torchaudio torchvision \
-            && pip install --no-cache-dir --pre \
-                torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \
-                torchvision==0.20.0.dev20240710 \
-                --index-url https://download.pytorch.org/whl/nightly/rocm6.0;; \
         *"rocm-6.1"*) \
             pip uninstall -y torch torchaudio torchvision \
             && pip install --no-cache-dir --pre \
@@ -104,11 +87,6 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
     && cd flash-attention \
     && git checkout "${FA_BRANCH}" \
     && git submodule update --init \
-    && case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
-        *"rocm-5.7"*) \
-            export VLLM_TORCH_PATH="$(python3 -c 'import torch; print(torch.__path__[0])')" \
-            && patch "${VLLM_TORCH_PATH}"/utils/hipify/hipify_python.py hipify_patch.patch;; \
-        *) ;; esac \
     && GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \
     # Create an empty directory otherwise as later build stages expect one
     else mkdir -p /install; \
@@ -161,12 +139,9 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
     --mount=type=cache,target=/root/.cache/pip \
     pip install -U -r requirements-rocm.txt \
     && case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
-        *"rocm-6.0"*) \
-            patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h rocm_patch/rocm_bf16.patch;; \
         *"rocm-6.1"*) \
             # Bring in upgrades to HIP graph earlier than ROCm 6.2 for vLLM
-            wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P rocm_patch \
-            && cp rocm_patch/libamdhip64.so.6 /opt/rocm/lib/libamdhip64.so.6 \
+            wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib \
             # Prevent interference if torch bundles its own HIP runtime
             && rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true;; \
         *) ;; esac \
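For reference, both retained ``case`` dispatches key off the name of the ROCm install directory under ``/opt``. A minimal sketch of that probe on its own, assuming a ROCm 6.1 base image (the exact directory listing varies by image):

    # Prints the ROCm version directory name, e.g. for a rocm6.1.2 image:
    $ ls /opt | grep -Po 'rocm-[0-9]\.[0-9]'
    rocm-6.1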
diff --git a/docs/source/getting_started/amd-installation.rst b/docs/source/getting_started/amd-installation.rst
index cc41d47296f8d..7161b0baee4e2 100644
--- a/docs/source/getting_started/amd-installation.rst
+++ b/docs/source/getting_started/amd-installation.rst
@@ -3,7 +3,7 @@
 Installation with ROCm
 ======================
 
-vLLM supports AMD GPUs with ROCm 5.7 and 6.0.
+vLLM supports AMD GPUs with ROCm 6.1.
 
 Requirements
 ------------
@@ -11,7 +11,7 @@ Requirements
 * OS: Linux
 * Python: 3.8 -- 3.11
 * GPU: MI200s (gfx90a), MI300 (gfx942), Radeon RX 7900 series (gfx1100)
-* ROCm 6.0 and ROCm 5.7
+* ROCm 6.1
 
 Installation options:
@@ -27,7 +27,7 @@ You can build and install vLLM from source.
 
 First, build a docker image from `Dockerfile.rocm `_ and launch a docker container from the image.
 
-`Dockerfile.rocm `_ uses ROCm 6.0 by default, but also supports ROCm 5.7.
+`Dockerfile.rocm `_ uses ROCm 6.1 by default, but also supports ROCm 5.7 or ROCm 6.1 in older vLLM branches.
 It provides flexibility to customize the build of docker image using the following arguments:
 
 * `BASE_IMAGE`: specifies the base image used when running ``docker build``, specifically the PyTorch on ROCm base image. We have tested ROCm 5.7 and ROCm 6.0. The default is `rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1`
@@ -39,24 +39,17 @@ It provides flexibility to customize the build of docker image using the followi
 Their values can be passed in when running ``docker build`` with ``--build-arg`` options.
 
-To build vllm on ROCm 6.0 for MI200 and MI300 series, you can use the default:
+To build vllm on ROCm 6.1 for MI200 and MI300 series, you can use the default:
 
 .. code-block:: console
 
-    $ docker build -f Dockerfile.rocm -t vllm-rocm .
+    $ DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm -t vllm-rocm .
 
-To build vllm on ROCm 6.0 for Radeon RX7900 series (gfx1100), you should specify ``BUILD_FA`` as below:
+To build vllm on ROCm 6.1 for Radeon RX7900 series (gfx1100), you should specify ``BUILD_FA`` as below:
 
 .. code-block:: console
 
-    $ docker build --build-arg BUILD_FA="0" -f Dockerfile.rocm -t vllm-rocm .
-
-To build docker image for vllm on ROCm 5.7, you can specify ``BASE_IMAGE`` as below:
-
-.. code-block:: console
-
-    $ docker build --build-arg BASE_IMAGE="rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1" \
-    -f Dockerfile.rocm -t vllm-rocm .
+    $ DOCKER_BUILDKIT=1 docker build --build-arg BUILD_FA="0" -f Dockerfile.rocm -t vllm-rocm .
 
 To run the above docker image ``vllm-rocm``, use the below command:
 
@@ -88,24 +81,16 @@ Option 2: Build from source
 - `Pytorch `_
 - `hipBLAS `_
 
-For installing PyTorch, you can start from a fresh docker image, e.g, `rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging`, `rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1`, `rocm/pytorch-nightly`.
+For installing PyTorch, you can start from a fresh docker image, e.g., `rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging`, `rocm/pytorch-nightly`.
 
 Alternatively, you can install pytorch using pytorch wheels. You can check Pytorch installation guild in Pytorch `Getting Started `_
 
-For rocm6.0:
 
 .. code-block:: console
 
     $ pip3 install torch --index-url https://download.pytorch.org/whl/rocm6.0
 
-For rocm5.7:
-
-.. code-block:: console
-
-    $ pip install torch --index-url https://download.pytorch.org/whl/rocm5.7
-
-
 1. Install `Triton flash attention for ROCm `_
 
 Install ROCm's Triton flash attention (the default triton-mlir branch) following the instructions from `ROCm/triton `_
@@ -131,7 +116,6 @@ Install ROCm's flash attention (v2.0.4) following the instructions from `ROCm/fl
 
 .. tip::
-   - You may need to turn on the ``--enforce-eager`` flag if you experience process hang when running the `benchmark_thoughput.py` script to test your installation.
    - Triton flash attention is used by default. For benchmarking purposes, it is recommended to run a warm up step before collecting perf numbers.
    - To use CK flash-attention, please use this flag ``export VLLM_USE_TRITON_FLASH_ATTN=0`` to turn off triton flash attention.
    - The ROCm version of pytorch, ideally, should match the ROCm driver version.
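As a usage note for the docs above, the build arguments can also be combined in a single ``docker build``. An illustrative invocation only, using the default base image named in the Dockerfile and the gfx1100 setting from the docs:

    $ DOCKER_BUILDKIT=1 docker build \
        --build-arg BASE_IMAGE="rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging" \
        --build-arg BUILD_FA="0" \
        -f Dockerfile.rocm -t vllm-rocm .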
diff --git a/rocm_patch/rocm_bf16.patch b/rocm_patch/rocm_bf16.patch
deleted file mode 100644
index a0f07da2a3e2b..0000000000000
--- a/rocm_patch/rocm_bf16.patch
+++ /dev/null
@@ -1,15 +0,0 @@
---- amd_hip_bf16.h	2024-02-06 18:28:58.268699142 +0000
-+++ amd_hip_bf16.h.new	2024-02-06 18:28:31.988647133 +0000
-@@ -90,10 +90,10 @@
- #include "math_fwd.h" // ocml device functions
- 
- #if defined(__HIPCC_RTC__)
--#define __HOST_DEVICE__ __device__
-+#define __HOST_DEVICE__ __device__ static
- #else
- #include 
--#define __HOST_DEVICE__ __host__ __device__
-+#define __HOST_DEVICE__ __host__ __device__ static inline
- #endif
- 
- // Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
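Since this patch now downloads ``libamdhip64.so.6`` straight into ``/opt/rocm/lib`` and deletes any copy bundled under torch's ``lib`` directory, a quick sanity check inside the built image could look like the following sketch (``torch.version.hip`` reports the HIP version PyTorch was built against; the paths mirror the Dockerfile commands above):

    # HIP runtime version as seen by PyTorch
    $ python3 -c 'import torch; print(torch.version.hip)'
    # The bundled copy under torch/lib should be gone after the cleanup step
    $ ls "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib | grep amdhip64 \
        || echo "no bundled libamdhip64"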
From 2961a4d54940dbb282202ec45c2e4da8a4132f20 Mon Sep 17 00:00:00 2001
From: Hongxia Yang
Date: Tue, 16 Jul 2024 19:25:02 +0000
Subject: [PATCH 2/3] more cleanup

---
 .../source/getting_started/amd-installation.rst | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/docs/source/getting_started/amd-installation.rst b/docs/source/getting_started/amd-installation.rst
index 7161b0baee4e2..5f6c312314f99 100644
--- a/docs/source/getting_started/amd-installation.rst
+++ b/docs/source/getting_started/amd-installation.rst
@@ -30,7 +30,7 @@ First, build a docker image from `Dockerfile.rocm `_ and launch a docker container from the image.
 `Dockerfile.rocm `_ uses ROCm 6.1 by default, but also supports ROCm 5.7 or ROCm 6.1 in older vLLM branches.
 It provides flexibility to customize the build of docker image using the following arguments:
 
-* `BASE_IMAGE`: specifies the base image used when running ``docker build``, specifically the PyTorch on ROCm base image. We have tested ROCm 5.7 and ROCm 6.0. The default is `rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1`
+* `BASE_IMAGE`: specifies the base image used when running ``docker build``, specifically the PyTorch on ROCm base image.
 * `BUILD_FA`: specifies whether to build CK flash-attention. The default is 1. For `Radeon RX 7900 series (gfx1100) `_, this should be set to 0 before flash-attention supports this target.
 * `FX_GFX_ARCHS`: specifies the GFX architecture that is used to build CK flash-attention, for example, `gfx90a;gfx942` for MI200 and MI300. The default is `gfx90a;gfx942`
 * `FA_BRANCH`: specifies the branch used to build the CK flash-attention in `ROCm's flash-attention repo `_. The default is `ae7928c`
@@ -78,17 +78,12 @@ Option 2: Build from source
 0. Install prerequisites (skip if you are already in an environment/docker with the following installed):
 
 - `ROCm `_
-- `Pytorch `_
+- `PyTorch `_
 - `hipBLAS `_
 
 For installing PyTorch, you can start from a fresh docker image, e.g., `rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging`, `rocm/pytorch-nightly`.
 
-Alternatively, you can install pytorch using pytorch wheels. You can check Pytorch installation guild in Pytorch `Getting Started `_
-
-
-.. code-block:: console
-
-    $ pip3 install torch --index-url https://download.pytorch.org/whl/rocm6.0
+Alternatively, you can install PyTorch using PyTorch wheels. You can check the PyTorch installation guide in PyTorch `Getting Started `_
 
 1. Install `Triton flash attention for ROCm `_
 
@@ -100,8 +95,6 @@ Install ROCm's Triton flash attention (the default triton-mlir branch) following
 Install ROCm's flash attention (v2.0.4) following the instructions from `ROCm/flash-attention `_
 
 .. note::
-   - If you are using rocm5.7 with pytorch 2.1.0 onwards, you don't need to apply the `hipify_python.patch`. You can build the ROCm flash attention directly.
-   - If you fail to install `ROCm/flash-attention`, try cloning from the commit `6fd2f8e572805681cd67ef8596c7e2ce521ed3c6`.
    - ROCm's Flash-attention-2 (v2.0.4) does not support sliding windows attention.
    - You might need to downgrade the "ninja" version to 1.10 it is not used when compiling flash-attention-2 (e.g. `pip install ninja==1.10.2.4`)
@@ -117,5 +110,5 @@ Install ROCm's flash attention (v2.0.4) following the instructions from `ROCm/fl
 
 .. tip::
    - Triton flash attention is used by default. For benchmarking purposes, it is recommended to run a warm up step before collecting perf numbers.
-   - To use CK flash-attention, please use this flag ``export VLLM_USE_TRITON_FLASH_ATTN=0`` to turn off triton flash attention.
-   - The ROCm version of pytorch, ideally, should match the ROCm driver version.
+   - To use CK flash-attention or PyTorch naive attention, set ``export VLLM_USE_TRITON_FLASH_ATTN=0`` to turn off Triton flash attention.
+   - The ROCm version of PyTorch should, ideally, match the ROCm driver version.

From 94ec6ee54936aea73c053f680530f29f76929512 Mon Sep 17 00:00:00 2001
From: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:03:32 -0400
Subject: [PATCH 3/3] Update docs/source/getting_started/amd-installation.rst

Co-authored-by: Woosuk Kwon
---
 docs/source/getting_started/amd-installation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/getting_started/amd-installation.rst b/docs/source/getting_started/amd-installation.rst
index 5f6c312314f99..1f9e4fabc4fc9 100644
--- a/docs/source/getting_started/amd-installation.rst
+++ b/docs/source/getting_started/amd-installation.rst
@@ -27,7 +27,7 @@ You can build and install vLLM from source.
 
 First, build a docker image from `Dockerfile.rocm `_ and launch a docker container from the image.
 
-`Dockerfile.rocm `_ uses ROCm 6.1 by default, but also supports ROCm 5.7 or ROCm 6.1 in older vLLM branches.
+`Dockerfile.rocm `_ uses ROCm 6.1 by default, but also supports ROCm 5.7 and 6.0 in older vLLM branches.
 It provides flexibility to customize the build of docker image using the following arguments:
 
 * `BASE_IMAGE`: specifies the base image used when running ``docker build``, specifically the PyTorch on ROCm base image.
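Finally, the attention-backend switch mentioned in the updated tip can be exercised as below. A sketch only: ``benchmarks/benchmark_throughput.py`` ships in the vLLM source tree, and the model and length flags are illustrative:

    # Turn off Triton flash attention to fall back to CK flash-attention
    # (or PyTorch naive attention if CK flash-attention was not built).
    $ export VLLM_USE_TRITON_FLASH_ATTN=0
    # Run a warm-up pass before collecting perf numbers, per the tip.
    $ python3 benchmarks/benchmark_throughput.py --model facebook/opt-125m \
        --input-len 128 --output-len 128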