From 3691e98d660e5003be098b147d18a7fe65d51f27 Mon Sep 17 00:00:00 2001 From: Jason Cox Date: Sat, 27 Jan 2024 18:22:48 -0800 Subject: [PATCH 1/6] Add compute capability == 6 support This adds the 6.x architectures to the supported list but also presents a warning that capabilities < 7.0 are untested and may have issues. --- setup.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 2f6242690a263..d92ea1c179f37 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ MAIN_CUDA_VERSION = "12.1" # Supported NVIDIA GPU architectures. -NVIDIA_SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"} +NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "6.2", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"} ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx908", "gfx906", "gfx1030", "gfx1100"} # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS) @@ -184,9 +184,14 @@ def get_torch_arch_list() -> Set[str]: device_count = torch.cuda.device_count() for i in range(device_count): major, minor = torch.cuda.get_device_capability(i) - if major < 7: + if major < 6: raise RuntimeError( - "GPUs with compute capability below 7.0 are not supported.") + "GPUs with compute capability below 6.0 are not supported.") + if major == 6: + warnings.warn( + "GPUs with compute capability below 7.0 may work but are not part of vLLM supported testing. " + "Proceeding but performance and stability issues may occur.", + stacklevel=2) compute_capabilities.add(f"{major}.{minor}") ext_modules = [] From cd31b016e1f71528c5f43720276020aa5287583f Mon Sep 17 00:00:00 2001 From: Jason Cox Date: Sat, 27 Jan 2024 18:47:20 -0800 Subject: [PATCH 2/6] Comply with yapf format changes Failed build based on yapf - updating to suggested format: NVIDIA_SUPPORTED_ARCHS = { "6.0", "6.1", "6.2", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0" } --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d92ea1c179f37..2f91847f7ea20 100644 
--- a/setup.py +++ b/setup.py @@ -18,7 +18,9 @@ MAIN_CUDA_VERSION = "12.1" # Supported NVIDIA GPU architectures. -NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "6.2", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"} +NVIDIA_SUPPORTED_ARCHS = { + "6.0", "6.1", "6.2", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0" +} ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx908", "gfx906", "gfx1030", "gfx1100"} # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS) From 941f23d834d72a4a8bd96e79c13871974d5d7946 Mon Sep 17 00:00:00 2001 From: Jason Cox Date: Tue, 13 Feb 2024 23:42:09 -0800 Subject: [PATCH 3/6] Update setup.py for yapf --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 88d94da2cc921..0d904c1353b92 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,9 @@ MAIN_CUDA_VERSION = "12.1" # Supported NVIDIA GPU architectures. -NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "6.2", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"} +NVIDIA_SUPPORTED_ARCHS = { + "6.0", "6.1", "6.2", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0" +} ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942", "gfx1100"} # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS) From 7a6fadc33b6857b130f852664fb5ba77d3f0393c Mon Sep 17 00:00:00 2001 From: Jason Cox Date: Tue, 23 Apr 2024 20:31:57 -0700 Subject: [PATCH 4/6] Script to add Pascal GPU support to vLLM --- pascal.sh | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 pascal.sh diff --git a/pascal.sh b/pascal.sh new file mode 100644 index 0000000000000..6ddf1132d27bc --- /dev/null +++ b/pascal.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# +# This script adds Pascal GPU support to the VLLM OpenAI Docker image. +# It updates the CMakeLists.txt and Dockerfile files to include 6.0, 6.1 and 6.2. +# + +# Ask user for confirmation +read -p "This script will add Pascal GPU support to vLLM. Continue? [y/N] " -n 1 -r +echo +if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Exiting..." 
+ exit 1 +fi +echo +echo "Adding Pascal GPU support..." + +# Update CMakeLists.txt and Dockerfile +echo " - Updating CMakeLists.txt" +cuda_supported_archs="6.0;6.1;6.2;7.0;7.5;8.0;8.6;8.9;9.0" +sed -i.orig "s/set(CUDA_SUPPORTED_ARCHS \"7.0;7.5;8.0;8.6;8.9;9.0\")/set(CUDA_SUPPORTED_ARCHS \"$cuda_supported_archs\")/g" CMakeLists.txt + +echo " - Updating Dockerfile" +torch_cuda_arch_list="6.0 6.1 6.2 7.0 7.5 8.0 8.6 8.9 9.0+PTX" +sed -i.orig "s/ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'/ARG torch_cuda_arch_list='$torch_cuda_arch_list'/g" Dockerfile + +cat < Date: Tue, 23 Apr 2024 20:36:09 -0700 Subject: [PATCH 5/6] Rebase --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3216893862c82..4b672e1af8494 100644 --- a/setup.py +++ b/setup.py @@ -411,4 +411,4 @@ def _read_requirements(filename: str) -> List[str]: }, cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {}, package_data=package_data, -) \ No newline at end of file +) From fa939e5b48dcc38b7dd964233e8334863d93c4d7 Mon Sep 17 00:00:00 2001 From: Jason Cox Date: Tue, 23 Apr 2024 20:45:38 -0700 Subject: [PATCH 6/6] Update Pascal GPU support in vLLM build files --- pascal.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) mode change 100644 => 100755 pascal.sh diff --git a/pascal.sh b/pascal.sh old mode 100644 new mode 100755 index 6ddf1132d27bc..f6c752d348ebd --- a/pascal.sh +++ b/pascal.sh @@ -1,7 +1,7 @@ #!/bin/bash # -# This script adds Pascal GPU support to the VLLM OpenAI Docker image. -# It updates the CMakeLists.txt and Dockerfile files to include 6.0, 6.1 and 6.2. +# This script adds Pascal GPU support to vLLM by adding 6.0, 6.1 and 6.2 +# GPU architectures to the build files MakeLists.txt and Dockerfile # # Ask user for confirmation