From 45a1a69b9841a4cb7cc70788cf7dea1a2d3ec3d6 Mon Sep 17 00:00:00 2001
From: Simon Mo <simon.mo@hey.com>
Date: Thu, 30 May 2024 16:37:16 -0500
Subject: [PATCH] [Build] Disable sm_90a in cu11 (#5141)

---
 CMakeLists.txt | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b668cbc97de15..8df3a7a26d884 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -177,7 +177,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   include(FetchContent)
   SET(CUTLASS_ENABLE_HEADERS_ONLY=ON)
   FetchContent_Declare(
-        cutlass 
+        cutlass
         GIT_REPOSITORY https://github.com/nvidia/cutlass.git
         # CUTLASS 3.5.0
         GIT_TAG 7d49e6c7e2f8896c47f586706e67e1fb215529dc
@@ -200,11 +200,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   # The CUTLASS kernels for Hopper require sm90a to be enabled.
   # This is done via the below gencode option, BUT that creates kernels for both sm90 and sm90a.
   # That adds an extra 17MB to compiled binary, so instead we selectively enable it.
-  set_source_files_properties(
-      "csrc/quantization/cutlass_w8a8/scaled_mm_dq_c3x.cu"
-      PROPERTIES
-      COMPILE_FLAGS
-      "-gencode arch=compute_90a,code=sm_90a")
+  # Only CUDA 12.0+ toolkits get the sm_90a flag; "VERSION_GREATER 11" would
+  # still match 11.x releases such as 11.8, which this patch means to exclude.
+  if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0)
+    set_source_files_properties(
+          "csrc/quantization/cutlass_w8a8/scaled_mm_dq_c3x.cu"
+          PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a")
+  endif()
 
 endif()