PygmalionAI · AlpinDale · Dec 5, 2024 · Nov 15, 2024 · Nov 15, 2024 · Nov 15, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -20,7 +20,7 @@ set(PYTHON_SUPPORTED_VERSIONS "3.8" "3.9" "3.10" "3.11" "3.12")
 set(CUDA_SUPPORTED_ARCHS "6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0")
 
 # Supported AMD GPU architectures.
-set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100")
+set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101")
 
 #
 # Supported/expected torch versions for CUDA/ROCm.
@@ -65,21 +65,20 @@ endif()
 # etc.
 #
 find_package(Torch REQUIRED)
-find_package(CUDA REQUIRED)
-find_package(CUDAToolkit REQUIRED)
-
-# Add cuBLAS to the list of libraries to link against
-list(APPEND LIBS CUDA::cublas)
-
-set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CUDA_STANDARD 17)
-set(CMAKE_CUDA_STANDARD_REQUIRED ON)
-
-# Replace -std=c++20 with -std=c++17 in APHRODITE_GPU_FLAGS
-if(APHRODITE_GPU_LANG STREQUAL "CUDA")
+if(MSVC)
+  find_package(CUDA REQUIRED)
+  find_package(CUDAToolkit REQUIRED)
+  # Add cuBLAS to the list of libraries to link against
+  list(APPEND LIBS CUDA::cublas)
+  set(CMAKE_CXX_STANDARD 17)
+  set(CMAKE_CXX_STANDARD_REQUIRED ON)
+  set(CMAKE_CUDA_STANDARD 17)
+  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+  # Replace -std=c++20 with -std=c++17 in APHRODITE_GPU_FLAGS
+  if(APHRODITE_GPU_LANG STREQUAL "CUDA")
   list(APPEND APHRODITE_GPU_FLAGS "--std=c++17" "-Xcompiler -Wno-return-type")
 endif()
+endif()
 
 #
 # Add the `default` target which detects which extensions should be
@@ -210,7 +209,6 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA")
     "kernels/quantization/aqlm/gemm_kernels.cu"
     "kernels/quantization/awq/gemm_kernels.cu"
     "kernels/quantization/quip/origin_order.cu"
-    "kernels/quantization/gptq_marlin/gptq_marlin.cu"
     "kernels/quantization/gptq_marlin/gptq_marlin_repack.cu"
     "kernels/quantization/marlin/dense/marlin_cuda_kernel.cu"
     "kernels/quantization/marlin/sparse/marlin_24_cuda_kernel.cu"
@@ -220,7 +218,10 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA")
     "kernels/quantization/fp8/fp8_marlin.cu"
     "kernels/all_reduce/custom_all_reduce.cu")
 
-  # Add CUTLASS and GPTQ Marlin kernels if not MSVC
+  if(MSVC)
+    list(APPEND APHRODITE_EXT_SRC
+      "kernels/quantization/gptq_marlin/gptq_marlin_windows.cu")
+  endif()
   if(NOT MSVC)
     # Include CUTLASS only when needed
     include(FetchContent)
@@ -236,7 +237,8 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA")
     list(APPEND APHRODITE_EXT_SRC
       "kernels/quantization/cutlass_w8a8/scaled_mm_entry.cu"
       "kernels/quantization/cutlass_w8a8/scaled_mm_c2x.cu"
-      "kernels/quantization/cutlass_w8a8/scaled_mm_c3x.cu")
+      "kernels/quantization/cutlass_w8a8/scaled_mm_c3x.cu"
+      "kernels/quantization/gptq_marlin/gptq_marlin.cu")
 
     # Enable sm90a for Hopper CUTLASS kernels when using newer CUDA
     if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0)

diff --git a/amdpatch.sh b/amdpatch.sh
@@ -2,4 +2,4 @@
 
 ROCM_PATH=$(hipconfig --rocmpath)
 
-sudo patch $ROCM_PATH/lib/llvm/lib/clang/18/include/__clang_hip_cmath.h ./patches/amd.patch
+sudo patch $ROCM_PATH/lib/llvm/lib/clang/*/include/__clang_hip_cmath.h ./patches/amd.patch
Original file line number	Diff line number	Diff line change
@@ -2,4 +2,4 @@
 
 ROCM_PATH=$(hipconfig --rocmpath)
 
-sudo patch $ROCM_PATH/lib/llvm/lib/clang/18/include/__clang_hip_cmath.h ./patches/amd.patch
+sudo patch $ROCM_PATH/lib/llvm/lib/clang/*/include/__clang_hip_cmath.h ./patches/amd.patch