Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: ROCm build #817

Merged
merged 12 commits into from
Dec 5, 2024
38 changes: 20 additions & 18 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ set(PYTHON_SUPPORTED_VERSIONS "3.8" "3.9" "3.10" "3.11" "3.12")
set(CUDA_SUPPORTED_ARCHS "6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0")

# Supported AMD GPU architectures.
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100")
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101")

#
# Supported/expected torch versions for CUDA/ROCm.
Expand Down Expand Up @@ -65,20 +65,19 @@ endif()
# etc.
#
find_package(Torch REQUIRED)
find_package(CUDA REQUIRED)
find_package(CUDAToolkit REQUIRED)

# Add cuBLAS to the list of libraries to link against
list(APPEND LIBS CUDA::cublas)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Replace -std=c++20 with -std=c++17 in APHRODITE_GPU_FLAGS
if(APHRODITE_GPU_LANG STREQUAL "CUDA")
list(APPEND APHRODITE_GPU_FLAGS "--std=c++17" "-Xcompiler -Wno-return-type")
if(MSVC)
find_package(CUDA REQUIRED)
find_package(CUDAToolkit REQUIRED)
# Add cuBLAS to the list of libraries to link against
list(APPEND LIBS CUDA::cublas)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
# Replace -std=c++20 with -std=c++17 in APHRODITE_GPU_FLAGS
if(APHRODITE_GPU_LANG STREQUAL "CUDA")
list(APPEND APHRODITE_GPU_FLAGS "--std=c++17" "-Xcompiler -Wno-return-type")
endif()
AlpinDale marked this conversation as resolved.
Show resolved Hide resolved
endif()

#
Expand Down Expand Up @@ -210,7 +209,6 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA")
"kernels/quantization/aqlm/gemm_kernels.cu"
"kernels/quantization/awq/gemm_kernels.cu"
"kernels/quantization/quip/origin_order.cu"
"kernels/quantization/gptq_marlin/gptq_marlin.cu"
"kernels/quantization/gptq_marlin/gptq_marlin_repack.cu"
"kernels/quantization/marlin/dense/marlin_cuda_kernel.cu"
"kernels/quantization/marlin/sparse/marlin_24_cuda_kernel.cu"
Expand All @@ -220,7 +218,10 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA")
"kernels/quantization/fp8/fp8_marlin.cu"
"kernels/all_reduce/custom_all_reduce.cu")

# Add CUTLASS and GPTQ Marlin kernels if not MSVC
if(MSVC)
list(APPEND APHRODITE_EXT_SRC
"kernels/quantization/gptq_marlin/gptq_marlin_windows.cu")
endif()
if(NOT MSVC)
# Include CUTLASS only when needed
include(FetchContent)
Expand All @@ -236,7 +237,8 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA")
list(APPEND APHRODITE_EXT_SRC
"kernels/quantization/cutlass_w8a8/scaled_mm_entry.cu"
"kernels/quantization/cutlass_w8a8/scaled_mm_c2x.cu"
"kernels/quantization/cutlass_w8a8/scaled_mm_c3x.cu")
"kernels/quantization/cutlass_w8a8/scaled_mm_c3x.cu"
"kernels/quantization/gptq_marlin/gptq_marlin.cu")

# Enable sm90a for Hopper CUTLASS kernels when using newer CUDA
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0)
Expand Down
2 changes: 1 addition & 1 deletion amdpatch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

ROCM_PATH=$(hipconfig --rocmpath)

sudo patch $ROCM_PATH/lib/llvm/lib/clang/18/include/__clang_hip_cmath.h ./patches/amd.patch
sudo patch $ROCM_PATH/lib/llvm/lib/clang/*/include/__clang_hip_cmath.h ./patches/amd.patch
Loading
Loading