From 94144e726cfeeba0c1758751b7fd46a20b6bd3b4 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Tue, 10 Sep 2024 19:51:58 -0400 Subject: [PATCH] [CI/Build][Kernel] Update CUTLASS to 3.5.1 tag (#8043) --- CMakeLists.txt | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c88c31c83da1..f8d6a2be9feae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,9 +195,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") FetchContent_Declare( cutlass GIT_REPOSITORY https://github.com/nvidia/cutlass.git - # CUTLASS 3.5.1 - GIT_TAG 06b21349bcf6ddf6a1686a47a137ad1446579db9 + GIT_TAG v3.5.1 GIT_PROGRESS TRUE + + # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history. + # Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags. + # So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE + GIT_SHALLOW TRUE ) FetchContent_MakeAvailable(cutlass) @@ -231,6 +235,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") "-gencode arch=compute_90a,code=sm_90a") endif() + # # Machete kernels @@ -289,6 +294,12 @@ define_gpu_extension_target( USE_SABI 3 WITH_SOABI) +# If CUTLASS is compiled on NVCC >= 12.5, it by default uses +# cudaGetDriverEntryPointByVersion as a wrapper to avoid directly calling the +# driver API. This causes problems when linking with earlier versions of CUDA. +# Setting this variable sidesteps the issue by calling the driver directly. +target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1) + # # _moe_C extension #