diff --git a/csrc/moe_align_block_size_kernels.cu b/csrc/moe_align_block_size_kernels.cu index 138615a4bfba0..e01b23685ef4e 100644 --- a/csrc/moe_align_block_size_kernels.cu +++ b/csrc/moe_align_block_size_kernels.cu @@ -111,7 +111,8 @@ void moe_align_block_size( // set dynamic shared mem auto kernel = vllm::moe_align_block_size_kernel; - AT_CUDA_CHECK(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_mem)); + AT_CUDA_CHECK( + VLLM_DevFuncAttribute_SET_MaxDynamicSharedMemorySize((void *)kernel, shared_mem)); kernel<<<1, num_experts, shared_mem, stream>>>( topk_ids.data_ptr(), sorted_token_ids.data_ptr(),