From 0379295f63a9b6a0d013c3d5802cad50d6476a72 Mon Sep 17 00:00:00 2001 From: Hosang Yoon Date: Tue, 12 Nov 2024 17:50:14 -0500 Subject: [PATCH] Add vectorized rms_norm support for Navi31 - supports vectorized rms_norm_kernel --- csrc/layernorm_kernels.cu | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/csrc/layernorm_kernels.cu b/csrc/layernorm_kernels.cu index cac6a5648f191..db609a7d5b04f 100644 --- a/csrc/layernorm_kernels.cu +++ b/csrc/layernorm_kernels.cu @@ -16,9 +16,10 @@ #include "quantization/fp8/nvidia/quant_utils.cuh" #endif -#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx940__) || \ - defined(__gfx941__) || defined(__gfx942__)) - #define __HIP__MI300_MI250__ +#if defined(__HIPCC__) && \ + (defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || \ + defined(__gfx942__) || defined(__gfx1100__)) + #define __HIP__MI300_MI250_Navi31__ #endif namespace vllm { @@ -61,7 +62,7 @@ struct __align__(16) vec8_t { __device__ scalar_t sum() const { return x + y + z + w + u + v + s + t; } }; -#ifdef __HIP__MI300_MI250__ +#ifdef __HIP__MI300_MI250_Navi31__ // TODO(woosuk): Further optimize this kernel. template