From 1fde87ab66412b79ff0b9d5129f4b29aa8df0104 Mon Sep 17 00:00:00 2001
From: Li Zhang <lzhang329@gmail.com>
Date: Wed, 28 Feb 2024 09:09:48 +0000
Subject: [PATCH] fix lint

---
 src/turbomind/kernels/attention/CMakeLists.txt         |  6 +++---
 src/turbomind/kernels/attention/arch.h                 |  2 +-
 src/turbomind/kernels/attention/attention.cu           |  2 +-
 .../kernels/attention/attention_128_bf16_sm80.cu       |  2 +-
 .../kernels/attention/attention_128_f16_sm70.cu        |  2 +-
 .../kernels/attention/attention_128_f16_sm80.cu        |  2 +-
 src/turbomind/kernels/attention/attention_config.h     |  2 +-
 src/turbomind/kernels/attention/attention_template.h   |  2 +-
 src/turbomind/kernels/attention/cta_map.h              |  2 +-
 src/turbomind/kernels/attention/decoding.cu            |  2 +-
 src/turbomind/kernels/attention/decoding.h             |  2 +-
 .../kernels/attention/decoding_128_bf16_sm80.cu        |  2 +-
 .../kernels/attention/decoding_128_f16_sm70.cu         |  2 +-
 .../kernels/attention/decoding_128_f16_sm80.cu         |  2 +-
 src/turbomind/kernels/attention/decoding_config.h      |  2 +-
 src/turbomind/kernels/attention/decoding_simt.h        |  2 +-
 src/turbomind/kernels/attention/decoding_sm80.h        |  2 +-
 src/turbomind/kernels/attention/decoding_template.h    |  2 +-
 src/turbomind/kernels/attention/impl.h                 |  2 +-
 src/turbomind/kernels/attention/impl_m16n8.h           |  2 +-
 src/turbomind/kernels/attention/impl_sm70.h            | 10 +++++-----
 src/turbomind/kernels/attention/impl_sm75.h            |  2 +-
 src/turbomind/kernels/attention/impl_sm80.h            |  2 +-
 src/turbomind/kernels/attention/iterator_sm70.h        |  2 +-
 src/turbomind/kernels/attention/iterator_sm80.h        |  2 +-
 src/turbomind/kernels/attention/kv_cache_utils.cu      |  2 +-
 src/turbomind/kernels/attention/kv_cache_utils.h       |  2 +-
 src/turbomind/kernels/attention/mainloop.h             |  2 +-
 src/turbomind/kernels/attention/mainloop_sm70.h        |  2 +-
 src/turbomind/kernels/attention/mainloop_sm80.h        |  4 ++--
 src/turbomind/kernels/attention/quantization.h         |  2 +-
 src/turbomind/kernels/attention/reduce.cu              |  2 +-
 src/turbomind/kernels/attention/reduce.h               |  2 +-
 src/turbomind/kernels/attention/reduce_template.h      |  2 +-
 src/turbomind/kernels/attention/reference.cu           |  2 +-
 src/turbomind/kernels/attention/reference.h            |  2 +-
 src/turbomind/kernels/attention/smem_layout.h          |  2 +-
 src/turbomind/kernels/attention/test_attention.cu      |  2 +-
 src/turbomind/kernels/attention/utils.cc               |  2 +-
 src/turbomind/kernels/attention/utils.h                |  2 +-
 src/turbomind/kernels/flash_attention/CMakeLists.txt   |  2 +-
 .../kernels/flash_attention/flash_attention.cu         |  2 +-
 42 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/src/turbomind/kernels/attention/CMakeLists.txt b/src/turbomind/kernels/attention/CMakeLists.txt
index 4e347f6dc7..3ecec478da 100644
--- a/src/turbomind/kernels/attention/CMakeLists.txt
+++ b/src/turbomind/kernels/attention/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 
-add_library(attention STATIC 
-            attention.cu 
-            decoding.cu 
+add_library(attention STATIC
+            attention.cu
+            decoding.cu
             kv_cache_utils.cu
             utils.cc
             attention_128_f16_sm80.cu
diff --git a/src/turbomind/kernels/attention/arch.h b/src/turbomind/kernels/attention/arch.h
index 1380499396..772852dae8 100644
--- a/src/turbomind/kernels/attention/arch.h
+++ b/src/turbomind/kernels/attention/arch.h
@@ -16,4 +16,4 @@ struct Sm80 {
     static constexpr int value = 80;
 };
 
-}  // namespace turbomind::arch
\ No newline at end of file
+}  // namespace turbomind::arch
diff --git a/src/turbomind/kernels/attention/attention.cu b/src/turbomind/kernels/attention/attention.cu
index ac1a181466..717540e620 100644
--- a/src/turbomind/kernels/attention/attention.cu
+++ b/src/turbomind/kernels/attention/attention.cu
@@ -46,4 +46,4 @@ template void dispatchAttention(const AttentionParams<half>& params);
 // template void dispatchAttention(const AttentionParams<nv_bfloat16>& params);
 // #endif
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/attention_128_bf16_sm80.cu b/src/turbomind/kernels/attention/attention_128_bf16_sm80.cu
index 50a6d1cb31..84fdcb380e 100644
--- a/src/turbomind/kernels/attention/attention_128_bf16_sm80.cu
+++ b/src/turbomind/kernels/attention/attention_128_bf16_sm80.cu
@@ -8,4 +8,4 @@ namespace turbomind {
 using Kernel = typename attention::AttentionConfig<arch::Sm80, nv_bfloat16, nv_bfloat16, 1, 128>::Kernel;
 template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/attention_128_f16_sm70.cu b/src/turbomind/kernels/attention/attention_128_f16_sm70.cu
index 0f177b419c..2ed93c7e0f 100644
--- a/src/turbomind/kernels/attention/attention_128_f16_sm70.cu
+++ b/src/turbomind/kernels/attention/attention_128_f16_sm70.cu
@@ -9,4 +9,4 @@ using Kernel = typename attention::AttentionConfig<arch::Sm70, half, half, 1, 12
 
 template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/attention_128_f16_sm80.cu b/src/turbomind/kernels/attention/attention_128_f16_sm80.cu
index 24059ded6f..92ed792a64 100644
--- a/src/turbomind/kernels/attention/attention_128_f16_sm80.cu
+++ b/src/turbomind/kernels/attention/attention_128_f16_sm80.cu
@@ -8,4 +8,4 @@ namespace turbomind {
 using Kernel = typename attention::AttentionConfig<arch::Sm80, half, half, 1, 128>::Kernel;
 template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/attention_config.h b/src/turbomind/kernels/attention/attention_config.h
index 5f1f988f9c..29dc0d1890 100644
--- a/src/turbomind/kernels/attention/attention_config.h
+++ b/src/turbomind/kernels/attention/attention_config.h
@@ -42,4 +42,4 @@ struct AttentionConfig<arch::Sm70, T, Tkv, 1, HeadDim> {
     using Kernel    = AttentionUniversal<Mainloop, int, AttentionCtaMap>;
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/attention_template.h b/src/turbomind/kernels/attention/attention_template.h
index fc8dedb3c6..09f1ab2aec 100644
--- a/src/turbomind/kernels/attention/attention_template.h
+++ b/src/turbomind/kernels/attention/attention_template.h
@@ -80,4 +80,4 @@ void invokeAttention(const typename Kernel::ParamType& params)
     }
 }
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/cta_map.h b/src/turbomind/kernels/attention/cta_map.h
index 2500798427..ae8502503c 100644
--- a/src/turbomind/kernels/attention/cta_map.h
+++ b/src/turbomind/kernels/attention/cta_map.h
@@ -146,4 +146,4 @@ struct ReduceCtaMap {
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/decoding.cu b/src/turbomind/kernels/attention/decoding.cu
index 7107b1b115..5d2f1c0b53 100644
--- a/src/turbomind/kernels/attention/decoding.cu
+++ b/src/turbomind/kernels/attention/decoding.cu
@@ -143,4 +143,4 @@ void dispatchDecoding(const AttentionParams<nv_bfloat16>& params)
 
 template void dispatchDecoding(const AttentionParams<half>& params);
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/decoding.h b/src/turbomind/kernels/attention/decoding.h
index 6c2ae7d223..9886ef9b2c 100644
--- a/src/turbomind/kernels/attention/decoding.h
+++ b/src/turbomind/kernels/attention/decoding.h
@@ -9,4 +9,4 @@ namespace turbomind {
 template<class T>
 void dispatchDecoding(const AttentionParams<T>& params);
 
-}
\ No newline at end of file
+}
diff --git a/src/turbomind/kernels/attention/decoding_128_bf16_sm80.cu b/src/turbomind/kernels/attention/decoding_128_bf16_sm80.cu
index c28fc30b26..70ceb21ba3 100644
--- a/src/turbomind/kernels/attention/decoding_128_bf16_sm80.cu
+++ b/src/turbomind/kernels/attention/decoding_128_bf16_sm80.cu
@@ -25,4 +25,4 @@ template void invokeDecoding<sm80_bf16_s8_g1_d128>(const typename sm80_bf16_s8_g
 using sm80_bf16_s8_g2_d128 = Decoding<arch::Sm80, nv_bfloat16, int8_t, 2, 128>;
 template void invokeDecoding<sm80_bf16_s8_g2_d128>(const typename sm80_bf16_s8_g2_d128::ParamType& params);
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/decoding_128_f16_sm70.cu b/src/turbomind/kernels/attention/decoding_128_f16_sm70.cu
index 9c7819b6d2..268c9450f8 100644
--- a/src/turbomind/kernels/attention/decoding_128_f16_sm70.cu
+++ b/src/turbomind/kernels/attention/decoding_128_f16_sm70.cu
@@ -25,4 +25,4 @@ template void invokeDecoding<sm70_f16_s8_g2_d128>(const typename sm70_f16_s8_g2_
 using sm70_f16_s8_g4_d128 = Decoding<arch::Sm70, half, int8_t, 4, 128>;
 template void invokeDecoding<sm70_f16_s8_g4_d128>(const typename sm70_f16_s8_g4_d128::ParamType& params);
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/decoding_128_f16_sm80.cu b/src/turbomind/kernels/attention/decoding_128_f16_sm80.cu
index 616200fb15..aca02ab302 100644
--- a/src/turbomind/kernels/attention/decoding_128_f16_sm80.cu
+++ b/src/turbomind/kernels/attention/decoding_128_f16_sm80.cu
@@ -25,4 +25,4 @@ template void invokeDecoding<sm80_f16_s8_g1_d128>(const typename sm80_f16_s8_g1_
 using sm80_f16_s8_g2_d128 = Decoding<arch::Sm80, half, int8_t, 2, 128>;
 template void invokeDecoding<sm80_f16_s8_g2_d128>(const typename sm80_f16_s8_g2_d128::ParamType& params);
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/decoding_config.h b/src/turbomind/kernels/attention/decoding_config.h
index 65f309bdf6..3197a71825 100644
--- a/src/turbomind/kernels/attention/decoding_config.h
+++ b/src/turbomind/kernels/attention/decoding_config.h
@@ -85,4 +85,4 @@ struct DecodingConfig<arch::Sm70, T, int8_t, Qh, HeadDim> {
 //     using Kernel    = AttentionUniversal<Mainloop, BlockSeqLen, DecodingCtaMap>;
 // };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/decoding_simt.h b/src/turbomind/kernels/attention/decoding_simt.h
index 3861363e85..cf19d30a6a 100644
--- a/src/turbomind/kernels/attention/decoding_simt.h
+++ b/src/turbomind/kernels/attention/decoding_simt.h
@@ -559,4 +559,4 @@ struct Impl<Sm70_Simt, T_, Tkv_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_S
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/decoding_sm80.h b/src/turbomind/kernels/attention/decoding_sm80.h
index 0d9e6870b9..85e37ddfde 100644
--- a/src/turbomind/kernels/attention/decoding_sm80.h
+++ b/src/turbomind/kernels/attention/decoding_sm80.h
@@ -581,4 +581,4 @@ struct Impl<Sm80_81616, T_, Tkv_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/decoding_template.h b/src/turbomind/kernels/attention/decoding_template.h
index 8dd6804008..bba3d3e665 100644
--- a/src/turbomind/kernels/attention/decoding_template.h
+++ b/src/turbomind/kernels/attention/decoding_template.h
@@ -77,4 +77,4 @@ void invokeDecoding(const typename Kernel::ParamType& params)
     }
 }
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/impl.h b/src/turbomind/kernels/attention/impl.h
index 31b7154690..69f8640822 100644
--- a/src/turbomind/kernels/attention/impl.h
+++ b/src/turbomind/kernels/attention/impl.h
@@ -39,4 +39,4 @@ struct Impl {};
 
 }  // namespace attention
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/impl_m16n8.h b/src/turbomind/kernels/attention/impl_m16n8.h
index 3b649227f9..33133a7e46 100644
--- a/src/turbomind/kernels/attention/impl_m16n8.h
+++ b/src/turbomind/kernels/attention/impl_m16n8.h
@@ -262,4 +262,4 @@ struct Impl_m16k8 {
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/impl_sm70.h b/src/turbomind/kernels/attention/impl_sm70.h
index d90d088d63..3b62ee9082 100644
--- a/src/turbomind/kernels/attention/impl_sm70.h
+++ b/src/turbomind/kernels/attention/impl_sm70.h
@@ -25,8 +25,8 @@ mma_m8n8k4_row_col(Array<float, 8>& d, const Array<half, 4>& a, const Array<half
         "{%10, %11},"
         "{%12, %13, %14, %15, %16, %17, %18, %19};"
         : "=f"(d[0]), "=f"(d[1]), "=f"(d[2]), "=f"(d[3]), "=f"(d[4]), "=f"(d[5]), "=f"(d[6]), "=f"(d[7])
-        : "r"(A[0]), "r"(A[1]), 
-          "r"(B[0]), "r"(B[1]), 
+        : "r"(A[0]), "r"(A[1]),
+          "r"(B[0]), "r"(B[1]),
           "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]), "f"(c[4]), "f"(c[5]), "f"(c[6]), "f"(c[7]));
 // clang-format on
 #endif
@@ -46,8 +46,8 @@ mma_m8n8k4_row_row(Array<float, 8>& d, const Array<half, 4>& a, const Array<half
         "{%10, %11},"
         "{%12, %13, %14, %15, %16, %17, %18, %19};"
         : "=f"(d[0]), "=f"(d[1]), "=f"(d[2]), "=f"(d[3]), "=f"(d[4]), "=f"(d[5]), "=f"(d[6]), "=f"(d[7])
-        : "r"(A[0]), "r"(A[1]), 
-          "r"(B[0]), "r"(B[1]), 
+        : "r"(A[0]), "r"(A[1]),
+          "r"(B[0]), "r"(B[1]),
           "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]), "f"(c[4]), "f"(c[5]), "f"(c[6]), "f"(c[7]));
 // clang-format on
 #endif
@@ -552,4 +552,4 @@ struct Impl<Sm70_884, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_S, H
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/impl_sm75.h b/src/turbomind/kernels/attention/impl_sm75.h
index b745cec6f7..c6d8b6199b 100644
--- a/src/turbomind/kernels/attention/impl_sm75.h
+++ b/src/turbomind/kernels/attention/impl_sm75.h
@@ -283,4 +283,4 @@ struct Impl<Sm75_1688, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H, WARP_Q, WARP_S, H
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/impl_sm80.h b/src/turbomind/kernels/attention/impl_sm80.h
index ee2c6f760c..ba9962975f 100644
--- a/src/turbomind/kernels/attention/impl_sm80.h
+++ b/src/turbomind/kernels/attention/impl_sm80.h
@@ -280,4 +280,4 @@ struct Impl<Sm80_16816, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H, WARP_Q, WARP_S,
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/iterator_sm70.h b/src/turbomind/kernels/attention/iterator_sm70.h
index 1877601567..e05087dcd2 100644
--- a/src/turbomind/kernels/attention/iterator_sm70.h
+++ b/src/turbomind/kernels/attention/iterator_sm70.h
@@ -56,4 +56,4 @@ struct Sm70GmemIterator: BaseGmemIterator<T, Map, SmemLayout> {
     }
 };
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/iterator_sm80.h b/src/turbomind/kernels/attention/iterator_sm80.h
index 93c185efc0..3810357b2b 100644
--- a/src/turbomind/kernels/attention/iterator_sm80.h
+++ b/src/turbomind/kernels/attention/iterator_sm80.h
@@ -90,4 +90,4 @@ struct Sm80GmemIterator: BaseGmemIterator<T, Map, SmemLayout> {
     }
 };
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/kv_cache_utils.cu b/src/turbomind/kernels/attention/kv_cache_utils.cu
index 8f53863224..b5e475c3d7 100644
--- a/src/turbomind/kernels/attention/kv_cache_utils.cu
+++ b/src/turbomind/kernels/attention/kv_cache_utils.cu
@@ -478,4 +478,4 @@ template void invokeFlattenKV(nv_bfloat16* k,
                               cudaStream_t stream);
 #endif
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/kv_cache_utils.h b/src/turbomind/kernels/attention/kv_cache_utils.h
index 7277725967..5558b0525c 100644
--- a/src/turbomind/kernels/attention/kv_cache_utils.h
+++ b/src/turbomind/kernels/attention/kv_cache_utils.h
@@ -103,4 +103,4 @@ void invokeFlattenKV_(const AttentionParams<T>& params, int sum_k_len)
                     params.stream);
 }
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/mainloop.h b/src/turbomind/kernels/attention/mainloop.h
index 713616c8b6..beb6cc4f29 100644
--- a/src/turbomind/kernels/attention/mainloop.h
+++ b/src/turbomind/kernels/attention/mainloop.h
@@ -7,4 +7,4 @@ namespace turbomind::attention {
 template<class Tag, class Attention>
 struct Mainloop {};
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/mainloop_sm70.h b/src/turbomind/kernels/attention/mainloop_sm70.h
index 50f4d86cdd..0da6f7f348 100644
--- a/src/turbomind/kernels/attention/mainloop_sm70.h
+++ b/src/turbomind/kernels/attention/mainloop_sm70.h
@@ -139,4 +139,4 @@ struct Mainloop<arch::Sm70, Impl_> {
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/mainloop_sm80.h b/src/turbomind/kernels/attention/mainloop_sm80.h
index 1fa936d1cb..b84080d185 100644
--- a/src/turbomind/kernels/attention/mainloop_sm80.h
+++ b/src/turbomind/kernels/attention/mainloop_sm80.h
@@ -317,7 +317,7 @@ struct Mainloop<Sm80_CpAsync<Stages>, Impl_> {
     // Load      : K0,K1 | V0,K2,V1,K3 ...
     // Compute   :    K0 | K1,V0,K2,V1 ...
     // Conclusion:
-    // - more reigster consumption (209 -> 250)
+    // - more register consumption (209 -> 250)
     // - more interleaved HMMA and FMA
     // - slight performance gain
     template<class GmemIterK, class GmemIterV, class BlockIter, class StoreS>
@@ -457,4 +457,4 @@ struct Mainloop<Sm80_CpAsync<Stages>, Impl_> {
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/quantization.h b/src/turbomind/kernels/attention/quantization.h
index 2e1197686f..9c89130e9a 100644
--- a/src/turbomind/kernels/attention/quantization.h
+++ b/src/turbomind/kernels/attention/quantization.h
@@ -391,4 +391,4 @@ __device__ void permute_V(Array<uint8_t, Map::kAccessC> (&x)[Map::kIterS][Map::k
     }
 }
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/reduce.cu b/src/turbomind/kernels/attention/reduce.cu
index e20e399ed4..9dd9c71e78 100644
--- a/src/turbomind/kernels/attention/reduce.cu
+++ b/src/turbomind/kernels/attention/reduce.cu
@@ -20,4 +20,4 @@ void dispatchReduce(T*           out,
 {
 }
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/reduce.h b/src/turbomind/kernels/attention/reduce.h
index 1f807aa189..b6d5be966b 100644
--- a/src/turbomind/kernels/attention/reduce.h
+++ b/src/turbomind/kernels/attention/reduce.h
@@ -206,4 +206,4 @@ struct Reduce {
     }
 };
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/reduce_template.h b/src/turbomind/kernels/attention/reduce_template.h
index 7ac134f0f4..69d91e2dda 100644
--- a/src/turbomind/kernels/attention/reduce_template.h
+++ b/src/turbomind/kernels/attention/reduce_template.h
@@ -99,4 +99,4 @@ void dispatchReduce(T*           out,
     invoke(std::true_type{}, stride_k);
 }
 
-}  // namespace turbomind::attention
\ No newline at end of file
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/reference.cu b/src/turbomind/kernels/attention/reference.cu
index 3dfee23e23..d06e0f37a5 100644
--- a/src/turbomind/kernels/attention/reference.cu
+++ b/src/turbomind/kernels/attention/reference.cu
@@ -331,4 +331,4 @@ template class Reference<half>;
 template class Reference<nv_bfloat16>;
 #endif
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/reference.h b/src/turbomind/kernels/attention/reference.h
index 05a394909c..900e6c675b 100644
--- a/src/turbomind/kernels/attention/reference.h
+++ b/src/turbomind/kernels/attention/reference.h
@@ -67,4 +67,4 @@ class Reference {
     int batch_size_{};
 };
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/smem_layout.h b/src/turbomind/kernels/attention/smem_layout.h
index 5134c8c323..9407471b22 100644
--- a/src/turbomind/kernels/attention/smem_layout.h
+++ b/src/turbomind/kernels/attention/smem_layout.h
@@ -127,4 +127,4 @@ struct SmemAccessor {
     // }
 };
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/test_attention.cu b/src/turbomind/kernels/attention/test_attention.cu
index 94bf785923..7f8b30cf0d 100644
--- a/src/turbomind/kernels/attention/test_attention.cu
+++ b/src/turbomind/kernels/attention/test_attention.cu
@@ -522,4 +522,4 @@ int main(int argc, char* argv[])
     // test_attention<half>();
 
     test_attention<nv_bfloat16>();
-}
\ No newline at end of file
+}
diff --git a/src/turbomind/kernels/attention/utils.cc b/src/turbomind/kernels/attention/utils.cc
index feed1e22a8..aabbecccbd 100644
--- a/src/turbomind/kernels/attention/utils.cc
+++ b/src/turbomind/kernels/attention/utils.cc
@@ -45,4 +45,4 @@ int GetSplitCount(
     return std::get<int>(best);
 }
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/utils.h b/src/turbomind/kernels/attention/utils.h
index 1681d28fd6..e43a78598c 100644
--- a/src/turbomind/kernels/attention/utils.h
+++ b/src/turbomind/kernels/attention/utils.h
@@ -10,4 +10,4 @@ int GetSplitCount(int   max_split_cnt,
                   float alpha = 1,
                   float beta  = 1e-3);
 
-}
\ No newline at end of file
+}
diff --git a/src/turbomind/kernels/flash_attention/CMakeLists.txt b/src/turbomind/kernels/flash_attention/CMakeLists.txt
index 85749aa0b2..e65ffbe0d5 100644
--- a/src/turbomind/kernels/flash_attention/CMakeLists.txt
+++ b/src/turbomind/kernels/flash_attention/CMakeLists.txt
@@ -10,4 +10,4 @@ target_link_libraries(flash_attention PRIVATE llama_fmha)
 if (NOT MSVC)
     add_subdirectory(flash_attention2)
     target_link_libraries(flash_attention PRIVATE flash_attention2)
-endif()
\ No newline at end of file
+endif()
diff --git a/src/turbomind/kernels/flash_attention/flash_attention.cu b/src/turbomind/kernels/flash_attention/flash_attention.cu
index 49d70a872c..cbe0bb8bc1 100644
--- a/src/turbomind/kernels/flash_attention/flash_attention.cu
+++ b/src/turbomind/kernels/flash_attention/flash_attention.cu
@@ -66,4 +66,4 @@ template class FlashAttentionOp<half>;
 template class FlashAttentionOp<__nv_bfloat16>;
 #endif
 
-}  // namespace turbomind
\ No newline at end of file
+}  // namespace turbomind