fix lint

InternLM · Feb 28, 2024 · 1fde87a · 1fde87a
1 parent f64a19f
commit 1fde87a
Show file tree

Hide file tree

Showing 42 changed files with 49 additions and 49 deletions.
diff --git a/src/turbomind/kernels/attention/CMakeLists.txt b/src/turbomind/kernels/attention/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 
-add_library(attention STATIC 
-            attention.cu 
-            decoding.cu 
+add_library(attention STATIC
+            attention.cu
+            decoding.cu
             kv_cache_utils.cu
             utils.cc
             attention_128_f16_sm80.cu

diff --git a/src/turbomind/kernels/attention/arch.h b/src/turbomind/kernels/attention/arch.h
@@ -16,4 +16,4 @@ struct Sm80 {
     static constexpr int value = 80;
 };
 
-}  // namespace turbomind::arch
+}  // namespace turbomind::arch
diff --git a/src/turbomind/kernels/attention/attention.cu b/src/turbomind/kernels/attention/attention.cu
@@ -46,4 +46,4 @@ template void dispatchAttention(const AttentionParams<half>& params);
 // template void dispatchAttention(const AttentionParams<nv_bfloat16>& params);
 // #endif
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/attention_128_bf16_sm80.cu b/src/turbomind/kernels/attention/attention_128_bf16_sm80.cu
@@ -8,4 +8,4 @@ namespace turbomind {
 using Kernel = typename attention::AttentionConfig<arch::Sm80, nv_bfloat16, nv_bfloat16, 1, 128>::Kernel;
 template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/attention_128_f16_sm70.cu b/src/turbomind/kernels/attention/attention_128_f16_sm70.cu
@@ -9,4 +9,4 @@ using Kernel = typename attention::AttentionConfig<arch::Sm70, half, half, 1, 12
 
 template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/attention_128_f16_sm80.cu b/src/turbomind/kernels/attention/attention_128_f16_sm80.cu
@@ -8,4 +8,4 @@ namespace turbomind {
 using Kernel = typename attention::AttentionConfig<arch::Sm80, half, half, 1, 128>::Kernel;
 template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/attention_config.h b/src/turbomind/kernels/attention/attention_config.h
@@ -42,4 +42,4 @@ struct AttentionConfig<arch::Sm70, T, Tkv, 1, HeadDim> {
     using Kernel    = AttentionUniversal<Mainloop, int, AttentionCtaMap>;
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/attention_template.h b/src/turbomind/kernels/attention/attention_template.h
@@ -80,4 +80,4 @@ void invokeAttention(const typename Kernel::ParamType& params)
     }
 }
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/cta_map.h b/src/turbomind/kernels/attention/cta_map.h
@@ -146,4 +146,4 @@ struct ReduceCtaMap {
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/decoding.cu b/src/turbomind/kernels/attention/decoding.cu
@@ -143,4 +143,4 @@ void dispatchDecoding(const AttentionParams<nv_bfloat16>& params)
 
 template void dispatchDecoding(const AttentionParams<half>& params);
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/decoding.h b/src/turbomind/kernels/attention/decoding.h
@@ -9,4 +9,4 @@ namespace turbomind {
 template<class T>
 void dispatchDecoding(const AttentionParams<T>& params);
 
-}
+}
diff --git a/src/turbomind/kernels/attention/decoding_128_bf16_sm80.cu b/src/turbomind/kernels/attention/decoding_128_bf16_sm80.cu
@@ -25,4 +25,4 @@ template void invokeDecoding<sm80_bf16_s8_g1_d128>(const typename sm80_bf16_s8_g
 using sm80_bf16_s8_g2_d128 = Decoding<arch::Sm80, nv_bfloat16, int8_t, 2, 128>;
 template void invokeDecoding<sm80_bf16_s8_g2_d128>(const typename sm80_bf16_s8_g2_d128::ParamType& params);
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/decoding_128_f16_sm70.cu b/src/turbomind/kernels/attention/decoding_128_f16_sm70.cu
@@ -25,4 +25,4 @@ template void invokeDecoding<sm70_f16_s8_g2_d128>(const typename sm70_f16_s8_g2_
 using sm70_f16_s8_g4_d128 = Decoding<arch::Sm70, half, int8_t, 4, 128>;
 template void invokeDecoding<sm70_f16_s8_g4_d128>(const typename sm70_f16_s8_g4_d128::ParamType& params);
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/decoding_128_f16_sm80.cu b/src/turbomind/kernels/attention/decoding_128_f16_sm80.cu
@@ -25,4 +25,4 @@ template void invokeDecoding<sm80_f16_s8_g1_d128>(const typename sm80_f16_s8_g1_
 using sm80_f16_s8_g2_d128 = Decoding<arch::Sm80, half, int8_t, 2, 128>;
 template void invokeDecoding<sm80_f16_s8_g2_d128>(const typename sm80_f16_s8_g2_d128::ParamType& params);
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/decoding_config.h b/src/turbomind/kernels/attention/decoding_config.h
@@ -85,4 +85,4 @@ struct DecodingConfig<arch::Sm70, T, int8_t, Qh, HeadDim> {
 //     using Kernel    = AttentionUniversal<Mainloop, BlockSeqLen, DecodingCtaMap>;
 // };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/decoding_simt.h b/src/turbomind/kernels/attention/decoding_simt.h
@@ -559,4 +559,4 @@ struct Impl<Sm70_Simt, T_, Tkv_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_S
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/decoding_sm80.h b/src/turbomind/kernels/attention/decoding_sm80.h
@@ -581,4 +581,4 @@ struct Impl<Sm80_81616, T_, Tkv_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/decoding_template.h b/src/turbomind/kernels/attention/decoding_template.h
@@ -77,4 +77,4 @@ void invokeDecoding(const typename Kernel::ParamType& params)
     }
 }
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/impl.h b/src/turbomind/kernels/attention/impl.h
@@ -39,4 +39,4 @@ struct Impl {};
 
 }  // namespace attention
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/impl_m16n8.h b/src/turbomind/kernels/attention/impl_m16n8.h
@@ -262,4 +262,4 @@ struct Impl_m16k8 {
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/impl_sm70.h b/src/turbomind/kernels/attention/impl_sm70.h
@@ -25,8 +25,8 @@ mma_m8n8k4_row_col(Array<float, 8>& d, const Array<half, 4>& a, const Array<half
         "{%10, %11},"
         "{%12, %13, %14, %15, %16, %17, %18, %19};"
         : "=f"(d[0]), "=f"(d[1]), "=f"(d[2]), "=f"(d[3]), "=f"(d[4]), "=f"(d[5]), "=f"(d[6]), "=f"(d[7])
-        : "r"(A[0]), "r"(A[1]), 
-          "r"(B[0]), "r"(B[1]), 
+        : "r"(A[0]), "r"(A[1]),
+          "r"(B[0]), "r"(B[1]),
           "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]), "f"(c[4]), "f"(c[5]), "f"(c[6]), "f"(c[7]));
 // clang-format on
 #endif
@@ -46,8 +46,8 @@ mma_m8n8k4_row_row(Array<float, 8>& d, const Array<half, 4>& a, const Array<half
         "{%10, %11},"
         "{%12, %13, %14, %15, %16, %17, %18, %19};"
         : "=f"(d[0]), "=f"(d[1]), "=f"(d[2]), "=f"(d[3]), "=f"(d[4]), "=f"(d[5]), "=f"(d[6]), "=f"(d[7])
-        : "r"(A[0]), "r"(A[1]), 
-          "r"(B[0]), "r"(B[1]), 
+        : "r"(A[0]), "r"(A[1]),
+          "r"(B[0]), "r"(B[1]),
           "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]), "f"(c[4]), "f"(c[5]), "f"(c[6]), "f"(c[7]));
 // clang-format on
 #endif
@@ -552,4 +552,4 @@ struct Impl<Sm70_884, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_S, H
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/impl_sm75.h b/src/turbomind/kernels/attention/impl_sm75.h
@@ -283,4 +283,4 @@ struct Impl<Sm75_1688, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H, WARP_Q, WARP_S, H
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/impl_sm80.h b/src/turbomind/kernels/attention/impl_sm80.h
@@ -280,4 +280,4 @@ struct Impl<Sm80_16816, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H, WARP_Q, WARP_S,
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/iterator_sm70.h b/src/turbomind/kernels/attention/iterator_sm70.h
@@ -56,4 +56,4 @@ struct Sm70GmemIterator: BaseGmemIterator<T, Map, SmemLayout> {
     }
 };
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/iterator_sm80.h b/src/turbomind/kernels/attention/iterator_sm80.h
@@ -90,4 +90,4 @@ struct Sm80GmemIterator: BaseGmemIterator<T, Map, SmemLayout> {
     }
 };
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/kv_cache_utils.cu b/src/turbomind/kernels/attention/kv_cache_utils.cu
@@ -478,4 +478,4 @@ template void invokeFlattenKV(nv_bfloat16* k,
                               cudaStream_t stream);
 #endif
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/kv_cache_utils.h b/src/turbomind/kernels/attention/kv_cache_utils.h
@@ -103,4 +103,4 @@ void invokeFlattenKV_(const AttentionParams<T>& params, int sum_k_len)
                     params.stream);
 }
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/mainloop.h b/src/turbomind/kernels/attention/mainloop.h
@@ -7,4 +7,4 @@ namespace turbomind::attention {
 template<class Tag, class Attention>
 struct Mainloop {};
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/mainloop_sm70.h b/src/turbomind/kernels/attention/mainloop_sm70.h
@@ -139,4 +139,4 @@ struct Mainloop<arch::Sm70, Impl_> {
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/mainloop_sm80.h b/src/turbomind/kernels/attention/mainloop_sm80.h
@@ -317,7 +317,7 @@ struct Mainloop<Sm80_CpAsync<Stages>, Impl_> {
     // Load      : K0,K1 | V0,K2,V1,K3 ...
     // Compute   :    K0 | K1,V0,K2,V1 ...
     // Conclusion:
-    // - more reigster consumption (209 -> 250)
+    // - more register consumption (209 -> 250)
     // - more interleaved HMMA and FMA
     // - slight performance gain
     template<class GmemIterK, class GmemIterV, class BlockIter, class StoreS>
@@ -457,4 +457,4 @@ struct Mainloop<Sm80_CpAsync<Stages>, Impl_> {
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/quantization.h b/src/turbomind/kernels/attention/quantization.h
@@ -391,4 +391,4 @@ __device__ void permute_V(Array<uint8_t, Map::kAccessC> (&x)[Map::kIterS][Map::k
     }
 }
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/reduce.cu b/src/turbomind/kernels/attention/reduce.cu
@@ -20,4 +20,4 @@ void dispatchReduce(T*           out,
 {
 }
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/reduce.h b/src/turbomind/kernels/attention/reduce.h
@@ -206,4 +206,4 @@ struct Reduce {
     }
 };
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/reduce_template.h b/src/turbomind/kernels/attention/reduce_template.h
@@ -99,4 +99,4 @@ void dispatchReduce(T*           out,
     invoke(std::true_type{}, stride_k);
 }
 
-}  // namespace turbomind::attention
+}  // namespace turbomind::attention
diff --git a/src/turbomind/kernels/attention/reference.cu b/src/turbomind/kernels/attention/reference.cu
@@ -331,4 +331,4 @@ template class Reference<half>;
 template class Reference<nv_bfloat16>;
 #endif
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/reference.h b/src/turbomind/kernels/attention/reference.h
@@ -67,4 +67,4 @@ class Reference {
     int batch_size_{};
 };
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/smem_layout.h b/src/turbomind/kernels/attention/smem_layout.h
@@ -127,4 +127,4 @@ struct SmemAccessor {
     // }
 };
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/test_attention.cu b/src/turbomind/kernels/attention/test_attention.cu
@@ -522,4 +522,4 @@ int main(int argc, char* argv[])
     // test_attention<half>();
 
     test_attention<nv_bfloat16>();
-}
+}
diff --git a/src/turbomind/kernels/attention/utils.cc b/src/turbomind/kernels/attention/utils.cc
@@ -45,4 +45,4 @@ int GetSplitCount(
     return std::get<int>(best);
 }
 
-}  // namespace turbomind
+}  // namespace turbomind
diff --git a/src/turbomind/kernels/attention/utils.h b/src/turbomind/kernels/attention/utils.h
@@ -10,4 +10,4 @@ int GetSplitCount(int   max_split_cnt,
                   float alpha = 1,
                   float beta  = 1e-3);
 
-}
+}
diff --git a/src/turbomind/kernels/flash_attention/CMakeLists.txt b/src/turbomind/kernels/flash_attention/CMakeLists.txt
@@ -10,4 +10,4 @@ target_link_libraries(flash_attention PRIVATE llama_fmha)
 if (NOT MSVC)
     add_subdirectory(flash_attention2)
     target_link_libraries(flash_attention PRIVATE flash_attention2)
-endif()
+endif()
diff --git a/src/turbomind/kernels/flash_attention/flash_attention.cu b/src/turbomind/kernels/flash_attention/flash_attention.cu
@@ -66,4 +66,4 @@ template class FlashAttentionOp<half>;
 template class FlashAttentionOp<__nv_bfloat16>;
 #endif
 
-}  // namespace turbomind
+}  // namespace turbomind
Original file line number	Diff line number	Diff line change
Expand Up		@@ -9,4 +9,4 @@ using Kernel = typename attention::AttentionConfig<arch::Sm70, half, half, 1, 12

		template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);

		} // namespace turbomind
		} // namespace turbomind
-Original file line number
+Diff line change
@@ Expand Up @@
         }
     }
-    }  // namespace turbomind
+    }  // namespace turbomind
Original file line number	Diff line number	Diff line change
Expand Up		@@ -143,4 +143,4 @@ void dispatchDecoding(const AttentionParams<nv_bfloat16>& params)

		template void dispatchDecoding(const AttentionParams<half>& params);

		} // namespace turbomind
		} // namespace turbomind
Original file line number	Diff line number	Diff line change
Expand Up		@@ -39,4 +39,4 @@ struct Impl {};

		} // namespace attention

		} // namespace turbomind
		} // namespace turbomind