
Commit

fix lint
grimoire committed Sep 18, 2023
1 parent c1ec6e3 commit a2d96b0
Showing 2 changed files with 8 additions and 8 deletions.
@@ -1422,8 +1422,8 @@ __global__ void masked_multihead_attention_kernel(Multihead_attention_params<T>
 // Trigger the stores to global memory.
 if (Dh == Dh_MAX || co < Dh / QK_ELTS_IN_16B) {

-    size_t offset = params.kv_cache_per_sample_offset + kvhi * params.memory_max_len * Dh + tlength_circ * Dh
-        + co * QK_ELTS_IN_16B + ci;
+    size_t offset = params.kv_cache_per_sample_offset + kvhi * params.memory_max_len * Dh
+        + tlength_circ * Dh + co * QK_ELTS_IN_16B + ci;

     if (!QUANT_POLICY) {
         *reinterpret_cast<Qk_vec_m*>(&params.k_cache_per_sample[bi][offset]) =
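For context, the re-wrapped expression computes a flat index into the per-sample KV cache. A minimal standalone sketch of that arithmetic, assuming a per-sample layout of [kv_head][memory_max_len][Dh] on top of a base offset (the helper name and signature below are illustrative, not part of the kernel):

// Illustrative sketch of the flat KV-cache offset computed in the hunk above;
// parameter names mirror the kernel's variables.
inline size_t kv_cache_offset(size_t base,        // params.kv_cache_per_sample_offset
                              int kvhi,           // KV head index
                              int memory_max_len, // cache capacity in timesteps
                              int Dh,             // channels per head
                              int tlength_circ,   // slot in the circular timestep buffer
                              int co,             // 16-byte chunk index within the head
                              int ci,             // channel index inside the chunk
                              int QK_ELTS_IN_16B) // elements per 16-byte chunk
{
    return base + (size_t)kvhi * memory_max_len * Dh  // this head's slab
                + (size_t)tlength_circ * Dh           // this timestep's row
                + (size_t)co * QK_ELTS_IN_16B + ci;   // element within the row
}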
12 changes: 6 additions & 6 deletions src/turbomind/models/llama/llama_kernels.h
@@ -80,12 +80,12 @@ void invokeMyCopyInt(int* dst, const int* src, size_t count, cudaStream_t st);

 template<typename T>
 struct BaseAttentionLayout {
-    int stride_batch;
-    int stride_seq;
-    int stride_head;
-    bool use_seqlens = false;
-    size_t batch_seqs_offset = 0;
-    T** batch_seqs = nullptr;
+    int    stride_batch;
+    int    stride_seq;
+    int    stride_head;
+    bool   use_seqlens       = false;
+    size_t batch_seqs_offset = 0;
+    T**    batch_seqs        = nullptr;
 };

 template<typename T>
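As a usage note, the strides in BaseAttentionLayout presumably locate element (b, s, h, d) of an attention buffer at b * stride_batch + s * stride_seq + h * stride_head + d. A hedged sketch for a dense, contiguous [batch, seq_len, head_num, size_per_head] tensor (the concrete sizes are assumptions for illustration):

// Hypothetical configuration for a contiguous
// [batch, seq_len, head_num, size_per_head] buffer.
constexpr int seq_len       = 128;
constexpr int head_num      = 32;
constexpr int size_per_head = 128;

BaseAttentionLayout<float> layout{};
layout.stride_batch = seq_len * head_num * size_per_head;  // elements per sample
layout.stride_seq   = head_num * size_per_head;            // elements per timestep
layout.stride_head  = size_per_head;                       // elements per head
// use_seqlens, batch_seqs_offset, and batch_seqs keep their defaults here;
// a ragged batch (per-sequence pointers) would set them instead.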

