Skip to content

Commit

Permalink
fix lint
Browse files Browse the repository at this point in the history
  • Loading branch information
lzhangzz committed Feb 28, 2024
1 parent f64a19f commit 1fde87a
Show file tree
Hide file tree
Showing 42 changed files with 49 additions and 49 deletions.
6 changes: 3 additions & 3 deletions src/turbomind/kernels/attention/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Copyright (c) OpenMMLab. All rights reserved.

add_library(attention STATIC
attention.cu
decoding.cu
add_library(attention STATIC
attention.cu
decoding.cu
kv_cache_utils.cu
utils.cc
attention_128_f16_sm80.cu
Expand Down
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ struct Sm80 {
static constexpr int value = 80;
};

} // namespace turbomind::arch
} // namespace turbomind::arch
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/attention.cu
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ template void dispatchAttention(const AttentionParams<half>& params);
// template void dispatchAttention(const AttentionParams<nv_bfloat16>& params);
// #endif

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/attention_128_bf16_sm80.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ namespace turbomind {
using Kernel = typename attention::AttentionConfig<arch::Sm80, nv_bfloat16, nv_bfloat16, 1, 128>::Kernel;
template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/attention_128_f16_sm70.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ using Kernel = typename attention::AttentionConfig<arch::Sm70, half, half, 1, 12

template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/attention_128_f16_sm80.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ namespace turbomind {
using Kernel = typename attention::AttentionConfig<arch::Sm80, half, half, 1, 128>::Kernel;
template void invokeAttention<Kernel>(const typename Kernel::ParamType& params);

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/attention_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,4 @@ struct AttentionConfig<arch::Sm70, T, Tkv, 1, HeadDim> {
using Kernel = AttentionUniversal<Mainloop, int, AttentionCtaMap>;
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/attention_template.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,4 @@ void invokeAttention(const typename Kernel::ParamType& params)
}
}

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/cta_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,4 @@ struct ReduceCtaMap {
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding.cu
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,4 @@ void dispatchDecoding(const AttentionParams<nv_bfloat16>& params)

template void dispatchDecoding(const AttentionParams<half>& params);

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ namespace turbomind {
template<class T>
void dispatchDecoding(const AttentionParams<T>& params);

}
}
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding_128_bf16_sm80.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ template void invokeDecoding<sm80_bf16_s8_g1_d128>(const typename sm80_bf16_s8_g
using sm80_bf16_s8_g2_d128 = Decoding<arch::Sm80, nv_bfloat16, int8_t, 2, 128>;
template void invokeDecoding<sm80_bf16_s8_g2_d128>(const typename sm80_bf16_s8_g2_d128::ParamType& params);

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding_128_f16_sm70.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ template void invokeDecoding<sm70_f16_s8_g2_d128>(const typename sm70_f16_s8_g2_
using sm70_f16_s8_g4_d128 = Decoding<arch::Sm70, half, int8_t, 4, 128>;
template void invokeDecoding<sm70_f16_s8_g4_d128>(const typename sm70_f16_s8_g4_d128::ParamType& params);

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding_128_f16_sm80.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ template void invokeDecoding<sm80_f16_s8_g1_d128>(const typename sm80_f16_s8_g1_
using sm80_f16_s8_g2_d128 = Decoding<arch::Sm80, half, int8_t, 2, 128>;
template void invokeDecoding<sm80_f16_s8_g2_d128>(const typename sm80_f16_s8_g2_d128::ParamType& params);

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,4 @@ struct DecodingConfig<arch::Sm70, T, int8_t, Qh, HeadDim> {
// using Kernel = AttentionUniversal<Mainloop, BlockSeqLen, DecodingCtaMap>;
// };

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding_simt.h
Original file line number Diff line number Diff line change
Expand Up @@ -559,4 +559,4 @@ struct Impl<Sm70_Simt, T_, Tkv_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_S
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding_sm80.h
Original file line number Diff line number Diff line change
Expand Up @@ -581,4 +581,4 @@ struct Impl<Sm80_81616, T_, Tkv_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/decoding_template.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@ void invokeDecoding(const typename Kernel::ParamType& params)
}
}

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ struct Impl {};

} // namespace attention

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/impl_m16n8.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,4 +262,4 @@ struct Impl_m16k8 {
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
10 changes: 5 additions & 5 deletions src/turbomind/kernels/attention/impl_sm70.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ mma_m8n8k4_row_col(Array<float, 8>& d, const Array<half, 4>& a, const Array<half
"{%10, %11},"
"{%12, %13, %14, %15, %16, %17, %18, %19};"
: "=f"(d[0]), "=f"(d[1]), "=f"(d[2]), "=f"(d[3]), "=f"(d[4]), "=f"(d[5]), "=f"(d[6]), "=f"(d[7])
: "r"(A[0]), "r"(A[1]),
"r"(B[0]), "r"(B[1]),
: "r"(A[0]), "r"(A[1]),
"r"(B[0]), "r"(B[1]),
"f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]), "f"(c[4]), "f"(c[5]), "f"(c[6]), "f"(c[7]));
// clang-format on
#endif
Expand All @@ -46,8 +46,8 @@ mma_m8n8k4_row_row(Array<float, 8>& d, const Array<half, 4>& a, const Array<half
"{%10, %11},"
"{%12, %13, %14, %15, %16, %17, %18, %19};"
: "=f"(d[0]), "=f"(d[1]), "=f"(d[2]), "=f"(d[3]), "=f"(d[4]), "=f"(d[5]), "=f"(d[6]), "=f"(d[7])
: "r"(A[0]), "r"(A[1]),
"r"(B[0]), "r"(B[1]),
: "r"(A[0]), "r"(A[1]),
"r"(B[0]), "r"(B[1]),
"f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]), "f"(c[4]), "f"(c[5]), "f"(c[6]), "f"(c[7]));
// clang-format on
#endif
Expand Down Expand Up @@ -552,4 +552,4 @@ struct Impl<Sm70_884, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H_, WARP_Q, WARP_S, H
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/impl_sm75.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,4 +283,4 @@ struct Impl<Sm75_1688, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H, WARP_Q, WARP_S, H
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/impl_sm80.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,4 +280,4 @@ struct Impl<Sm80_16816, T_, T_, CTA_H_, CTA_Q_, CTA_S_, WARP_H, WARP_Q, WARP_S,
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/iterator_sm70.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,4 @@ struct Sm70GmemIterator: BaseGmemIterator<T, Map, SmemLayout> {
}
};

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/iterator_sm80.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,4 @@ struct Sm80GmemIterator: BaseGmemIterator<T, Map, SmemLayout> {
}
};

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/kv_cache_utils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -478,4 +478,4 @@ template void invokeFlattenKV(nv_bfloat16* k,
cudaStream_t stream);
#endif

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/kv_cache_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,4 @@ void invokeFlattenKV_(const AttentionParams<T>& params, int sum_k_len)
params.stream);
}

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/mainloop.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ namespace turbomind::attention {
template<class Tag, class Attention>
struct Mainloop {};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/mainloop_sm70.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,4 @@ struct Mainloop<arch::Sm70, Impl_> {
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
4 changes: 2 additions & 2 deletions src/turbomind/kernels/attention/mainloop_sm80.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ struct Mainloop<Sm80_CpAsync<Stages>, Impl_> {
// Load : K0,K1 | V0,K2,V1,K3 ...
// Compute : K0 | K1,V0,K2,V1 ...
// Conclusion:
// - more reigster consumption (209 -> 250)
// - more register consumption (209 -> 250)
// - more interleaved HMMA and FMA
// - slight performance gain
template<class GmemIterK, class GmemIterV, class BlockIter, class StoreS>
Expand Down Expand Up @@ -457,4 +457,4 @@ struct Mainloop<Sm80_CpAsync<Stages>, Impl_> {
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/quantization.h
Original file line number Diff line number Diff line change
Expand Up @@ -391,4 +391,4 @@ __device__ void permute_V(Array<uint8_t, Map::kAccessC> (&x)[Map::kIterS][Map::k
}
}

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ void dispatchReduce(T* out,
{
}

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,4 @@ struct Reduce {
}
};

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/reduce_template.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,4 @@ void dispatchReduce(T* out,
invoke(std::true_type{}, stride_k);
}

} // namespace turbomind::attention
} // namespace turbomind::attention
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/reference.cu
Original file line number Diff line number Diff line change
Expand Up @@ -331,4 +331,4 @@ template class Reference<half>;
template class Reference<nv_bfloat16>;
#endif

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,4 @@ class Reference {
int batch_size_{};
};

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/smem_layout.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,4 @@ struct SmemAccessor {
// }
};

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/test_attention.cu
Original file line number Diff line number Diff line change
Expand Up @@ -522,4 +522,4 @@ int main(int argc, char* argv[])
// test_attention<half>();

test_attention<nv_bfloat16>();
}
}
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ int GetSplitCount(
return std::get<int>(best);
}

} // namespace turbomind
} // namespace turbomind
2 changes: 1 addition & 1 deletion src/turbomind/kernels/attention/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ int GetSplitCount(int max_split_cnt,
float alpha = 1,
float beta = 1e-3);

}
}
2 changes: 1 addition & 1 deletion src/turbomind/kernels/flash_attention/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ target_link_libraries(flash_attention PRIVATE llama_fmha)
if (NOT MSVC)
add_subdirectory(flash_attention2)
target_link_libraries(flash_attention PRIVATE flash_attention2)
endif()
endif()
2 changes: 1 addition & 1 deletion src/turbomind/kernels/flash_attention/flash_attention.cu
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,4 @@ template class FlashAttentionOp<half>;
template class FlashAttentionOp<__nv_bfloat16>;
#endif

} // namespace turbomind
} // namespace turbomind

0 comments on commit 1fde87a

Please sign in to comment.