PaddlePaddle · zeroRains · Oct 10, 2025 · Oct 11, 2025 · Oct 13, 2025 · Oct 14, 2025
diff --git a/.clang-format b/.clang-format
@@ -16,7 +16,7 @@
 ---
 Language:        Cpp
 BasedOnStyle:  Google
-IndentWidth:     4
+IndentWidth:     2
 TabWidth:        2
 ContinuationIndentWidth: 4
 AccessModifierOffset: -1  # The private/protected/public has no indent in class

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -34,7 +34,15 @@ repos:
 #   - id: codespell
 #     additional_dependencies: ['tomli']
 #     args: ['--toml', 'pyproject.toml']
-
+# For C++ files
+- repo: local
+  hooks:
+    - id: clang-format
+      name: clang-format
+      description: Format files with ClangFormat.
+      entry: bash ./scripts/clang_format.sh -i
+      language: system
+      files: \.(c|cc|cxx|cpp|cu|h|cuh|hpp|hxx|xpu|kps)$
 # markdown
 - repo: https://github.com/jackdewinter/pymarkdown
   rev: v0.9.29

diff --git a/custom_ops/cpu_ops/avx_weight_only_fake.cc b/custom_ops/cpu_ops/avx_weight_only_fake.cc
@@ -19,28 +19,28 @@ std::vector<paddle::Tensor> InvokeAvxWeightOnly(const paddle::Tensor &x,
                                                 const paddle::Tensor &w_bias,
                                                 const std::string &alog,
                                                 bool trans) {
-    auto out_shape = x.shape();
-    out_shape[out_shape.size() - 1] = weight.shape()[1];
-    auto out = paddle::empty(out_shape, x.dtype(), paddle::CPUPlace());
-    return {out};
+  auto out_shape = x.shape();
+  out_shape[out_shape.size() - 1] = weight.shape()[1];
+  auto out = paddle::empty(out_shape, x.dtype(), paddle::CPUPlace());
+  return {out};
 }
 
 std::vector<std::vector<int64_t>> AvxWeightOnlyInferShape(
     std::vector<int64_t> x_shape,
     std::vector<int64_t> weigh_shape,
     std::vector<int64_t> weigh_bias_shape) {
-    int m = 1;
-    for (int i = 0; i < x_shape.size() - 1; i++) {
-        m = m * x_shape[i];
-    }
-    return {std::vector<int64_t>{m, weigh_shape[1]}};
+  int m = 1;
+  for (int i = 0; i < x_shape.size() - 1; i++) {
+    m = m * x_shape[i];
+  }
+  return {std::vector<int64_t>{m, weigh_shape[1]}};
 }
 
 std::vector<paddle::DataType> AvxWeightOnlyInferDtype(
     paddle::DataType x_dtype,
     paddle::DataType weight_dtype,
     paddle::DataType weight_bias_dtype) {
-    return {x_dtype};
+  return {x_dtype};
 }
 
 PD_BUILD_STATIC_OP(avx_weight_only)

diff --git a/custom_ops/cpu_ops/get_padding_offset.cc b/custom_ops/cpu_ops/get_padding_offset.cc
@@ -20,13 +20,13 @@ void remove_padding(int64_t *output_data,
                     const int *cum_offsets,
                     const int sequence_length,
                     const int bsz) {
-    for (int bi = 0; bi < bsz; ++bi) {
-        for (int i = 0; i < seq_lens[bi]; ++i) {
-            const int tgt_seq_id = bi * sequence_length - cum_offsets[bi] + i;
-            const int src_seq_id = bi * sequence_length + i;
-            output_data[tgt_seq_id] = input_data[src_seq_id];
-        }
+  for (int bi = 0; bi < bsz; ++bi) {
+    for (int i = 0; i < seq_lens[bi]; ++i) {
+      const int tgt_seq_id = bi * sequence_length - cum_offsets[bi] + i;
+      const int src_seq_id = bi * sequence_length + i;
+      output_data[tgt_seq_id] = input_data[src_seq_id];
     }
+  }
 }
 
 void get_padding_offset_kernel(int *padding_offset,
@@ -37,85 +37,77 @@ void get_padding_offset_kernel(int *padding_offset,
                                const int *seq_lens,
                                const int max_seq_len,
                                const int bsz) {
-    for (int bi = 0; bi < bsz; ++bi) {
-        int cum_offset = bi == 0 ? 0 : cum_offsets[bi - 1];
-        auto seq_len_now = seq_lens[bi];
-        for (int i = 0; i < seq_len_now; ++i) {
-            padding_offset[bi * max_seq_len - cum_offset + i] = cum_offset;
-        }
-        cum_offsets_out[bi] = cum_offset;
-        int cum_seq_len = (bi + 1) * max_seq_len - cum_offsets[bi];
-        cu_seqlens_q[bi + 1] = cum_seq_len;
-        cu_seqlens_k[bi + 1] = cum_seq_len;
+  for (int bi = 0; bi < bsz; ++bi) {
+    int cum_offset = bi == 0 ? 0 : cum_offsets[bi - 1];
+    auto seq_len_now = seq_lens[bi];
+    for (int i = 0; i < seq_len_now; ++i) {
+      padding_offset[bi * max_seq_len - cum_offset + i] = cum_offset;
     }
+    cum_offsets_out[bi] = cum_offset;
+    int cum_seq_len = (bi + 1) * max_seq_len - cum_offsets[bi];
+    cu_seqlens_q[bi + 1] = cum_seq_len;
+    cu_seqlens_k[bi + 1] = cum_seq_len;
+  }
 }
 
 std::vector<paddle::Tensor> GetPaddingOffset(const paddle::Tensor &input_ids,
                                              const paddle::Tensor &cum_offsets,
                                              const paddle::Tensor &token_num,
                                              const paddle::Tensor &seq_len) {
-    std::vector<int64_t> input_ids_shape = input_ids.shape();
-    const int bsz = seq_len.shape()[0];
-    const int seq_length = input_ids_shape[1];
-    auto cum_offsets_out = cum_offsets.copy_to(paddle::CPUPlace(), false);
-    auto cpu_token_num = token_num.copy_to(paddle::CPUPlace(), false);
+  std::vector<int64_t> input_ids_shape = input_ids.shape();
+  const int bsz = seq_len.shape()[0];
+  const int seq_length = input_ids_shape[1];
+  auto cum_offsets_out = cum_offsets.copy_to(paddle::CPUPlace(), false);
+  auto cpu_token_num = token_num.copy_to(paddle::CPUPlace(), false);
 
-    const int token_num_data = cpu_token_num.data<int64_t>()[0];
-    auto x_remove_padding = paddle::empty(
-        {token_num_data}, paddle::DataType::INT64, input_ids.place());
-    auto padding_offset = paddle::empty(
-        {token_num_data}, paddle::DataType::INT32, input_ids.place());
-    auto cu_seqlens_q =
-        paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place());
-    auto cu_seqlens_k =
-        paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place());
-    get_padding_offset_kernel(padding_offset.data<int>(),
-                              cum_offsets_out.data<int>(),
-                              cu_seqlens_q.data<int>(),
-                              cu_seqlens_k.data<int>(),
-                              cum_offsets.data<int>(),
-                              seq_len.data<int>(),
-                              seq_length,
-                              bsz);
-    remove_padding(x_remove_padding.data<int64_t>(),
-                   input_ids.data<int64_t>(),
-                   seq_len.data<int>(),
-                   cum_offsets_out.data<int>(),
-                   seq_length,
-                   bsz);
-    return {x_remove_padding,
-            padding_offset,
-            cu_seqlens_q,
-            cu_seqlens_k};
+  const int token_num_data = cpu_token_num.data<int64_t>()[0];
+  auto x_remove_padding = paddle::empty(
+      {token_num_data}, paddle::DataType::INT64, input_ids.place());
+  auto padding_offset = paddle::empty(
+      {token_num_data}, paddle::DataType::INT32, input_ids.place());
+  auto cu_seqlens_q =
+      paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place());
+  auto cu_seqlens_k =
+      paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place());
+  get_padding_offset_kernel(padding_offset.data<int>(),
+                            cum_offsets_out.data<int>(),
+                            cu_seqlens_q.data<int>(),
+                            cu_seqlens_k.data<int>(),
+                            cum_offsets.data<int>(),
+                            seq_len.data<int>(),
+                            seq_length,
+                            bsz);
+  remove_padding(x_remove_padding.data<int64_t>(),
+                 input_ids.data<int64_t>(),
+                 seq_len.data<int>(),
+                 cum_offsets_out.data<int>(),
+                 seq_length,
+                 bsz);
+  return {x_remove_padding, padding_offset, cu_seqlens_q, cu_seqlens_k};
 }
 
 std::vector<std::vector<int64_t>> GetPaddingOffsetInferShape(
     const std::vector<int64_t> &input_ids_shape,
     const std::vector<int64_t> &cum_offsets_shape,
     const std::vector<int64_t> &token_num_shape,
     const std::vector<int64_t> &seq_len_shape) {
-    int64_t bsz = seq_len_shape[0];
-    int64_t seq_len = input_ids_shape[1];
-    return {{-1}, {-1}, {bsz + 1}, {bsz + 1}};
+  int64_t bsz = seq_len_shape[0];
+  int64_t seq_len = input_ids_shape[1];
+  return {{-1}, {-1}, {bsz + 1}, {bsz + 1}};
 }
 
 std::vector<paddle::DataType> GetPaddingOffsetInferDtype(
     const paddle::DataType &input_ids_dtype,
     const paddle::DataType &cum_offsets_dtype,
     const paddle::DataType &token_num_dtype,
     const paddle::DataType &seq_len_dtype) {
-    return {input_ids_dtype,
-            seq_len_dtype,
-            seq_len_dtype,
-            seq_len_dtype};
+  return {input_ids_dtype, seq_len_dtype, seq_len_dtype, seq_len_dtype};
 }
 
 PD_BUILD_STATIC_OP(get_padding_offset_cpu)
     .Inputs({"input_ids", "cum_offsets", "token_num", "seq_len"})
-    .Outputs({"x_remove_padding",
-              "padding_offset",
-              "cu_seqlens_q",
-              "cu_seqlens_k"})
+    .Outputs(
+        {"x_remove_padding", "padding_offset", "cu_seqlens_q", "cu_seqlens_k"})
     .SetKernelFn(PD_KERNEL(GetPaddingOffset))
     .SetInferShapeFn(PD_INFER_SHAPE(GetPaddingOffsetInferShape))
     .SetInferDtypeFn(PD_INFER_DTYPE(GetPaddingOffsetInferDtype));