Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
---
Language: Cpp
BasedOnStyle: Google
IndentWidth: 4
IndentWidth: 2
TabWidth: 2
ContinuationIndentWidth: 4
AccessModifierOffset: -1 # The private/protected/public has no indent in class
Expand Down
10 changes: 9 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,15 @@ repos:
# - id: codespell
# additional_dependencies: ['tomli']
# args: ['--toml', 'pyproject.toml']

# For C++ files
- repo: local
  hooks:
    # Runs the repository's own wrapper script so formatting uses the
    # project-selected clang-format; `-i` rewrites files in place.
    - id: clang-format
      name: clang-format
      description: Format files with ClangFormat.
      entry: bash ./scripts/clang_format.sh -i
      language: system
      # C/C++/CUDA sources and headers, plus .xpu/.kps kernel files.
      files: \.(c|cc|cxx|cpp|cu|h|cuh|hpp|hxx|xpu|kps)$
# markdown
- repo: https://github.com/jackdewinter/pymarkdown
rev: v0.9.29
Expand Down
20 changes: 10 additions & 10 deletions custom_ops/cpu_ops/avx_weight_only_fake.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,28 @@ std::vector<paddle::Tensor> InvokeAvxWeightOnly(const paddle::Tensor &x,
const paddle::Tensor &w_bias,
const std::string &alog,
bool trans) {
auto out_shape = x.shape();
out_shape[out_shape.size() - 1] = weight.shape()[1];
auto out = paddle::empty(out_shape, x.dtype(), paddle::CPUPlace());
return {out};
auto out_shape = x.shape();
out_shape[out_shape.size() - 1] = weight.shape()[1];
auto out = paddle::empty(out_shape, x.dtype(), paddle::CPUPlace());
return {out};
}

// Shape inference for the avx_weight_only op.
//
// Collapses every dimension of `x_shape` except the last into a single row
// count `m` and pairs it with the second dimension of `weigh_shape`.
//
// Params:
//   x_shape          - shape of the activation input; may be empty, in which
//                      case the row count is 1.
//   weigh_shape      - shape of the weight; must have at least two entries
//                      because element [1] is read.
//   weigh_bias_shape - unused here.
// Returns a single output shape {m, weigh_shape[1]}.
std::vector<std::vector<int64_t>> AvxWeightOnlyInferShape(
    std::vector<int64_t> x_shape,
    std::vector<int64_t> weigh_shape,
    std::vector<int64_t> weigh_bias_shape) {
  // Accumulate in 64 bits: the product of leading dimensions can exceed the
  // range of a 32-bit int.
  int64_t m = 1;
  // `i + 1 < size()` instead of `i < size() - 1`: size() is unsigned, so the
  // subtraction would wrap around for an empty shape and walk off the vector.
  for (std::size_t i = 0; i + 1 < x_shape.size(); ++i) {
    m *= x_shape[i];
  }
  return {std::vector<int64_t>{m, weigh_shape[1]}};
}

// Dtype inference for the avx_weight_only op: the single output takes the
// dtype of `x`. `weight_dtype` and `weight_bias_dtype` are not consulted.
std::vector<paddle::DataType> AvxWeightOnlyInferDtype(
    paddle::DataType x_dtype,
    paddle::DataType weight_dtype,
    paddle::DataType weight_bias_dtype) {
  return {x_dtype};
}

PD_BUILD_STATIC_OP(avx_weight_only)
Expand Down
110 changes: 51 additions & 59 deletions custom_ops/cpu_ops/get_padding_offset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ void remove_padding(int64_t *output_data,
const int *cum_offsets,
const int sequence_length,
const int bsz) {
for (int bi = 0; bi < bsz; ++bi) {
for (int i = 0; i < seq_lens[bi]; ++i) {
const int tgt_seq_id = bi * sequence_length - cum_offsets[bi] + i;
const int src_seq_id = bi * sequence_length + i;
output_data[tgt_seq_id] = input_data[src_seq_id];
}
for (int bi = 0; bi < bsz; ++bi) {
for (int i = 0; i < seq_lens[bi]; ++i) {
const int tgt_seq_id = bi * sequence_length - cum_offsets[bi] + i;
const int src_seq_id = bi * sequence_length + i;
output_data[tgt_seq_id] = input_data[src_seq_id];
}
}
}

void get_padding_offset_kernel(int *padding_offset,
Expand All @@ -37,85 +37,77 @@ void get_padding_offset_kernel(int *padding_offset,
const int *seq_lens,
const int max_seq_len,
const int bsz) {
for (int bi = 0; bi < bsz; ++bi) {
int cum_offset = bi == 0 ? 0 : cum_offsets[bi - 1];
auto seq_len_now = seq_lens[bi];
for (int i = 0; i < seq_len_now; ++i) {
padding_offset[bi * max_seq_len - cum_offset + i] = cum_offset;
}
cum_offsets_out[bi] = cum_offset;
int cum_seq_len = (bi + 1) * max_seq_len - cum_offsets[bi];
cu_seqlens_q[bi + 1] = cum_seq_len;
cu_seqlens_k[bi + 1] = cum_seq_len;
for (int bi = 0; bi < bsz; ++bi) {
int cum_offset = bi == 0 ? 0 : cum_offsets[bi - 1];
auto seq_len_now = seq_lens[bi];
for (int i = 0; i < seq_len_now; ++i) {
padding_offset[bi * max_seq_len - cum_offset + i] = cum_offset;
}
cum_offsets_out[bi] = cum_offset;
int cum_seq_len = (bi + 1) * max_seq_len - cum_offsets[bi];
cu_seqlens_q[bi + 1] = cum_seq_len;
cu_seqlens_k[bi + 1] = cum_seq_len;
}
}

// Kernel entry point for the get_padding_offset_cpu op.
//
// Params:
//   input_ids   - int64 token ids; dimension 1 of its shape is used as the
//                 padded per-sequence length.
//   cum_offsets - int data handed to get_padding_offset_kernel as the input
//                 offsets; a CPU copy of it serves as the kernel's
//                 `cum_offsets_out` scratch buffer.
//   token_num   - its first element (read as int64) sizes the two packed
//                 outputs.
//   seq_len     - int per-sequence lengths; dimension 0 of its shape is the
//                 batch size.
// Returns {x_remove_padding, padding_offset, cu_seqlens_q, cu_seqlens_k}.
std::vector<paddle::Tensor> GetPaddingOffset(const paddle::Tensor &input_ids,
                                             const paddle::Tensor &cum_offsets,
                                             const paddle::Tensor &token_num,
                                             const paddle::Tensor &seq_len) {
  std::vector<int64_t> input_ids_shape = input_ids.shape();
  const int bsz = seq_len.shape()[0];
  const int seq_length = input_ids_shape[1];
  auto cum_offsets_out = cum_offsets.copy_to(paddle::CPUPlace(), false);
  auto cpu_token_num = token_num.copy_to(paddle::CPUPlace(), false);

  // NOTE(review): the int64 token count is narrowed to int here.
  const int token_num_data = cpu_token_num.data<int64_t>()[0];

  // Packed outputs hold one entry per token; the cu_seqlens outputs hold
  // bsz + 1 entries and start zero-filled (the kernel only writes indices
  // 1..bsz, so entry 0 stays 0).
  auto x_remove_padding = paddle::empty(
      {token_num_data}, paddle::DataType::INT64, input_ids.place());
  auto padding_offset = paddle::empty(
      {token_num_data}, paddle::DataType::INT32, input_ids.place());
  auto cu_seqlens_q =
      paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place());
  auto cu_seqlens_k =
      paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place());

  // Must run before remove_padding: it fills cum_offsets_out, which
  // remove_padding then reads as its per-sequence offsets.
  get_padding_offset_kernel(padding_offset.data<int>(),
                            cum_offsets_out.data<int>(),
                            cu_seqlens_q.data<int>(),
                            cu_seqlens_k.data<int>(),
                            cum_offsets.data<int>(),
                            seq_len.data<int>(),
                            seq_length,
                            bsz);
  remove_padding(x_remove_padding.data<int64_t>(),
                 input_ids.data<int64_t>(),
                 seq_len.data<int>(),
                 cum_offsets_out.data<int>(),
                 seq_length,
                 bsz);
  return {x_remove_padding, padding_offset, cu_seqlens_q, cu_seqlens_k};
}

// Shape inference for the get_padding_offset_cpu op.
//
// Only `seq_len_shape[0]` (the batch size) is consulted; the other shapes are
// accepted for signature compatibility and are not read.
//
// Returns four shapes, in output order:
//   {-1}, {-1}          - the two packed outputs, whose length is only known
//                         at run time
//   {bsz + 1}, {bsz + 1} - the two cu_seqlens outputs
std::vector<std::vector<int64_t>> GetPaddingOffsetInferShape(
    const std::vector<int64_t> &input_ids_shape,
    const std::vector<int64_t> &cum_offsets_shape,
    const std::vector<int64_t> &token_num_shape,
    const std::vector<int64_t> &seq_len_shape) {
  const int64_t bsz = seq_len_shape[0];
  const std::vector<int64_t> dynamic_len{-1};
  const std::vector<int64_t> per_batch{bsz + 1};
  return {dynamic_len, dynamic_len, per_batch, per_batch};
}

// Dtype inference for the get_padding_offset_cpu op.
//
// Returns four dtypes, in output order: the first output takes the dtype of
// `input_ids`; the remaining three take the dtype of `seq_len`.
// `cum_offsets_dtype` and `token_num_dtype` are not consulted.
std::vector<paddle::DataType> GetPaddingOffsetInferDtype(
    const paddle::DataType &input_ids_dtype,
    const paddle::DataType &cum_offsets_dtype,
    const paddle::DataType &token_num_dtype,
    const paddle::DataType &seq_len_dtype) {
  return {input_ids_dtype, seq_len_dtype, seq_len_dtype, seq_len_dtype};
}

// Registers the `get_padding_offset_cpu` op: four inputs, four outputs, with
// GetPaddingOffset as the kernel and the matching shape/dtype inference
// functions. Output names are listed in the order the kernel returns them.
PD_BUILD_STATIC_OP(get_padding_offset_cpu)
    .Inputs({"input_ids", "cum_offsets", "token_num", "seq_len"})
    .Outputs(
        {"x_remove_padding", "padding_offset", "cu_seqlens_q", "cu_seqlens_k"})
    .SetKernelFn(PD_KERNEL(GetPaddingOffset))
    .SetInferShapeFn(PD_INFER_SHAPE(GetPaddingOffsetInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(GetPaddingOffsetInferDtype));
Loading
Loading