fix attn_mask on 310I
yao-fengchen committed Oct 24, 2024
1 parent 379b2d3 commit 27afd12
Showing 1 changed file with 2 additions and 1 deletion.
dlinfer/vendor/ascend/torch_npu_ops.py (3 changes: 2 additions & 1 deletion)
@@ -109,6 +109,7 @@ def prefill_attention(
     scale_value = (
         softmax_scale if softmax_scale else 1.0 / math.sqrt(query.shape[-1])
     )
+    assert SocVersion.is_Ascend910B() or SocVersion.is_Ascend310P()
     if SocVersion.is_Ascend910B():
         attn_output[:] = torch.ops.npu.npu_fusion_attention(
             query,
@@ -142,7 +143,7 @@ def prefill_attention(
             single_v,
             single_o,
             padding_mask=None,
-            atten_mask=attn_mask[0],
+            atten_mask=None,
             actual_seq_lengths=actual_seq_lengths,
             num_heads=num_q_heads,
             scale_value=scale_value,
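
For context, here is a minimal runnable sketch of the dispatch pattern this commit tightens: assert a supported SoC up front, hand the boolean mask to the fused 910B kernel, and pass no mask on 310P. The SocVersion stand-in and select_atten_mask helper below are simplified assumptions for illustration, not dlinfer's actual implementation; the real kernel calls live in dlinfer/vendor/ascend/torch_npu_ops.py.

class SocVersion:
    # Hypothetical stand-in: dlinfer's real helper inspects the NPU SoC
    # name reported by torch_npu at runtime.
    _soc = "Ascend310P3"

    @classmethod
    def is_Ascend910B(cls) -> bool:
        return cls._soc.startswith("Ascend910B")

    @classmethod
    def is_Ascend310P(cls) -> bool:
        return cls._soc.startswith("Ascend310P")


def select_atten_mask(attn_mask):
    # Fail fast on unsupported SoCs instead of falling through silently,
    # mirroring the assert added in this commit.
    assert SocVersion.is_Ascend910B() or SocVersion.is_Ascend310P()
    if SocVersion.is_Ascend910B():
        # 910B path: the fused kernel (torch.ops.npu.npu_fusion_attention
        # in the real code) consumes the mask directly.
        return attn_mask[0]
    # 310P path: after this commit no mask is passed; the diff shows the
    # kernel still receives actual_seq_lengths, which presumably bounds
    # the attention span per sequence.
    return None


print(select_atten_mask([object()]))  # -> None on the 310P stand-in

Guarding with an assert rather than an else branch makes an unsupported SoC fail loudly at the call site instead of producing wrong attention output.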
