Commit 64fd64f

[TRTLLM-6262] Fix Llama4 Scout FP4 crash issue (#5834)
Signed-off-by: Chenfei Zhang <[email protected]>
1 parent 4df5f96

1 file changed

tensorrt_llm/_torch/models/modeling_llama.py

Lines changed: 4 additions & 0 deletions
@@ -159,15 +159,19 @@ def _forward_nope(
         q = self._attention_scaling(q, position_ids)
 
         out_scale = None
+        out_scale_sf = None
         if self.o_proj.has_fp8_qdq or self.o_proj.has_nvfp4 or self.o_proj.has_fp8_block_scales:
             out_scale = self.o_proj.inv_input_scale
+        if self.o_proj.has_nvfp4 and self.support_nvfp4_output:
+            out_scale_sf = self.o_proj.input_scale
 
         q, k, v = self.convert_qkv(q, k, v)
         attn_output = self.attn.forward(q,
                                         k,
                                         v,
                                         attn_metadata,
                                         out_scale=out_scale,
+                                        out_scale_sf=out_scale_sf,
                                         attention_mask=attention_mask,
                                         mrope_config=mrope_config)
 
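For readers outside the codebase, the change can be read as: default out_scale_sf to None, then populate it only when the output projection is NVFP4-quantized and the attention backend supports NVFP4 output, so the keyword passed to attn.forward is always defined and the NVFP4 scale factor is only supplied where it can actually be consumed. The sketch below isolates that selection logic; QuantizedLinearStub and select_out_scales are hypothetical stand-ins for illustration, not TensorRT-LLM APIs.

# Hypothetical, self-contained sketch of the scale-selection logic in the
# diff above. QuantizedLinearStub and select_out_scales are illustrative
# stand-ins, not TensorRT-LLM APIs.
from dataclasses import dataclass
from typing import Optional, Tuple


@dataclass
class QuantizedLinearStub:
    # Quantization-mode flags mirroring those checked in the diff.
    has_fp8_qdq: bool = False
    has_nvfp4: bool = False
    has_fp8_block_scales: bool = False
    # Scales kept as plain floats here; they are tensors in the real code.
    input_scale: Optional[float] = None
    inv_input_scale: Optional[float] = None


def select_out_scales(
        o_proj: QuantizedLinearStub,
        support_nvfp4_output: bool) -> Tuple[Optional[float], Optional[float]]:
    # Default both scales to None so every code path defines them,
    # matching the initialization the fix adds.
    out_scale = None
    out_scale_sf = None
    if o_proj.has_fp8_qdq or o_proj.has_nvfp4 or o_proj.has_fp8_block_scales:
        out_scale = o_proj.inv_input_scale
    # Hand over the NVFP4 scale factor only when the projection is
    # NVFP4-quantized AND the attention backend supports NVFP4 output.
    if o_proj.has_nvfp4 and support_nvfp4_output:
        out_scale_sf = o_proj.input_scale
    return out_scale, out_scale_sf


if __name__ == "__main__":
    proj = QuantizedLinearStub(has_nvfp4=True, input_scale=0.5,
                               inv_input_scale=2.0)
    # NVFP4 weights but no NVFP4-output support: out_scale_sf stays None.
    print(select_out_scales(proj, support_nvfp4_output=False))  # (2.0, None)
    # NVFP4 weights with NVFP4-output support: both scales populated.
    print(select_out_scales(proj, support_nvfp4_output=True))   # (2.0, 0.5)

Leaving out_scale_sf as None when NVFP4 output is unsupported plausibly lets the attention kernel fall back to its non-FP4 output path instead of receiving a scale factor it cannot use, which is a reasonable reading of how this commit avoids the crash.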