[Misc] Fix input_scale typing in w8a8_utils.py (vllm-project#6579)
mgoin authored Jul 20, 2024
1 parent 82da168 commit 8f6c9ab
Showing 1 changed file with 2 additions and 2 deletions.
vllm/model_executor/layers/quantization/utils/w8a8_utils.py (2 additions, 2 deletions)
@@ -104,7 +104,7 @@ def apply_fp8_linear(
     input: torch.Tensor,
     weight: torch.Tensor,
     weight_scale: torch.Tensor,
-    input_scale: torch.Tensor,
+    input_scale: Optional[torch.Tensor] = None,
     input_scale_ub: Optional[torch.Tensor] = None,
     bias: Optional[torch.Tensor] = None,
     cutlass_fp8_supported: bool = True,
@@ -192,7 +192,7 @@ def apply_int8_linear(
     input: torch.Tensor,
     weight: torch.Tensor,
     weight_scale: torch.Tensor,
-    input_scale: torch.Tensor,
+    input_scale: Optional[torch.Tensor] = None,
     bias: Optional[torch.Tensor] = None,
 ):
     # ops.scaled_int8_quant supports both dynamic and static quant.
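
The change makes the annotations match how these helpers are used: both apply_fp8_linear and apply_int8_linear can be called without a precomputed activation scale (the in-code comment notes that ops.scaled_int8_quant supports both dynamic and static quant), so input_scale becomes Optional[torch.Tensor] with a default of None. A minimal sketch of the dynamic-vs-static distinction, using a hypothetical quantize_int8 helper rather than vLLM's actual kernels:

```python
from typing import Optional

import torch


def quantize_int8(input: torch.Tensor,
                  input_scale: Optional[torch.Tensor] = None):
    """Quantize activations to int8; compute the scale dynamically if none is given."""
    if input_scale is None:
        # Dynamic quantization: derive the scale from the current activations.
        input_scale = input.abs().max().clamp(min=1e-12) / 127.0
    # Static quantization: reuse the calibrated scale passed by the caller.
    q = torch.clamp(torch.round(input / input_scale), -128, 127).to(torch.int8)
    return q, input_scale


# Usage: None selects per-call (dynamic) quantization,
# a calibrated tensor selects static quantization.
x = torch.randn(4, 8)
q_dyn, s_dyn = quantize_int8(x)
q_stat, s_stat = quantize_int8(x, torch.tensor(0.02))
```

With the old annotation (input_scale: torch.Tensor), type checkers would flag call sites that pass None for the dynamic path; the Optional default removes that mismatch without changing runtime behavior.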
