Commit

Fix input_scale typing in w8a8_utils.py
mgoin authored Jul 19, 2024
1 parent a921e86 commit d048985
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -104,7 +104,7 @@ def apply_fp8_linear(
     input: torch.Tensor,
     weight: torch.Tensor,
     weight_scale: torch.Tensor,
-    input_scale: torch.Tensor,
+    input_scale: Optional[torch.Tensor] = None,
     bias: Optional[torch.Tensor] = None,
     cutlass_fp8_supported: bool = True,
 ) -> torch.Tensor:
@@ -176,7 +176,7 @@ def apply_int8_linear(
     input: torch.Tensor,
     weight: torch.Tensor,
     weight_scale: torch.Tensor,
-    input_scale: torch.Tensor,
+    input_scale: Optional[torch.Tensor] = None,
     bias: Optional[torch.Tensor] = None,
 ):
     # ops.scaled_int8_quant supports both dynamic and static quant.
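
Making input_scale an Optional[torch.Tensor] defaulting to None matches the two ways these helpers are used: static quantization passes a precomputed activation scale, while dynamic quantization passes nothing and lets the quant op derive a scale at runtime, as the comment on ops.scaled_int8_quant notes. The sketch below is only an illustration of that branching under assumed names (quantize_activation_int8 and the per-tensor absmax scale are made up for the example), not vLLM's actual implementation.

from typing import Optional

import torch


def quantize_activation_int8(
    x: torch.Tensor,
    input_scale: Optional[torch.Tensor] = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    # Static quant: the caller supplies a calibrated scale, so reuse it.
    # Dynamic quant: no scale is given, so derive a per-tensor absmax
    # scale from the current activations.
    if input_scale is None:
        input_scale = x.abs().max() / 127.0
    q = torch.clamp(torch.round(x / input_scale), -128, 127).to(torch.int8)
    return q, input_scale


x = torch.randn(4, 8)

# Static: a scale computed offline (e.g. during calibration) is passed in.
q_static, s_static = quantize_activation_int8(x, torch.tensor(0.02))

# Dynamic: omitting input_scale triggers on-the-fly scale computation,
# which is why the parameter defaults to None in the patched signatures.
q_dynamic, s_dynamic = quantize_activation_int8(x)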
