From 1386d1e82950b5c9a227291f8d6ebcd22882d068 Mon Sep 17 00:00:00 2001
From: Elsa
Date: Sun, 28 Jul 2024 21:13:31 +0800
Subject: [PATCH] Move cutlass_fp8_supported into __init__

---
 vllm/model_executor/layers/quantization/fbgemm_fp8.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/fbgemm_fp8.py b/vllm/model_executor/layers/quantization/fbgemm_fp8.py
index 6cf156459a61b..9aaffcd1823cc 100644
--- a/vllm/model_executor/layers/quantization/fbgemm_fp8.py
+++ b/vllm/model_executor/layers/quantization/fbgemm_fp8.py
@@ -73,6 +73,7 @@ class FBGEMMFp8LinearMethod(LinearMethodBase):
 
     def __init__(self, quant_config: FBGEMMFp8Config):
         self.quant_config = quant_config
+        self.cutlass_fp8_supported = cutlass_fp8_supported()
 
     def create_weights(
         self,
@@ -146,5 +147,5 @@ def apply(self,
                                 input_scale=None,
                                 input_scale_ub=layer.input_scale_ub,
                                 bias=bias,
-                                cutlass_fp8_supported=cutlass_fp8_supported(),
+                                cutlass_fp8_supported=self.cutlass_fp8_supported,
                                 use_per_token_if_dynamic=True)