From 51ff216d851ba2457a601b47a2a3f19b47f80940 Mon Sep 17 00:00:00 2001
From: George
Date: Sat, 21 Dec 2024 01:36:23 -0500
Subject: [PATCH] [Bugfix] update should_ignore_layer (#11354)

Signed-off-by: George Ohashi
---
 .../layers/quantization/compressed_tensors/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
index a74eaef5efdee..dfae4db71e546 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
@@ -30,7 +30,7 @@ def should_ignore_layer(layer_name: Optional[str],
     # in the safetensors checkpoint. So, we convert the name
     # from the fused version to unfused + check to make sure that
     # each shard of the fused layer has the same scheme.
-    if proj_name in FUSED_LAYER_NAME_MAPPING:
+    if proj_name in FUSED_LAYER_NAME_MAPPING and layer_name not in ignore:
         shard_proj_names = FUSED_LAYER_NAME_MAPPING[proj_name]
 
         # Convert fused_name --> [shard_names]
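
---

For context, below is a minimal, self-contained sketch of the fixed behavior. This is an assumed simplification, not the actual vLLM source: FUSED_LAYER_NAME_MAPPING is abbreviated, and the per-shard check is reduced to plain membership tests where the real function also handles regex-style ignore patterns and validates that all shards share one quantization scheme.

# Simplified sketch of should_ignore_layer after this patch (assumed
# structure; the real vLLM version also matches regex-style ignore
# patterns and errors if shards disagree on their quantization scheme).
from typing import Iterable, Optional

FUSED_LAYER_NAME_MAPPING = {
    "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    "gate_up_proj": ["gate_proj", "up_proj"],
}

def should_ignore_layer(layer_name: Optional[str],
                        ignore: Iterable[str]) -> bool:
    if layer_name is None:
        return False

    # e.g. layer_name = "model.layers.0.self_attn.qkv_proj"
    #      proj_name  = "qkv_proj"
    proj_name = layer_name.split(".")[-1]

    # The fix: only expand a fused layer into its shard names when the
    # fused name itself is not already listed verbatim in `ignore`.
    if proj_name in FUSED_LAYER_NAME_MAPPING and layer_name not in ignore:
        shard_names = [
            layer_name.replace(proj_name, shard)
            for shard in FUSED_LAYER_NAME_MAPPING[proj_name]
        ]
        # Every shard must be ignored for the fused layer to be ignored.
        return all(name in ignore for name in shard_names)

    return layer_name in ignore

# Before the patch, a checkpoint that listed the fused name directly in
# its ignore list was expanded to shard names that never matched, so the
# layer was wrongly quantized. With the fix, the exact match wins:
assert should_ignore_layer("model.layers.0.self_attn.qkv_proj",
                           ignore=["model.layers.0.self_attn.qkv_proj"])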