From 51ff216d851ba2457a601b47a2a3f19b47f80940 Mon Sep 17 00:00:00 2001
From: George
Date: Sat, 21 Dec 2024 01:36:23 -0500
Subject: [PATCH] [Bugfix] update should_ignore_layer (#11354)

Signed-off-by: George Ohashi
---
 .../layers/quantization/compressed_tensors/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
index a74eaef5efdee..dfae4db71e546 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
@@ -30,7 +30,7 @@ def should_ignore_layer(layer_name: Optional[str],
     # in the safetensors checkpoint. So, we convert the name
     # from the fused version to unfused + check to make sure that
     # each shard of the fused layer has the same scheme.
-    if proj_name in FUSED_LAYER_NAME_MAPPING:
+    if proj_name in FUSED_LAYER_NAME_MAPPING and layer_name not in ignore:
         shard_proj_names = FUSED_LAYER_NAME_MAPPING[proj_name]
 
         # Convert fused_name --> [shard_names]
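
---

For context, below is a minimal, self-contained sketch of the fixed behavior. This is an assumed simplification, not the actual vLLM source: FUSED_LAYER_NAME_MAPPING is abbreviated, and the per-shard check is reduced to plain membership tests where the real function also handles regex-style ignore patterns and validates that all shards share one quantization scheme.

# Simplified sketch of should_ignore_layer after this patch (assumed
# structure; the real vLLM version also matches regex-style ignore
# patterns and errors if shards disagree on their quantization scheme).
from typing import Iterable, Optional

FUSED_LAYER_NAME_MAPPING = {
    "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    "gate_up_proj": ["gate_proj", "up_proj"],
}

def should_ignore_layer(layer_name: Optional[str],
                        ignore: Iterable[str]) -> bool:
    if layer_name is None:
        return False

    # e.g. layer_name = "model.layers.0.self_attn.qkv_proj"
    #      proj_name  = "qkv_proj"
    proj_name = layer_name.split(".")[-1]

    # The fix: only expand a fused layer into its shard names when the
    # fused name itself is not already listed verbatim in `ignore`.
    if proj_name in FUSED_LAYER_NAME_MAPPING and layer_name not in ignore:
        shard_names = [
            layer_name.replace(proj_name, shard)
            for shard in FUSED_LAYER_NAME_MAPPING[proj_name]
        ]
        # Every shard must be ignored for the fused layer to be ignored.
        return all(name in ignore for name in shard_names)

    return layer_name in ignore

# Before the patch, a checkpoint that listed the fused name directly in
# its ignore list was expanded to shard names that never matched, so the
# layer was wrongly quantized. With the fix, the exact match wins:
assert should_ignore_layer("model.layers.0.self_attn.qkv_proj",
                           ignore=["model.layers.0.self_attn.qkv_proj"])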