Commit: Fix mamba logic
Signed-off-by: mzusman <[email protected]>
mzusman committed Dec 9, 2024
1 parent 3bc3823 commit 0f70131
Showing 1 changed file with 5 additions and 2 deletions: vllm/config.py
@@ -696,9 +696,12 @@ def get_num_layers_by_block_type(
         # This function relies on 'layers_block_type' in hf_config,
         # for hybrid/attention-free models w/o this attribute,
         # we will need to have workarounds like so
-        if self.is_attention_free and block_type == LayerBlockType.attention:
+        attn_block_type = block_type == LayerBlockType.attention
+        is_full_attn_model = not self.is_hybrid and not self.is_attention_free
+
+        if self.is_attention_free and attn_block_type:
             return 0
-        if not self.is_hybrid and block_type != LayerBlockType.attention:
+        if is_full_attn_model and not attn_block_type:
             return 0
 
         start, end = self.get_layers_start_end_indices(parallel_config)
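For context, below is a minimal, self-contained sketch of the counting logic this commit arrives at. It is an illustration only, not the actual vllm.config.ModelConfig API: the function name, the plain boolean/list parameters (the real code reads these from self and hf_config, and computes the layer range from parallel_config), and the simplified hybrid fallback are assumptions made for this example.

from enum import Enum


class LayerBlockType(str, Enum):
    attention = "attention"
    mamba = "mamba"


def num_layers_by_block_type(
    block_type: LayerBlockType,
    num_layers: int,
    is_hybrid: bool,
    is_attention_free: bool,
    layers_block_type: list[str] | None = None,
) -> int:
    # Mirrors the refactored flags from the diff above.
    attn_block_type = block_type == LayerBlockType.attention
    is_full_attn_model = not is_hybrid and not is_attention_free

    if is_attention_free and attn_block_type:
        # Attention-free stacks (e.g. pure Mamba) have no attention layers.
        return 0
    if is_full_attn_model and not attn_block_type:
        # Plain transformers have nothing but attention layers.
        return 0
    if is_attention_free or is_full_attn_model:
        # Uniform stacks: every layer matches the requested block type.
        return num_layers
    # Hybrid models (e.g. Jamba) carry an explicit per-layer type list
    # (simplified stand-in for hf_config.layers_block_type).
    assert layers_block_type is not None
    return sum(t == block_type.value for t in layers_block_type)


# The case the commit addresses: with the old second condition
# ("not hybrid and block_type != attention"), a pure Mamba model asked for
# its mamba layers fell through to "return 0".
assert num_layers_by_block_type(LayerBlockType.mamba, 32,
                                is_hybrid=False, is_attention_free=True) == 32
assert num_layers_by_block_type(LayerBlockType.attention, 32,
                                is_hybrid=False, is_attention_free=False) == 32
assert num_layers_by_block_type(
    LayerBlockType.attention, 4,
    is_hybrid=True, is_attention_free=False,
    layers_block_type=["mamba", "mamba", "attention", "mamba"]) == 1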
