
Commit

Revert Mamba MS fix, will fix it in future PR
Signed-off-by: mzusman <[email protected]>
mzusman committed Dec 8, 2024
1 parent 3347f3f commit f21ff9a
Showing 2 changed files with 1 addition and 7 deletions.
4 changes: 0 additions & 4 deletions vllm/attention/backends/placeholder_attn.py
@@ -114,10 +114,6 @@ class PlaceholderAttentionMetadata(AttentionMetadata):
     _cached_prefill_metadata: Optional["PlaceholderAttentionMetadata"] = None
     _cached_decode_metadata: Optional["PlaceholderAttentionMetadata"] = None
 
-    def advance_step(self, *args, **kwargs):
-        # No need to do anything here
-        pass
-
     @property
     def prefill_metadata(self) -> Optional["PlaceholderAttentionMetadata"]:
         if self.num_prefills == 0:
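The removed advance_step above was a no-op hook on PlaceholderAttentionMetadata, the metadata used by attention-free models such as Mamba. A minimal, self-contained sketch of the contract involved (toy names only, not vLLM's actual call site) showing why a backend's metadata needs an advance_step hook to take part in multi-step decoding:

# Toy illustration only; the class and function names below are invented for
# this sketch and are not vLLM APIs.

class ToyPlaceholderMetadata:
    """Stands in for PlaceholderAttentionMetadata in this sketch."""

    def advance_step(self, *args, **kwargs) -> None:
        # The reverted fix added exactly this kind of no-op so that
        # attention-free models could satisfy the multi-step interface.
        pass


def run_decode_steps(metadata, num_steps: int) -> None:
    # Multi-step decoding reuses cached metadata across steps and advances it
    # in place between steps instead of rebuilding it each time.
    for step in range(1, num_steps):
        metadata.advance_step(step)


run_decode_steps(ToyPlaceholderMetadata(), num_steps=4)

With both this hook and the "NO_ATTENTION" entry reverted (see the next file), the placeholder backend is no longer advertised for multi-step scheduling until the follow-up PR.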
4 changes: 1 addition & 3 deletions vllm/worker/multi_step_model_runner.py
@@ -29,9 +29,7 @@

 logger = init_logger(__name__)
 
-MULTI_STEP_ATTENTION_BACKENDS = [
-    "FLASH_ATTN", "ROCM_FLASH", "FLASHINFER", "NO_ATTENTION"
-]
+MULTI_STEP_ATTENTION_BACKENDS = ["FLASH_ATTN", "ROCM_FLASH", "FLASHINFER"]
 MULTI_STEP_CHUNKED_PREFILL_ATTENTION_BACKENDS = ["FLASH_ATTN"]
 
 def _get_supported_attention_backends(chunked_prefill_enabled: bool) \
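The body of _get_supported_attention_backends is truncated in the diff above; only its signature is shown. A minimal sketch, assuming the helper simply picks between the two module-level lists based on the chunked-prefill flag (the actual body is not visible here):

from typing import List

MULTI_STEP_ATTENTION_BACKENDS = ["FLASH_ATTN", "ROCM_FLASH", "FLASHINFER"]
MULTI_STEP_CHUNKED_PREFILL_ATTENTION_BACKENDS = ["FLASH_ATTN"]


def _get_supported_attention_backends(chunked_prefill_enabled: bool) \
        -> List[str]:
    # Chunked prefill narrows the set of backends that support multi-step;
    # otherwise the broader multi-step list applies.
    if chunked_prefill_enabled:
        return MULTI_STEP_CHUNKED_PREFILL_ATTENTION_BACKENDS
    return MULTI_STEP_ATTENTION_BACKENDS


print(_get_supported_attention_backends(chunked_prefill_enabled=False))

After this revert, "NO_ATTENTION" no longer appears in either list, so multi-step execution is limited to the flash-attention-family backends.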
