Skip to content

Commit

Permalink
minor fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
sasha0552 authored Oct 30, 2024
1 parent 868a7ec commit 10a8040
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion vllm/attention/backends/xformers.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -536,7 +536,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
dtype=query_start_loc.dtype,
out=query_start_loc[1:])

- return self._metadata_cls( # type: ignore
+ self._cached_prefill_metadata = XFormersMetadata(
num_prefills=self.num_prefills,
slot_mapping=slot_mapping_tensor,
num_prefill_tokens=self.num_prefill_tokens,
Expand All @@ -552,6 +552,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
block_tables=block_tables,
use_cuda_graph=use_captured_graph,
)
+ return self._cached_prefill_metadata


class XFormersImpl(AttentionImpl[XFormersMetadata]):
Expand Down

0 comments on commit 10a8040

Please sign in to comment.