From 40238e8ecee1facab46d169a4fc2046a0338983c Mon Sep 17 00:00:00 2001
From: Kathy Yu
Date: Tue, 5 Nov 2024 00:36:57 +0000
Subject: [PATCH] Free cross attention block table for preempted-for-recompute sequence group.

Signed-off-by: Kathy Yu
---
 vllm/core/scheduler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index c3bc6becf0995..b3d396f9cedda 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -1579,6 +1579,7 @@ def _preempt_by_recompute(
             seq.status = SequenceStatus.WAITING
             self.free_seq(seq)
             seq.reset_state_for_recompute()
+        self._free_seq_group_cross_attn_blocks(seq_group)
 
     def _preempt_by_swap(
         self,