From 5c5e52e6c510a4eaba224f7bee5f5003b1357f92 Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Mon, 30 Dec 2024 19:37:00 -0800
Subject: [PATCH] Update kv_cache_manager.py

Co-authored-by: Woosuk Kwon
---
Note: the evictable computed blocks are counted with `sum(1 for ...)`.
Summing the block objects themselves (`sum(blk for ...)`) would add blocks
to an int rather than count them; `sum(1 for ...)` yields the same value as
the removed `len([...])` without materializing a temporary list.

 vllm/v1/core/kv_cache_manager.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
index 0587488d5f67d..59aa38029a69e 100644
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@@ -203,8 +203,8 @@ def allocate_slots(
         # If a computed block of a request is an eviction candidate (in the
         # free queue and ref_cnt == 0), it cannot be counted as a free block
         # when allocating this request.
-        num_evictable_computed_blocks = len(
-            [blk for blk in computed_blocks if blk.ref_cnt == 0])
+        num_evictable_computed_blocks = sum(
+            1 for blk in computed_blocks if blk.ref_cnt == 0)
 
         num_required_blocks = cdiv(num_tokens, self.block_size)
         if (num_required_blocks > self.free_block_queue.num_free_blocks -