Skip to content

Commit

Permalink
Fix allocation estimate
Browse files Browse the repository at this point in the history
Signed-off-by: Cody Yu <[email protected]>
  • Loading branch information
comaniac committed Nov 1, 2024
1 parent 632fd92 commit 499bd7e
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions vllm/v1/core/kv_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,15 +241,25 @@ def allocate_slots(
raise ValueError(
f"num_tokens must be greater than 0, got {num_tokens}")

+ # If a computed block is an eviction candidate (in the free queue),
+ # it cannot be counted as a free block when estimating whether we
+ # can allocate new blocks for this request.
+ num_evictable_computed_blocks = len([
+ bid for bid in computed_block_ids
+ if self.block_pool[bid].ref_cnt == 0
+ ])
+
  num_required_blocks = cdiv(num_tokens, self.block_size)
- if num_required_blocks > self.num_free_blocks:
+ if (num_required_blocks >
+ self.num_free_blocks - num_evictable_computed_blocks):
  # Cannot allocate new blocks.
  return None

  # Determine the number of new blocks to allocate considering
  # preallocated blocks.
- num_new_blocks = min(num_required_blocks + self.num_preallocate_blocks,
- self.num_free_blocks)
+ num_new_blocks = min(
+ num_required_blocks + self.num_preallocate_blocks,
+ self.num_free_blocks - num_evictable_computed_blocks)
# Get the token IDs for the blocks being allocated for hashing.
# Note that we expect this function to be called only once per
# request, so we must have all new token IDs in the prompt.
Expand Down

0 comments on commit 499bd7e

Please sign in to comment.