@@ -201,32 +201,23 @@ def allocate_slots(
201
201
f"num_tokens must be greater than 0, got { num_tokens } " )
202
202
203
203
# Touch the computed blocks to make sure they won't be evicted.
204
- num_evictable_computed_blocks = 0
205
204
if self .enable_caching :
206
205
self ._touch (computed_blocks )
207
-
208
- # If a computed block of a request is an eviction candidate (in the
209
- # free queue and ref_cnt == 0), it cannot be counted as a free block
210
- # when allocating this request.
211
- num_evictable_computed_blocks = len (
212
- [blk for blk in computed_blocks if blk .ref_cnt == 0 ])
213
206
else :
214
207
assert not computed_blocks , (
215
208
"Computed blocks should be empty when "
216
209
"prefix caching is disabled" )
217
210
218
211
num_required_blocks = cdiv (num_tokens , self .block_size )
219
- if (num_required_blocks > self .free_block_queue .num_free_blocks -
220
- num_evictable_computed_blocks ):
212
+ if (num_required_blocks > self .free_block_queue .num_free_blocks ):
221
213
# Cannot allocate new blocks.
222
214
return None
223
215
224
216
# Determine the number of new blocks to allocate considering
225
217
# preallocated blocks.
226
218
num_new_blocks = min (
227
219
num_required_blocks + self .num_preallocate_blocks ,
228
- self .free_block_queue .num_free_blocks -
229
- num_evictable_computed_blocks ,
220
+ self .free_block_queue .num_free_blocks ,
230
221
# Should not exceed the maximum number of blocks per request.
231
222
# This is especially because the block table has the shape
232
223
# [..., max_num_blocks_per_req].
0 commit comments