Skip to content

Commit

Permalink
Fix the compatibility with the latest main
Browse files Browse the repository at this point in the history
  • Loading branch information
ApostaC committed Dec 13, 2024
1 parent a0b5061 commit 9fcf23e
Showing 1 changed file with 24 additions and 12 deletions.
36 changes: 24 additions & 12 deletions vllm/core/block/cpu_offloading_block_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,10 @@ def __init__(self, cpu_block_allocator: PrefixCachingBlockAllocator,
self.num_gpu_blocks = gpu_block_allocator.get_num_total_blocks()
self.num_cpu_blocks = cpu_block_allocator.get_num_total_blocks()

def allocate_mutable_block(self, prev_block: Optional[Block],
device: Device) -> Block:
def allocate_mutable_block(self,
prev_block: Optional[Block],
device: Device,
extra_hash: Optional[int] = None) -> Block:
"""Allocates a new mutable block on the specified device.
Args:
Expand All @@ -148,13 +150,17 @@ def allocate_mutable_block(self, prev_block: Optional[Block],
"handles CPU offloading internally."\
# mark this block as uncached

block = self._allocators[device].allocate_mutable_block(prev_block)
block = self._allocators[device].allocate_mutable_block(
prev_block, extra_hash=extra_hash)
self._uncached_blocks.append(block)
return block

def allocate_immutable_blocks(self, prev_block: Optional[Block],
block_token_ids: List[List[int]],
device: Device) -> List[Block]:
def allocate_immutable_blocks(
self,
prev_block: Optional[Block],
block_token_ids: List[List[int]],
device: Device,
extra_hash: Optional[int] = None) -> List[Block]:
"""Allocates a new group of immutable blocks with the provided block
token IDs on the specified device.
Expand All @@ -179,13 +185,16 @@ def allocate_immutable_blocks(self, prev_block: Optional[Block],
for token_ids in block_token_ids:
prev_block = self.allocate_immutable_block(prev_block=prev_block,
token_ids=token_ids,
device=device)
device=device,
extra_hash=extra_hash)
blocks.append(prev_block)
return blocks

def allocate_immutable_block(self, prev_block: Optional[Block],
def allocate_immutable_block(self,
prev_block: Optional[Block],
token_ids: List[int],
device: Device) -> Block:
device: Device,
extra_hash: Optional[int] = None) -> Block:
"""Allocates a new immutable block with the provided token IDs on the
specified device.
Expand All @@ -207,7 +216,7 @@ def allocate_immutable_block(self, prev_block: Optional[Block],

# allocate a GPU block
block = self._allocators[device].allocate_immutable_block(
prev_block, token_ids)
prev_block, token_ids, extra_hash=extra_hash)
block_id = block.block_id
assert block_id is not None
block_computed = self._allocators[device].block_is_computed(block_id)
Expand All @@ -222,7 +231,7 @@ def allocate_immutable_block(self, prev_block: Optional[Block],
else:
# check if we can hit cache on CPU by trying to allocate CPU block
cpu_block = self._allocators[Device.CPU].allocate_immutable_block(
prev_block, token_ids)
prev_block, token_ids, extra_hash=extra_hash)
cpu_block_id = cpu_block.block_id
assert cpu_block_id is not None
cpu_block_computed = self._allocators[
Expand Down Expand Up @@ -329,7 +338,10 @@ def get_and_reset_swaps(self,
if computed: # This block is computed, copy it to CPU
# allocate a block on CPU
cpu_block = cpu_allocator.allocate_immutable_block(
prev_block=block.prev_block, token_ids=block.token_ids)
prev_block=block.prev_block,
token_ids=block.token_ids,
extra_hash=block.extra_hash,
)
assert cpu_block.block_id is not None
self._allocated_cpu_blocks.append(cpu_block)

Expand Down

0 comments on commit 9fcf23e

Please sign in to comment.