Skip to content

Commit

Permalink
lint
Browse files Browse the repository at this point in the history
  • Loading branch information
rickyyx committed Nov 7, 2024
1 parent 417760a commit 8d8853e
Show file tree
Hide file tree
Showing 16 changed files with 304 additions and 552 deletions.
3 changes: 2 additions & 1 deletion benchmarks/benchmark_prefix_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def main(args):
input_length_range = tuple(map(int, args.input_length_range.split(':')))
random.seed(args.seed)
if args.dataset_path is not None:
print(f"Start to sample {args.num_prompts} prompts from {args.dataset_path}")
print(f"Start to sample {args.num_prompts} prompts "
f"from {args.dataset_path}")
filtered_datasets = sample_requests(
dataset_path=args.dataset_path,
num_requests=args.num_prompts,
Expand Down
11 changes: 6 additions & 5 deletions tests/core/block/test_block_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,9 @@ def test_can_allocate_with_prefix_cache(
# Allocate the seq 1
block_manager.allocate(seq_group_1)

# Mark the seq 1 as computed (This shoudl be done by the scheduler in reality)
block_manager.mark_blocks_as_computed(
seq_group=seq_group_1, token_chunk_size=len(tokens_1)
)
# Mark the seq 1 as computed (This should be done by the scheduler in reality)

Check failure on line 259 in tests/core/block/test_block_manager.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (E501)

tests/core/block/test_block_manager.py:259:81: E501 Line too long (82 > 80)

Check failure on line 259 in tests/core/block/test_block_manager.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (E501)

tests/core/block/test_block_manager.py:259:81: E501 Line too long (82 > 80)

Check failure on line 259 in tests/core/block/test_block_manager.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (E501)

tests/core/block/test_block_manager.py:259:81: E501 Line too long (82 > 80)

Check failure on line 259 in tests/core/block/test_block_manager.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (E501)

tests/core/block/test_block_manager.py:259:81: E501 Line too long (82 > 80)

Check failure on line 259 in tests/core/block/test_block_manager.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (E501)

tests/core/block/test_block_manager.py:259:81: E501 Line too long (82 > 80)
block_manager.mark_blocks_as_computed(seq_group=seq_group_1,
token_chunk_size=len(tokens_1))

# Test if allocatable of seq 2.
seq_group_2 = create_seq_group(
Expand Down Expand Up @@ -399,7 +398,9 @@ def test_can_swap(block_size, num_gpu_blocks, num_lookahead_slots,
watermark=0,
enable_caching=enable_caching)
prompt, seq_group = create_dummy_prompt(
"1", prompt_length=(num_gpu_blocks - 1) * block_size - 1, block_size=block_size
"1",
prompt_length=(num_gpu_blocks - 1) * block_size - 1,
block_size=block_size,
)
prompt.status = SequenceStatus.WAITING
block_manager.allocate(seq_group)
Expand Down
27 changes: 17 additions & 10 deletions tests/core/block/test_block_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,7 @@ def test_allocate_prefix_caching(block_size: int, sequence_len: int):
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True,
)
)
))
seq = make_sequence(alloc_i, token_ids, block_size)
block_tables[-1].allocate(seq=seq, device=Device.GPU)

Expand Down Expand Up @@ -148,7 +147,8 @@ def test_allocate_free(block_size: int, sequence_len: int, allocator_type: str,
block_table = BlockTable(
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True if allocator_type == "prefix_caching" else False,
enable_prefix_caching=True
if allocator_type == "prefix_caching" else False,

Check failure on line 151 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (SIM210)

tests/core/block/test_block_table.py:150:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 151 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (SIM210)

tests/core/block/test_block_table.py:150:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 151 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (SIM210)

tests/core/block/test_block_table.py:150:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 151 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (SIM210)

tests/core/block/test_block_table.py:150:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 151 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (SIM210)

tests/core/block/test_block_table.py:150:31: SIM210 Remove unnecessary `True if ... else False`
)

for i in range(5):
Expand Down Expand Up @@ -193,7 +193,8 @@ def test_append_token_ids_allocation(block_size: int, sequence_len: int,
block_table = BlockTable(
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True if allocator_type == "prefix_caching" else False,
enable_prefix_caching=True
if allocator_type == "prefix_caching" else False,

Check failure on line 197 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (SIM210)

tests/core/block/test_block_table.py:196:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 197 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (SIM210)

tests/core/block/test_block_table.py:196:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 197 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (SIM210)

tests/core/block/test_block_table.py:196:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 197 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (SIM210)

tests/core/block/test_block_table.py:196:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 197 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (SIM210)

tests/core/block/test_block_table.py:196:31: SIM210 Remove unnecessary `True if ... else False`
)

num_expected_blocks_before_append = len(
Expand Down Expand Up @@ -250,7 +251,8 @@ def test_ensure_num_empty_slots_allocation(block_size: int, sequence_len: int,
block_table = BlockTable(
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True if allocator_type == "prefix_caching" else False,
enable_prefix_caching=True
if allocator_type == "prefix_caching" else False,

Check failure on line 255 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (SIM210)

tests/core/block/test_block_table.py:254:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 255 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (SIM210)

tests/core/block/test_block_table.py:254:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 255 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (SIM210)

tests/core/block/test_block_table.py:254:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 255 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (SIM210)

tests/core/block/test_block_table.py:254:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 255 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (SIM210)

tests/core/block/test_block_table.py:254:31: SIM210 Remove unnecessary `True if ... else False`
)

num_expected_blocks_before_append = len(
Expand Down Expand Up @@ -308,7 +310,8 @@ def test_append_token_ids_correct_content(block_size: int, sequence_len: int,
block_table = BlockTable(
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True if allocator_type == "prefix_caching" else False,
enable_prefix_caching=True
if allocator_type == "prefix_caching" else False,

Check failure on line 314 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (SIM210)

tests/core/block/test_block_table.py:313:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 314 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (SIM210)

tests/core/block/test_block_table.py:313:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 314 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (SIM210)

tests/core/block/test_block_table.py:313:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 314 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (SIM210)

tests/core/block/test_block_table.py:313:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 314 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (SIM210)

tests/core/block/test_block_table.py:313:31: SIM210 Remove unnecessary `True if ... else False`
)
seq = make_sequence(0, token_ids, block_size)
block_table.allocate(seq=seq, device=Device.GPU)
Expand Down Expand Up @@ -353,7 +356,8 @@ def test_fork(seq_len: int, block_size: int, allocator_type: str):
block_table = BlockTable(
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True if allocator_type == "prefix_caching" else False,
enable_prefix_caching=True
if allocator_type == "prefix_caching" else False,

Check failure on line 360 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (SIM210)

tests/core/block/test_block_table.py:359:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 360 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (SIM210)

tests/core/block/test_block_table.py:359:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 360 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (SIM210)

tests/core/block/test_block_table.py:359:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 360 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (SIM210)

tests/core/block/test_block_table.py:359:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 360 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (SIM210)

tests/core/block/test_block_table.py:359:31: SIM210 Remove unnecessary `True if ... else False`
)

seq = make_sequence(0, token_ids, block_size)
Expand Down Expand Up @@ -414,7 +418,8 @@ def test_cow(block_size: int, sequence_len: int, append_len: int,
original_block_table = BlockTable(
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True if allocator_type == "prefix_caching" else False,
enable_prefix_caching=True
if allocator_type == "prefix_caching" else False,

Check failure on line 422 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (SIM210)

tests/core/block/test_block_table.py:421:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 422 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (SIM210)

tests/core/block/test_block_table.py:421:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 422 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (SIM210)

tests/core/block/test_block_table.py:421:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 422 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (SIM210)

tests/core/block/test_block_table.py:421:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 422 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (SIM210)

tests/core/block/test_block_table.py:421:31: SIM210 Remove unnecessary `True if ... else False`
)

num_expected_non_cow_blocks = cdiv(sequence_len, block_size)
Expand Down Expand Up @@ -504,7 +509,8 @@ def test_cow_lookahead_simple(block_size: int, sequence_len: int,
original_block_table = BlockTable(
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True if allocator_type == "prefix_caching" else False,
enable_prefix_caching=True
if allocator_type == "prefix_caching" else False,

Check failure on line 513 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (SIM210)

tests/core/block/test_block_table.py:512:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 513 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (SIM210)

tests/core/block/test_block_table.py:512:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 513 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (SIM210)

tests/core/block/test_block_table.py:512:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 513 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (SIM210)

tests/core/block/test_block_table.py:512:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 513 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (SIM210)

tests/core/block/test_block_table.py:512:31: SIM210 Remove unnecessary `True if ... else False`
)

seq = make_sequence(0, token_ids, block_size)
Expand Down Expand Up @@ -590,7 +596,8 @@ def test_num_blocks_touched_by_append_slots(block_size: int, sequence_len: int,
block_table = BlockTable(
block_size=block_size,
block_allocator=allocator,
enable_prefix_caching=True if allocator_type == "prefix_caching" else False,
enable_prefix_caching=True
if allocator_type == "prefix_caching" else False,

Check failure on line 600 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (SIM210)

tests/core/block/test_block_table.py:599:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 600 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.8)

Ruff (SIM210)

tests/core/block/test_block_table.py:599:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 600 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.11)

Ruff (SIM210)

tests/core/block/test_block_table.py:599:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 600 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.9)

Ruff (SIM210)

tests/core/block/test_block_table.py:599:31: SIM210 Remove unnecessary `True if ... else False`

Check failure on line 600 in tests/core/block/test_block_table.py

View workflow job for this annotation

GitHub Actions / ruff (3.10)

Ruff (SIM210)

tests/core/block/test_block_table.py:599:31: SIM210 Remove unnecessary `True if ... else False`
)
seq = make_sequence(0, token_ids, block_size)
block_table.allocate(seq=seq, device=Device.GPU)
Expand Down
12 changes: 5 additions & 7 deletions tests/core/block/test_prefix_caching_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,9 +795,8 @@ def test_get_cached_blocks():

block_size = 16
num_blocks = 5
allocator = PrefixCachingBlockAllocator(
block_size=block_size, num_blocks=num_blocks
)
allocator = PrefixCachingBlockAllocator(block_size=block_size,
num_blocks=num_blocks)

# 1. Allocate a list of blocks
block_hashes = [random.randint(1, 1000000) for _ in range(num_blocks)]
Expand Down Expand Up @@ -825,12 +824,11 @@ def test_get_cached_blocks():
result = allocator.get_cached_blocks(cached_hashes)
assert (
result == expected_cached_blocks
), f"Expected {expected_cached_blocks}, but got {result}, with test case {cached_hashes}. blcok hashes = {block_hashes}"
), f"Expected {expected_cached_blocks}, but got {result}, with test case {cached_hashes}. block hashes = {block_hashes}"

# Test with some non-existent hashes
non_existent_hash = max(block_hashes) + 1
test_hashes = block_hashes[:3] + [non_existent_hash] + block_hashes[3:]
result = allocator.get_cached_blocks(test_hashes)
assert (
result == block_hashes[0:3]
), f"Expected {block_hashes[0:3]}, but got {result}"
assert (result == block_hashes[0:3]
), f"Expected {block_hashes[0:3]}, but got {result}"
2 changes: 1 addition & 1 deletion tests/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def create_dummy_prompt_encoder_decoder(

def create_seq_group(
seq_prompt_len: int = 1024,
seq_output_lens: GenericSequence[int] = (128,),
seq_output_lens: GenericSequence[int] = (128, ),
request_id: str = "0",
seq_id_start: int = 0,
sampling_params: Optional[SamplingParams] = None,
Expand Down
8 changes: 4 additions & 4 deletions tests/prefix_caching/test_prefix_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ def test_mixed_requests(

cached_prompt = example_prompts[cached_position]
with vllm_runner(
model,
dtype=dtype,
enable_prefix_caching=True,
enable_chunked_prefill=enable_chunked_prefill,
model,
dtype=dtype,
enable_prefix_caching=True,
enable_chunked_prefill=enable_chunked_prefill,
) as vllm_model:
# Run the first prompt so the cache is populated
vllm_outputs = vllm_model.generate_greedy([cached_prompt], max_tokens)
Expand Down
46 changes: 8 additions & 38 deletions vllm/core/block/block_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,30 +55,6 @@ def __init__(
self._max_block_sliding_window = max_block_sliding_window
self._num_full_slots = self._get_num_token_ids()

# @staticmethod
# def get_num_required_blocks(token_ids: List[int],
# block_size: int,
# num_lookahead_slots: int = 0) -> int:
# """Calculates the minimum number of blocks required to store a given
# sequence of token IDs along with any look-ahead slots that may be
# required (like in multi-step + chunked-prefill).

# This assumes worst-case scenario, where every block requires a new
# allocation (e.g. ignoring prefix caching).

# Args:
# token_ids (List[int]): The sequence of token IDs to be stored.
# block_size (int): The maximum number of tokens that can be stored in
# a single block.
# num_lookahead_slots (int): look-ahead slots that the sequence may
# require.

# Returns:
# int: The minimum number of blocks required to store the given
# sequence of token IDs along with any required look-ahead slots.
# """
# return cdiv(len(token_ids) + num_lookahead_slots, block_size)

def allocate(
self,
token_ids: List[int],
Expand All @@ -100,14 +76,13 @@ def allocate(
if not token_ids:
return

blocks = self._allocate_blocks_for_token_ids(
token_ids, block_hashes, device
)
blocks = self._allocate_blocks_for_token_ids(token_ids, block_hashes,
device)
self.update(blocks)
self._num_full_slots = len(token_ids)

def update(self, blocks: List[Block]) -> None:
"""Resets the table to the newly provided blocks
"""Resets the table to the newly provided blocks
(with their corresponding block ids)
"""
self._blocks.update(blocks)
Expand Down Expand Up @@ -164,17 +139,14 @@ def append_slots(
# Update the blocks with the new tokens
first_block_idx = self._num_full_slots // self._block_size
token_blocks = self._chunk_token_blocks_for_append(token_ids)

if len(token_blocks) != len(block_hashes):
breakpoint()

assert len(token_blocks) == len(
block_hashes
), "chunked token_ids and block_hashes must have the same length"

for i, token_block in enumerate(token_blocks):
block_hash = block_hashes[i]
self._blocks.append_token_ids(first_block_idx + i, token_block, block_hash)
self._blocks.append_token_ids(first_block_idx + i, token_block,
block_hash)

self._num_full_slots += len(token_ids)

Expand Down Expand Up @@ -304,19 +276,17 @@ def _allocate_blocks_for_token_ids(
self._allocator.allocate_immutable_blocks(
prev_block,
block_token_ids=block_token_ids,
block_hashes=block_hashes[: len(block_token_ids)],
block_hashes=block_hashes[:len(block_token_ids)],
device=device,
)
)
))
prev_block = blocks[-1]

if tail_token_ids:
assert len(tail_token_ids) == 1
assert block_hashes[-1] is None
cur_token_ids = tail_token_ids[0]
block = self._allocator.allocate_mutable_block(
prev_block=prev_block, device=device
)
prev_block=prev_block, device=device)
block.append_token_ids(cur_token_ids, block_hash=None)

blocks.append(block)
Expand Down
5 changes: 2 additions & 3 deletions vllm/core/block/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,8 @@ def update(self, blocks: List[Block]):
for block in self._blocks:
self._add_block_id(block.block_id)

def append_token_ids(
self, block_index: int, token_ids: List[int], block_hash: Optional[int]
) -> None:
def append_token_ids(self, block_index: int, token_ids: List[int],
block_hash: Optional[int]) -> None:
block = self._blocks[block_index]
prev_block_id = block.block_id

Expand Down
27 changes: 10 additions & 17 deletions vllm/core/block/cpu_gpu_block_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def allocate_immutable_blocks(
prev_block: Optional[Block],
block_token_ids: List[List[int]],
device: Device,
block_hashes: Optional[List[Optional[int]]] = None,
block_hashes: List[Optional[int]],
) -> List[Block]:
"""Allocates a new group of immutable blocks with the provided block
token IDs on the specified device.
Expand All @@ -156,15 +156,7 @@ def allocate_immutable_blocks(
containing the provided block token IDs.
"""
return self._allocators[device].allocate_immutable_blocks(
prev_block, block_token_ids, block_hashes
)

def get_allocated_cached_blocks(
self,
block_hashes: List[int],
device: Device,
) -> List[int]:
return self._allocators[device].get_allocated_cached_blocks(block_hashes)
prev_block, block_token_ids, block_hashes)

def allocate_immutable_block(self, prev_block: Optional[Block],
token_ids: List[int],
Expand Down Expand Up @@ -353,14 +345,12 @@ def get_and_reset_swaps(self) -> List[Tuple[int, int]]:
self._swap_mapping.clear()
return list(mapping.items())

def find_cached_blocks_prefix(
self, block_hashes: List[int], allocated: bool
) -> List[int]:
def find_cached_blocks_prefix(self, block_hashes: List[int],
allocated: bool) -> List[int]:
# Prefix caching only supported on GPU.
device = Device.GPU
return self._allocators[device].find_cached_blocks_prefix(
block_hashes, allocated
)
block_hashes, allocated)


class NullBlock(Block):
Expand All @@ -376,7 +366,9 @@ def __init__(self, proxy: Block):
super().__init__()
self._proxy = proxy

def append_token_ids(self, token_ids: List[BlockId]):
def append_token_ids(self,
token_ids: List[BlockId],
block_hash: Optional[int] = None) -> None:
raise ValueError("null block should not be modified")

@property
Expand Down Expand Up @@ -429,4 +421,5 @@ def content_hash(self):
return self._proxy.content_hash

def set_content_hash(self, content_hash: Optional[int]) -> None:
raise NotImplementedError("NullBlock does not support set_content_hash")
raise NotImplementedError(
"NullBlock does not support set_content_hash")
Loading

0 comments on commit 8d8853e

Please sign in to comment.