Update ROCm vLLM to 0.4.3 #40

Merged

merged 393 commits on Jun 6, 2024

Changes from 1 commit

393 commits
ba4be44
[BugFix] Fix return type of executor execute_model methods (#4402)
njhill Apr 27, 2024
4ea1f96
[BugFix] Resolved Issues For LinearMethod --> QuantConfig (#4418)
robertgshaw2-redhat Apr 27, 2024
9c7306a
[Misc] fix typo in llm_engine init logging (#4428)
DefTruth Apr 28, 2024
bf480c5
Add more Prometheus metrics (#2764)
ronensc Apr 28, 2024
03dd7d5
[CI] clean docker cache for neuron (#4441)
simon-mo Apr 28, 2024
df29793
[mypy][5/N] Support all typing on model executor (#4427)
rkooo567 Apr 29, 2024
73c8d67
[Kernel] Marlin Expansion: Support AutoGPTQ Models with Marlin (#3922)
robertgshaw2-redhat Apr 29, 2024
ac5ccf0
[CI] hotfix: soft fail neuron test (#4458)
simon-mo Apr 29, 2024
f4f921b
[Core][Distributed] use cpu group to broadcast metadata in cpu (#4444)
youkaichao Apr 29, 2024
d627a3d
[Misc] Upgrade to `torch==2.3.0` (#4454)
mgoin Apr 30, 2024
fa32207
[Bugfix][Kernel] Fix compute_type for MoE kernel (#4463)
WoosukKwon Apr 30, 2024
26f2fb5
[Core]Refactor gptq_marlin ops (#4466)
jikunshang Apr 30, 2024
4bb53e2
[BugFix] fix num_lookahead_slots missing in async executor (#4165)
leiwen83 Apr 30, 2024
b31a1fb
[Doc] add visualization for multi-stage dockerfile (#4456)
prashantgupta24 Apr 30, 2024
111815d
[Kernel] Support Fp8 Checkpoints (Dynamic + Static) (#4332)
robertgshaw2-redhat Apr 30, 2024
a494140
[Frontend] Support complex message content for chat completions endpo…
fgreinacher Apr 30, 2024
715c2d8
[Frontend] [Core] Tensorizer: support dynamic `num_readers`, update v…
alpayariyak Apr 30, 2024
dd1a50a
[Bugfix][Minor] Make ignore_eos effective (#4468)
bigPYJ1151 Apr 30, 2024
6ad58f4
fix_tokenizer_snapshot_download_bug (#4493)
kingljl Apr 30, 2024
ee37328
Unable to find Punica extension issue during source code installation…
kingljl May 1, 2024
2e240c6
[Core] Centralize GPU Worker construction (#4419)
njhill May 1, 2024
f458112
[Misc][Typo] type annotation fix (#4495)
HarryWu99 May 1, 2024
a822eb3
[Misc] fix typo in block manager (#4453)
Juelianqvq May 1, 2024
c3845d8
Allow user to define whitespace pattern for outlines (#4305)
robcaulk May 1, 2024
d6f4bd7
[Misc]Add customized information for models (#4132)
jeejeelee May 1, 2024
6f1df80
[Test] Add ignore_eos test (#4519)
rkooo567 May 1, 2024
a88bb9b
[Bugfix] Fix the fp8 kv_cache check error that occurs when failing to…
AnyISalIn May 1, 2024
4dc8026
[Bugfix] Fix 307 Redirect for `/metrics` (#4523)
robertgshaw2-redhat May 1, 2024
e491c7e
[Doc] update(example model): for OpenAI compatible serving (#4503)
fpaupier May 1, 2024
6990912
[Bugfix] Use random seed if seed is -1 (#4531)
sasha0552 May 1, 2024
8b798ee
[CI/Build][Bugfix] VLLM_USE_PRECOMPILED should skip compilation (#4534)
tjohnson31415 May 1, 2024
b38e42f
[Speculative decoding] Add ngram prompt lookup decoding (#4237)
leiwen83 May 1, 2024
24750f4
[Core] Enable prefix caching with block manager v2 enabled (#4142)
leiwen83 May 1, 2024
a657bfc
[Core] Add `multiproc_worker_utils` for multiprocessing-based workers…
njhill May 1, 2024
24bb4fe
[Kernel] Update fused_moe tuning script for FP8 (#4457)
pcmoritz May 1, 2024
c47ba4a
[Bugfix] Add validation for seed (#4529)
sasha0552 May 1, 2024
3a922c1
[Bugfix][Core] Fix and refactor logging stats (#4336)
esmeetu May 1, 2024
6ef09b0
[Core][Distributed] fix pynccl del error (#4508)
youkaichao May 1, 2024
c9d852d
[Misc] Remove Mixtral device="cuda" declarations (#4543)
pcmoritz May 1, 2024
826b82a
[Misc] Fix expert_ids shape in MoE (#4517)
WoosukKwon May 1, 2024
b8afa8b
[MISC] Rework logger to enable pythonic custom logging configuration …
May 2, 2024
0d62fe5
[Bug fix][Core] assert num_new_tokens == 1 fails when SamplingParams.…
rkooo567 May 2, 2024
5e401bc
[CI]Add regression tests to ensure the async engine generates metrics…
ronensc May 2, 2024
cf8cac8
[mypy][6/N] Fix all the core subdirectory typing (#4450)
rkooo567 May 2, 2024
2a85f93
[Core][Distributed] enable multiple tp group (#4512)
youkaichao May 2, 2024
7038e8b
[Kernel] Support running GPTQ 8-bit models in Marlin (#4533)
alexm-redhat May 2, 2024
fb087af
[mypy][7/N] Cover all directories (#4555)
rkooo567 May 2, 2024
5ad60b0
[Misc] Exclude the `tests` directory from being packaged (#4552)
itechbear May 2, 2024
1ff0c73
[BugFix] Include target-device specific requirements.txt in sdist (#4…
markmc May 2, 2024
5b8a7c1
[Misc] centralize all usage of environment variables (#4548)
youkaichao May 2, 2024
32881f3
[kernel] fix sliding window in prefix prefill Triton kernel (#4405)
mmoskal May 2, 2024
9b5c9f9
[CI/Build] AMD CI pipeline with extended set of tests. (#4267)
Alexei-V-Ivanov-AMD May 2, 2024
0f8a914
[Core] Ignore infeasible swap requests. (#4557)
rkooo567 May 2, 2024
344a5d0
[Core][Distributed] enable allreduce for multiple tp groups (#4566)
youkaichao May 3, 2024
808632d
[BugFix] Prevent the task of `_force_log` from being garbage collecte…
Atry May 3, 2024
ce3f1ee
[Misc] remove chunk detected debug logs (#4571)
DefTruth May 3, 2024
2d7bce9
[Doc] add env vars to the doc (#4572)
youkaichao May 3, 2024
3521ba4
[Core][Model runner refactoring 1/N] Refactor attn metadata term (#4518)
rkooo567 May 3, 2024
7e65477
[Bugfix] Allow "None" or "" to be passed to CLI for string args that …
mgoin May 3, 2024
f8e7add
Fix/async chat serving (#2727)
schoennenbeck May 3, 2024
43c413e
[Kernel] Use flashinfer for decoding (#4353)
LiuXiaoxuanPKU May 3, 2024
ab50275
[Speculative decoding] Support target-model logprobs (#4378)
cadedaniel May 3, 2024
344bf7c
[Misc] add installation time env vars (#4574)
youkaichao May 3, 2024
bc8ad68
[Misc][Refactor] Introduce ExecuteModelData (#4540)
comaniac May 4, 2024
36fb68f
[Doc] Chunked Prefill Documentation (#4580)
rkooo567 May 4, 2024
2a05201
[Kernel] Support MoE Fp8 Checkpoints for Mixtral (Static Weights with…
mgoin May 4, 2024
021b1a2
[CI] check size of the wheels (#4319)
simon-mo May 4, 2024
4302987
[Bugfix] Fix inappropriate content of model_name tag in Prometheus me…
DearPlanet May 4, 2024
8d8357c
bump version to v0.4.2 (#4600)
simon-mo May 5, 2024
c7f2cf2
[CI] Reduce wheel size by not shipping debug symbols (#4602)
simon-mo May 5, 2024
0650e59
Disable cuda version check in vllm-openai image (#4530)
zhaoyang-star May 5, 2024
323f27b
[Bugfix] Fix `asyncio.Task` not being subscriptable (#4623)
DarkLight1337 May 6, 2024
e186d37
[CI] use ccache actions properly in release workflow (#4629)
simon-mo May 6, 2024
19cb471
[CI] Add retry for agent lost (#4633)
cadedaniel May 6, 2024
bd99d22
Update lm-format-enforcer to 0.10.1 (#4631)
noamgat May 6, 2024
a98187c
[Kernel] Make static FP8 scaling more robust (#4570)
pcmoritz May 7, 2024
63575bc
[Core][Optimization] change python dict to pytorch tensor (#4607)
youkaichao May 7, 2024
478aed5
[Build/CI] Fixing 'docker run' to re-enable AMD CI tests. (#4642)
Alexei-V-Ivanov-AMD May 7, 2024
10760da
[Bugfix] Fixed error in slice_lora_b for MergedQKVParallelLinearWithL…
FurtherAI May 7, 2024
469f85c
[Core][Optimization] change copy-on-write from dict[int, list] to lis…
youkaichao May 7, 2024
8344f77
[Bug fix][Core] fixup ngram not setup correctly (#4551)
leiwen83 May 7, 2024
cc466a3
[Core][Distributed] support cpu&device in broadcast tensor dict (#4660)
youkaichao May 8, 2024
d7740ea
[Core] Optimize sampler get_logprobs (#4594)
rkooo567 May 8, 2024
f6a5930
[CI] Make mistral tests pass (#4596)
rkooo567 May 8, 2024
0f9a6e3
[Bugfix][Kernel] allow non-power-of-2 for prefix prefill with alibi …
DefTruth May 8, 2024
5510cf0
[Misc] Add `get_name` method to attention backends (#4685)
WoosukKwon May 8, 2024
ad932a2
[Core] Faster startup for LoRA enabled models (#4634)
Yard1 May 8, 2024
20cfcde
[Core][Optimization] change python dict to pytorch tensor for blocks …
youkaichao May 8, 2024
230c4b3
[CI/Test] fix swap test for multi gpu (#4689)
youkaichao May 8, 2024
89579a2
[Misc] Use vllm-flash-attn instead of flash-attn (#4686)
WoosukKwon May 8, 2024
f942efb
[Dynamic Spec Decoding] Auto-disable by the running queue size (#4592)
comaniac May 8, 2024
8b9241b
[Speculative decoding] [Bugfix] Fix overallocation in ngram + spec lo…
cadedaniel May 8, 2024
e288df0
[Bugfix] Fine-tune gptq_marlin configs to be more similar to marlin (…
alexm-redhat May 9, 2024
16bc0a0
[Frontend] add tok/s speed metric to llm class when using tqdm (#4400)
MahmoudAshraf97 May 9, 2024
f12b20d
[Frontend] Move async logic outside of constructor (#4674)
DarkLight1337 May 9, 2024
190bc83
[Misc] Remove unnecessary ModelRunner imports (#4703)
WoosukKwon May 9, 2024
0ee535b
[Misc] Set block size at initialization & Fix test_model_runner (#4705)
WoosukKwon May 9, 2024
ff5abcd
[ROCm] Add support for Punica kernels on AMD GPUs (#3140)
kliuae May 9, 2024
a3c1245
[Bugfix] Fix CLI arguments in OpenAI server docs (#4709)
DarkLight1337 May 9, 2024
cea6443
[Bugfix] Update grafana.json (#4711)
robertgshaw2-redhat May 9, 2024
be0c518
[Bugfix] Add logs for all model dtype casting (#4717)
mgoin May 9, 2024
ebce310
[Model] Snowflake arctic model implementation (#4652)
sfc-gh-hazhang May 9, 2024
379da6d
[Kernel] [FP8] Improve FP8 linear layer performance (#4691)
pcmoritz May 9, 2024
c833101
[Kernel] Refactor FP8 kv-cache with NVIDIA float8_e4m3 support (#4535)
comaniac May 10, 2024
208b71b
[Core][Distributed] refactor pynccl (#4591)
youkaichao May 10, 2024
e965d46
[Misc] Keep only one implementation of the create_dummy_prompt functi…
AllenDou May 10, 2024
51d4094
chunked-prefill-doc-syntax (#4603)
simon-mo May 10, 2024
64b77df
[Core]fix type annotation for `swap_blocks` (#4726)
jikunshang May 10, 2024
dac6a3f
[Misc] Apply a couple g++ cleanups (#4719)
stevegrubb May 10, 2024
6a0f617
[Core] Fix circular reference which leaked llm instance in local dev …
rkooo567 May 10, 2024
706588a
[Bugfix] Fix CLI arguments in OpenAI server docs (#4729)
AllenDou May 10, 2024
2e7796f
[Speculative decoding] CUDA graph support (#4295)
heeju-kim2 May 10, 2024
fcc2994
[CI] Nits for bad initialization of SeqGroup in testing (#4748)
robertgshaw2-redhat May 10, 2024
4e12131
[Core][Test] fix function name typo in custom allreduce (#4750)
youkaichao May 10, 2024
e254497
[Model][Misc] Add e5-mistral-7b-instruct and Embedding API (#3734)
CatherineSue May 11, 2024
6eaccb7
[Model] Add support for IBM Granite Code models (#4636)
yikangshen May 12, 2024
a709e87
[CI/Build] Tweak Marlin Nondeterminism Issues (#4713)
robertgshaw2-redhat May 13, 2024
a7be4d0
[CORE] Improvement in ranks code (#4718)
SwapnilDreams100 May 13, 2024
702bee4
[Core][Distributed] refactor custom allreduce to support multiple tp …
youkaichao May 13, 2024
350f9e1
[CI/Build] Move `test_utils.py` to `tests/utils.py` (#4425)
DarkLight1337 May 13, 2024
e7c46b9
[Scheduler] Warning upon preemption and Swapping (#4647)
rkooo567 May 13, 2024
0fca3cd
[Misc] Enhance attention selector (#4751)
WoosukKwon May 13, 2024
8bc68e1
[Frontend] [Core] perf: Automatically detect vLLM-tensorized model, u…
sangstar May 13, 2024
ce532ff
[Speculative decoding] Improve n-gram efficiency (#4724)
comaniac May 13, 2024
1356df5
[Kernel] Use flash-attn for decoding (#3648)
skrider May 13, 2024
33d3914
[Bugfix] Fix dynamic FP8 quantization for Mixtral (#4793)
pcmoritz May 13, 2024
ac1fbf7
[Doc] Shorten README by removing supported model list (#4796)
zhuohan123 May 13, 2024
4bfa7e7
[Doc] Add API reference for offline inference (#4710)
DarkLight1337 May 14, 2024
c579b75
[Doc] Add meetups to the doc (#4798)
zhuohan123 May 14, 2024
ccb63a8
[Core][Hash][Automatic Prefix caching] Accelerating the hashing funct…
KuntaiDu May 14, 2024
dc72402
[Bugfix][Doc] Fix CI failure in docs (#4804)
DarkLight1337 May 14, 2024
676a999
[Core] Add MultiprocessingGPUExecutor (#4539)
njhill May 14, 2024
29bc01b
Add 4th meetup announcement to readme (#4817)
simon-mo May 14, 2024
8a7cc25
Revert "[Kernel] Use flash-attn for decoding (#3648)" (#4820)
rkooo567 May 15, 2024
65bf2ac
[Core][2/N] Model runner refactoring part 2. Combine prepare prefill …
rkooo567 May 15, 2024
e9cdd2b
[CI/Build] Further decouple HuggingFace implementation from ours duri…
DarkLight1337 May 15, 2024
a5675d3
[Bugfix] Properly set distributed_executor_backend in ParallelConfig …
zifeitong May 15, 2024
361c461
[Doc] Highlight the fourth meetup in the README (#4842)
zhuohan123 May 15, 2024
fc0d9df
[Frontend] Re-enable custom roles in Chat Completions API (#4758)
DarkLight1337 May 15, 2024
52f8107
[Frontend] Support OpenAI batch file format (#4794)
wuisawesome May 15, 2024
30e7543
[Core] Implement sharded state loader (#4690)
aurickq May 16, 2024
973617a
[Speculative decoding][Re-take] Enable TP>1 speculative decoding (#4840)
comaniac May 16, 2024
5c34257
Add marlin unit tests and marlin benchmark script (#4815)
alexm-redhat May 16, 2024
99caa49
[Kernel] add bfloat16 support for gptq marlin kernel (#4788)
jinzhen-lin May 16, 2024
dbc0754
[docs] Fix typo in examples filename openi -> openai (#4864)
wuisawesome May 16, 2024
5e0391c
[Frontend] Separate OpenAI Batch Runner usage from API Server (#4851)
wuisawesome May 16, 2024
9216b9c
[Bugfix] Bypass authorization API token for preflight requests (#4862)
dulacp May 16, 2024
6979ade
Add GPTQ Marlin 2:4 sparse structured support (#4790)
alexm-redhat May 16, 2024
f09edd8
Add JSON output support for benchmark_latency and benchmark_throughpu…
simon-mo May 16, 2024
b5853f9
[ROCm][AMD][Bugfix] adding a missing triton autotune config (#4845)
hongxiayang May 16, 2024
e081880
[Core][Distributed] remove graph mode function (#4818)
youkaichao May 16, 2024
10fa9ee
[Misc] remove old comments (#4866)
youkaichao May 16, 2024
8435b20
[Kernel] Add punica dimension for Qwen1.5-32B LoRA (#4850)
Silencioo May 16, 2024
2060e93
[Kernel] Add w8a8 CUTLASS kernels (#4749)
tlrmchlsmth May 16, 2024
9a31a81
[Bugfix] Fix FP8 KV cache support (#4869)
WoosukKwon May 16, 2024
8e7fb5d
Support to serve vLLM on Kubernetes with LWS (#4829)
kerthcet May 16, 2024
0150a10
[Frontend] OpenAI API server: Do not add bos token by default when en…
bofenghuang May 17, 2024
2614812
[Build/CI] Extending the set of AMD tests with Regression, Basic Corr…
Alexei-V-Ivanov-AMD May 17, 2024
33e0823
[Bugfix] fix rope error when load models with different dtypes (#4835)
jinzhen-lin May 17, 2024
48d5985
Sync huggingface modifications of qwen Moe model (#4774)
eigen2017 May 17, 2024
c5711ef
[Doc] Update Ray Data distributed offline inference example (#4871)
Yard1 May 17, 2024
86b45ae
[Bugfix] Relax tiktoken to >= 0.6.0 (#4890)
mgoin May 17, 2024
c0724fc
[ROCm][Hardware][AMD] Adding Navi21 to fallback to naive attention if…
alexeykondrat May 18, 2024
2e9a222
[Lora] Support long context lora (#4787)
rkooo567 May 18, 2024
f68470e
[Bugfix][Model] Add base class for vision-language models (#4809)
DarkLight1337 May 19, 2024
27ce854
[Kernel] Add marlin_24 unit tests (#4901)
alexm-redhat May 19, 2024
b57e6c5
[Kernel] Add flash-attn back (#4907)
WoosukKwon May 20, 2024
6287537
[Model] LLaVA model refactor (#4910)
DarkLight1337 May 20, 2024
da5a0b5
Remove marlin warning (#4918)
alexm-redhat May 20, 2024
546a97e
[Misc]: allow user to specify port in distributed setting (#4914)
ZwwWayne May 20, 2024
943e72c
[Build/CI] Enabling AMD Entrypoints Test (#4834)
Alexei-V-Ivanov-AMD May 20, 2024
f0eecee
[Bugfix] Fix dummy weight for fp8 (#4916)
mzusman May 20, 2024
1937e29
[Core] Sharded State Loader download from HF (#4889)
aurickq May 20, 2024
c3af447
[Doc]Add documentation to benchmarking script when running TGI (#4920)
KuntaiDu May 20, 2024
65ae8c2
[Core] Fix scheduler considering "no LoRA" as "LoRA" (#4897)
Yard1 May 21, 2024
d130b57
[Model] add rope_scaling support for qwen2 (#4930)
hzhwcmhf May 21, 2024
f12c3b5
[Model] Add Phi-2 LoRA support (#4886)
Isotr0py May 21, 2024
e941f88
[Docs] Add acknowledgment for sponsors (#4925)
simon-mo May 21, 2024
757b62c
[CI/Build] Codespell ignore `build/` directory (#4945)
mgoin May 21, 2024
14772ee
[Bugfix] Fix flag name for `max_seq_len_to_capture` (#4935)
kerthcet May 21, 2024
99eff67
[Bugfix][Kernel] Add head size check for attention backend selection …
Isotr0py May 21, 2024
9b9a10d
[Frontend] Dynamic RoPE scaling (#4638)
sasha0552 May 22, 2024
5f6d10c
[CI/Build] Enforce style for C++ and CUDA code with `clang-format` (#…
mgoin May 22, 2024
c74c913
[misc] remove comments that were supposed to be removed (#4977)
rkooo567 May 22, 2024
8674f98
[Kernel] Fixup for CUTLASS kernels in CUDA graphs (#4954)
tlrmchlsmth May 22, 2024
a3a73ab
[Misc] Load FP8 kv-cache scaling factors from checkpoints (#4893)
comaniac May 22, 2024
97b0300
[Model] LoRA gptbigcode implementation (#3949)
raywanb May 22, 2024
eb6d3c2
[Core] Eliminate parallel worker per-step task scheduling overhead (#…
njhill May 22, 2024
a36de68
[Minor] Fix small typo in llama.py: QKVParallelLinear -> Quantization…
pcmoritz May 22, 2024
ee3eea0
[Misc] Take user preference in attention selector (#4960)
comaniac May 22, 2024
6066253
Marlin 24 prefill performance improvement (about 25% better on averag…
alexm-redhat May 23, 2024
2ba80be
[Bugfix] Update Dockerfile.cpu to fix NameError: name 'vllm_ops' is n…
LetianLee May 23, 2024
5eda2ea
[Core][1/N] Support send/recv in PyNCCL Groups (#4988)
andoorve May 23, 2024
a124232
[Kernel] Initial Activation Quantization Support (#4525)
dsikka May 23, 2024
e3470f8
[Core]: Option To Use Prompt Token Ids Inside Logits Processor (#4985)
kezouke May 23, 2024
6a50f4c
[Doc] add ccache guide in doc (#5012)
youkaichao May 23, 2024
9197709
[Bugfix] Fix Mistral v0.3 Weight Loading (#5005)
robertgshaw2-redhat May 24, 2024
e64fde4
[Core][Bugfix]: fix prefix caching for blockv2 (#4764)
leiwen83 May 24, 2024
8e192ff
[Kernel][Backend][Model] Blocksparse flash attention kernel and Phi-3…
linxihui May 25, 2024
325c119
[Misc] add logging level env var (#5045)
youkaichao May 25, 2024
d5a1697
[Dynamic Spec Decoding] Minor fix for disabling speculative decoding …
LiuXiaoxuanPKU May 25, 2024
f17a1a8
[Misc] Make Serving Benchmark More User-friendly (#5044)
ywang96 May 25, 2024
1102bef
[Bugfix / Core] Prefix Caching Guards (merged with main) (#4846)
zhuohan123 May 27, 2024
fbdb7b3
[Core] Allow AQLM on Pascal (#5058)
sasha0552 May 27, 2024
890aa93
[Model] Add support for falcon-11B (#5069)
Isotr0py May 27, 2024
d4f3985
[Core] Sliding window for block manager v2 (#4545)
mmoskal May 28, 2024
9ba4155
[BugFix] Fix Embedding Models with TP>1 (#5075)
robertgshaw2-redhat May 28, 2024
dd8de11
[Kernel][ROCm][AMD] Add fused_moe Triton configs for MI300X (#4951)
divakar-amd May 28, 2024
290f4ad
[Docs] Add Dropbox as sponsors (#5089)
simon-mo May 28, 2024
5ae5ed1
[Core] Consolidate prompt arguments to LLM engines (#4328)
DarkLight1337 May 28, 2024
dfba529
[Bugfix] Remove the last EOS token unless explicitly specified (#5077)
jsato8094 May 29, 2024
616e600
[Misc] add gpu_memory_utilization arg (#5079)
pandyamarut May 29, 2024
5bd3c65
[Core][Optimization] remove vllm-nccl (#5091)
youkaichao May 29, 2024
18c1f16
[Bugfix] Fix arguments passed to `Sequence` in stop checker test (#5092)
DarkLight1337 May 29, 2024
594392d
[Core][Distributed] improve p2p access check (#4992)
youkaichao May 29, 2024
4238bc8
[Core] Cross-attention KV caching and memory-management (towards even…
afeldman-nm May 29, 2024
ae495c7
[Doc]Replace deprecated flag in readme (#4526)
ronensc May 29, 2024
eecd864
[Bugfix][CI/Build] Fix test and improve code for `merge_async_iterato…
DarkLight1337 May 29, 2024
eb6c50c
[Bugfix][CI/Build] Fix codespell failing to skip files in `git diff` …
DarkLight1337 May 29, 2024
b1c2556
[Core] Avoid the need to pass `None` values to `Sequence.inputs` (#5099)
DarkLight1337 May 29, 2024
7c3604f
[Bugfix] logprobs is not compatible with the OpenAI spec #4795 (#5031)
Etelis May 29, 2024
4fbcb0f
[Doc][Build] update after removing vllm-nccl (#5103)
youkaichao May 29, 2024
5bf185a
[Bugfix] gptq_marlin: Ensure g_idx_sort_indices is not a Parameter (#…
alexm-redhat May 30, 2024
e07aff9
[CI/Build] Docker cleanup functionality for amd servers (#5112)
okakarpa May 30, 2024
87d41c8
[BUGFIX] [FRONTEND] Correct chat logprobs (#5029)
br3no May 30, 2024
d910816
[Bugfix] Automatically Detect SparseML models (#5119)
robertgshaw2-redhat May 30, 2024
f758505
[CI/Build] increase wheel size limit to 200 MB (#5130)
youkaichao May 30, 2024
d79d9ea
[Misc] remove duplicate definition of `seq_lens_tensor` in model_runn…
ita9naiwa May 30, 2024
a9bcc7a
[Doc] Use intersphinx and update entrypoints docs (#5125)
DarkLight1337 May 30, 2024
429d897
add doc about serving option on dstack (#3074)
deep-diver May 30, 2024
87a658c
Bump version to v0.4.3 (#5046)
simon-mo May 30, 2024
45a1a69
[Build] Disable sm_90a in cu11 (#5141)
simon-mo May 30, 2024
b35be54
[Bugfix] Avoid Warnings in SparseML Activation Quantization (#5120)
robertgshaw2-redhat May 31, 2024
6d21fa1
[Kernel] Marlin_24: Ensure the mma.sp instruction is using the ::orde…
alexm-redhat May 31, 2024
533c217
Fix cutlass sm_90a vesrion in CMakeList
simon-mo May 31, 2024
a22dea5
[Model] Support MAP-NEO model (#5081)
xingweiqu May 31, 2024
e9d3aa0
Revert "[Kernel] Marlin_24: Ensure the mma.sp instruction is using th…
simon-mo May 31, 2024
a377f0b
[Misc]: optimize eager mode host time (#4196)
FuncSherl May 31, 2024
e9899fb
[Model] Enable FP8 QKV in MoE and refine kernel tuning script (#5039)
comaniac May 31, 2024
6575791
[Doc] Add checkmark for GPTBigCodeForCausalLM LoRA support (#5171)
njhill Jun 1, 2024
1197e02
[Build] Guard against older CUDA versions when building CUTLASS 3.x k…
tlrmchlsmth Jun 1, 2024
a360ff8
[CI/Build] CMakeLists: build all extensions' cmake targets at the sam…
dtrifiro Jun 1, 2024
4019807
Update Dockerfile.rocm
shajrawi May 30, 2024
1f7c555
Merge commit 'a360ff80bb34f9dfcd21cf880c2030daa2d6b3a3' of https://gi…
mawong-amd Jun 4, 2024
324cc8b
Use world group to broadcast metadata on ROCm
mawong-amd Jun 4, 2024
b373a0e
Custom PagedAttn optimizations for ROCm
lcskrishna May 23, 2024
c893d70
Update linear.py
gshtras Jun 3, 2024
b0ba2db
adding rocm fp8
charlifu May 14, 2024
86bbfef
Fixes from main:
mawong-amd Jun 6, 2024
9e4e680
Merge branch 'main' of github.com:ROCm/vllm into main_upstream_candid…
mawong-amd Jun 6, 2024
[Core][Optimization] change copy-on-write from dict[int, list] to list
youkaichao authored May 7, 2024

commit 469f85c7829c301b6dec48725951b5501c18d611
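
At a high level, this commit flattens copy-on-write (CoW) bookkeeping from a one-to-many dict into a flat list of (src, dst) pairs. A minimal sketch of the before/after shapes, with sample block IDs for illustration (not vLLM's actual API):

# Sketch of the data-structure change; values are illustrative.
from collections import defaultdict
from typing import Dict, List, Tuple

# Before: each source block ID mapped to a list of destination block IDs.
cows_dict: Dict[int, List[int]] = defaultdict(list)
cows_dict[2].append(3)
cows_dict[2].append(5)

# After: a flat list of (src, dst) pairs carrying the same information,
# built with O(1) appends and consumed without nested loops.
cows_list: List[Tuple[int, int]] = [(2, 3), (2, 5)]

# Both encode the same set of copies.
assert [(s, d) for s, ds in cows_dict.items() for d in ds] == cows_list
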
6 changes: 2 additions & 4 deletions tests/core/block/test_block_table.py
@@ -410,8 +410,7 @@ def test_cow(block_size: int, sequence_len: int, append_len: int,
         expected_src = static_block_table.physical_block_ids[cow_block_id]
         expected_dst = appender_block_table.physical_block_ids[cow_block_id]
 
-        assert expected_src in cows
-        assert expected_dst in cows[expected_src]
+        assert (expected_src, expected_dst) in cows
     else:
         # Otherwise, there should be no copy-on-write.
         assert not cows
@@ -490,8 +489,7 @@ def test_cow_lookahead_simple(block_size: int, sequence_len: int,
         expected_src = static_block_table.physical_block_ids[cow_block_id]
         expected_dst = appender_block_table.physical_block_ids[cow_block_id]
 
-        assert expected_src in cows
-        assert expected_dst in cows[expected_src]
+        assert (expected_src, expected_dst) in cows
 
     static_block_table.free()
     appender_block_table.free()
6 changes: 5 additions & 1 deletion tests/core/test_block_manager.py
@@ -1,4 +1,5 @@
 import time
+from collections import defaultdict
 from typing import List
 
 import pytest
@@ -155,7 +156,10 @@ def test_append_slot_cow():
 
     cows = block_manager.append_slots(child)
     assert cows
-    for src_block, dst_blocks in cows.items():
+    dict_cows = defaultdict(list)
+    for src_block, dst_block in cows:
+        dict_cows[src_block].append(dst_block)
+    for src_block, dst_blocks in dict_cows.items():
         assert src_block not in dst_blocks
 
     after_blocks = block_manager.get_num_free_gpu_blocks()
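
The regrouping above rebuilds the old dict shape from the new flat list so the existing assertions still apply. As a standalone sketch with sample values:

from collections import defaultdict

# Regroup flat (src, dst) CoW pairs into the old src -> [dst, ...] shape.
cows = [(2, 3), (2, 5), (7, 8)]
dict_cows = defaultdict(list)
for src_block, dst_block in cows:
    dict_cows[src_block].append(dst_block)

assert dict_cows == {2: [3, 5], 7: [8]}
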
4 changes: 2 additions & 2 deletions tests/core/test_scheduler.py
@@ -636,7 +636,7 @@ def test_schedule_decode_blocks_to_copy_update():
 
     # The last request should be swapped out.
     scheduler.block_manager.append_slots = MagicMock()
-    scheduler.block_manager.append_slots.return_value = {2: [3]}
+    scheduler.block_manager.append_slots.return_value = [(2, 3)]
 
     budget = create_token_budget()
     remaining_running, output = scheduler._schedule_running(
@@ -845,7 +845,7 @@ def test_schedule_swapped_blocks_to_copy():
 
     # The last request should be swapped out.
     scheduler.block_manager.append_slots = MagicMock()
-    scheduler.block_manager.append_slots.return_value = {2: [3]}
+    scheduler.block_manager.append_slots.return_value = [(2, 3)]
 
     budget = create_token_budget()
     remaining_swapped, output = scheduler._schedule_swapped(
21 changes: 10 additions & 11 deletions vllm/core/block/common.py
@@ -1,5 +1,4 @@
-from collections import defaultdict
-from typing import Dict, Iterable, List, Optional, Protocol
+from typing import Dict, Iterable, List, Optional, Protocol, Tuple
 
 from vllm.core.block.interfaces import Block, BlockAllocator
 
@@ -111,7 +110,7 @@ def __init__(
         refcounter: RefCounterProtocol,
         allocator: BlockAllocator,
     ):
-        self._copy_on_writes: Dict[BlockId, List[BlockId]] = defaultdict(list)
+        self._copy_on_writes: List[Tuple[BlockId, BlockId]] = []
         self._refcounter = refcounter
         self._allocator = allocator
 
@@ -152,25 +151,25 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
         # Track src/dst copy.
         assert src_block_id is not None
         assert block_id is not None
-        self._copy_on_writes[src_block_id].append(block_id)
+        self._copy_on_writes.append((src_block_id, block_id))
 
         return block_id
 
-    def clear_cows(self) -> Dict[BlockId, List[BlockId]]:
+    def clear_cows(self) -> List[Tuple[BlockId, BlockId]]:
         """Clears the copy-on-write tracking information and returns the current
         state.
 
-        This method returns a dictionary mapping source block indices to lists
-        of destination block indices for the current copy-on-write operations.
+        This method returns a list mapping source block indices to
+        destination block indices for the current copy-on-write operations.
         It then clears the internal tracking information.
 
         Returns:
-            Dict[BlockId, List[BlockId]]: A dictionary mapping source
-                block indices to lists of destination block indices for the
+            List[Tuple[BlockId, BlockId]]: A list mapping source
+                block indices to destination block indices for the
                 current copy-on-write operations.
         """
-        cows = dict(self._copy_on_writes)
-        self._copy_on_writes.clear()
+        cows = self._copy_on_writes
+        self._copy_on_writes = []
         return cows
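
The clear-and-swap in clear_cows returns the accumulated list and replaces it with a fresh one, where the old code had to build dict(self._copy_on_writes) and then clear in place. A self-contained sketch of that pattern; the class name and record method are stand-ins for illustration, not the vLLM API:

from typing import List, Tuple

BlockId = int

class CowTrackerSketch:
    # Simplified stand-in for CopyOnWriteTracker, for illustration only.
    def __init__(self) -> None:
        self._copy_on_writes: List[Tuple[BlockId, BlockId]] = []

    def record(self, src: BlockId, dst: BlockId) -> None:
        # One O(1) append per CoW event; no per-source list needed.
        self._copy_on_writes.append((src, dst))

    def clear_cows(self) -> List[Tuple[BlockId, BlockId]]:
        # Hand back the accumulated pairs and start fresh; no copy made.
        cows = self._copy_on_writes
        self._copy_on_writes = []
        return cows

tracker = CowTrackerSketch()
tracker.record(2, 3)
assert tracker.clear_cows() == [(2, 3)]
assert tracker.clear_cows() == []
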


8 changes: 4 additions & 4 deletions vllm/core/block/cpu_gpu_block_allocator.py
@@ -1,4 +1,4 @@
-from typing import Dict, FrozenSet, List, Optional
+from typing import Dict, FrozenSet, List, Optional, Tuple
 
 from vllm.core.block.interfaces import (Block, BlockAllocator, BlockId,
                                         DeviceAwareBlockAllocator)
@@ -185,13 +185,13 @@ def get_num_free_blocks(self, device: Device) -> int:
     def get_num_total_blocks(self, device: Device) -> int:
         return self._allocators[device].get_num_total_blocks()
 
-    def clear_copy_on_writes(self) -> Dict[int, List[int]]:
+    def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
         """Clears the copy-on-write (CoW) state and returns the mapping of
         source to destination block IDs.
 
         Returns:
-            Dict[int, List[int]]: A dictionary mapping source block IDs to lists
-                of destination block IDs.
+            List[Tuple[int, int]]: A list mapping source block IDs to
+                destination block IDs.
         """
         # CoW only supported on GPU
         device = Device.GPU
6 changes: 3 additions & 3 deletions vllm/core/block/interfaces.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Dict, FrozenSet, List, Optional, Protocol
+from typing import FrozenSet, List, Optional, Protocol, Tuple
 
 from vllm.utils import Device
 
@@ -122,7 +122,7 @@ def all_block_ids(self) -> FrozenSet[int]:
         pass
 
     @abstractmethod
-    def clear_copy_on_writes(self) -> Dict[int, List[int]]:
+    def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
         pass
 
     @abstractmethod
@@ -187,7 +187,7 @@ def all_block_ids(self) -> FrozenSet[int]:
         pass
 
     @abstractmethod
-    def clear_copy_on_writes(self) -> Dict[int, List[int]]:
+    def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
         pass
 
     @abstractmethod
8 changes: 4 additions & 4 deletions vllm/core/block/naive_block.py
@@ -1,4 +1,4 @@
-from typing import Dict, FrozenSet, Iterable, List, Optional, Set
+from typing import FrozenSet, Iterable, List, Optional, Set, Tuple
 
 from vllm.core.block.common import (CopyOnWriteTracker, RefCounter,
                                     get_all_blocks_recursively)
@@ -175,12 +175,12 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
         """
         return self._cow_tracker.cow_block_if_not_appendable(block)
 
-    def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
+    def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
         """Returns the copy-on-write source->destination mapping and clears it.
 
         Returns:
-            Dict[BlockId, List[BlockId]]: A dictionary mapping source
-                block indices to lists of destination block indices.
+            List[Tuple[BlockId, BlockId]]: A list mapping source
+                block indices to destination block indices.
         """
         return self._cow_tracker.clear_cows()

8 changes: 4 additions & 4 deletions vllm/core/block/prefix_caching_block.py
@@ -1,7 +1,7 @@
 """Token blocks."""
 from itertools import takewhile
 from os.path import commonprefix
-from typing import Dict, FrozenSet, Iterable, List, Optional
+from typing import Dict, FrozenSet, Iterable, List, Optional, Tuple
 
 from vllm.core.block.common import (CopyOnWriteTracker,
                                     get_all_blocks_recursively)
@@ -337,12 +337,12 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
         """
         return self._cow_tracker.cow_block_if_not_appendable(block)
 
-    def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
+    def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
         """Returns the copy-on-write source->destination mapping and clears it.
 
         Returns:
-            Dict[BlockId, List[BlockId]]: A dictionary mapping source
-                block indices to lists of destination block indices.
+            List[Tuple[BlockId, BlockId]]: A list mapping source
+                block indices to destination block indices.
         """
         return self._cow_tracker.clear_cows()

10 changes: 5 additions & 5 deletions vllm/core/block_manager_v1.py
@@ -5,7 +5,7 @@
 from os.path import commonprefix
 from typing import Dict, List, Optional
 from typing import Sequence as GenericSequence
-from typing import Set
+from typing import Set, Tuple
 
 from vllm.block import BlockTable, PhysicalTokenBlock
 from vllm.core.evictor_v1 import EvictionPolicy, Evictor, make_evictor
@@ -386,7 +386,7 @@ def append_slots(
         self,
         seq: Sequence,
         num_lookahead_slots: int = 0,
-    ) -> Dict[int, List[int]]:
+    ) -> List[Tuple[int, int]]:
         """Allocate a physical slot for a new token."""
         logical_blocks = seq.logical_token_blocks
         block_table = self.block_tables[seq.seq_id]
@@ -405,7 +405,7 @@ def append_slots(
             # Allocate a new physical block.
             new_block = self._allocate_last_physical_block(seq)
             block_table.append(new_block)
-            return {}
+            return []
 
         # We want to append the token to the last physical block.
         last_block = block_table[-1]
@@ -418,15 +418,15 @@ def append_slots(
                 maybe_new_block = self._maybe_promote_last_block(
                     seq, last_block)
                 block_table[-1] = maybe_new_block
-            return {}
+            return []
         else:
             # The last block is shared with other sequences.
             # Copy on Write: Allocate a new block and copy the tokens.
             new_block = self._allocate_last_physical_block(seq)
 
             block_table[-1] = new_block
             self.gpu_allocator.free(last_block)
-            return {last_block.block_number: [new_block.block_number]}
+            return [(last_block.block_number, new_block.block_number)]
 
     def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
         # NOTE: fork does not allocate a new physical block.
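
With the new signature, append_slots either returns an empty list (the table grew or the token was appended in place) or a single (src, dst) pair (the last block was shared, so a fresh block was allocated and a copy scheduled). A toy model of that decision, using hypothetical names in place of the block-manager internals:

from typing import List, Tuple

def toy_append_slot(ref_count: int, last_block: int,
                    new_block: int) -> List[Tuple[int, int]]:
    # Toy model of the CoW branch in append_slots; names are hypothetical.
    if ref_count == 1:
        # Exclusively owned: append in place, nothing to copy.
        return []
    # Shared with other sequences: copy-on-write into the new block.
    return [(last_block, new_block)]

assert toy_append_slot(ref_count=1, last_block=4, new_block=9) == []
assert toy_append_slot(ref_count=2, last_block=4, new_block=9) == [(4, 9)]
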
3 changes: 2 additions & 1 deletion vllm/core/block_manager_v2.py
@@ -1,6 +1,7 @@
 """A block manager that manages token blocks."""
 from typing import Dict, List, Optional
 from typing import Sequence as GenericSequence
+from typing import Tuple
 
 from vllm.core.block.block_table import BlockTable
 from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator
@@ -166,7 +167,7 @@ def append_slots(
         self,
         seq: Sequence,
         num_lookahead_slots: int,
-    ) -> Dict[int, List[int]]:
+    ) -> List[Tuple[int, int]]:
 
         block_table = self.block_tables[seq.seq_id]

3 changes: 2 additions & 1 deletion vllm/core/interfaces.py
@@ -2,6 +2,7 @@
 from abc import ABC, abstractmethod
 from typing import Dict, List
 from typing import Sequence as GenericSequence
+from typing import Tuple
 
 from vllm.sequence import Sequence, SequenceGroup
 
@@ -54,7 +55,7 @@ def append_slots(
         self,
         seq: Sequence,
         num_lookahead_slots: int,
-    ) -> Dict[int, List[int]]:
+    ) -> List[Tuple[int, int]]:
         pass
 
     @abstractmethod
5 changes: 1 addition & 4 deletions vllm/core/scheduler.py
@@ -1027,10 +1027,7 @@ def _append_slots(
 
         for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING):
             cows = self.block_manager.append_slots(seq, num_lookahead_slots)
-
-            for src, dests in cows.items():
-                for dest in dests:
-                    blocks_to_copy.append((src, dest))
+            blocks_to_copy.extend(cows)
 
     def _preempt(
         self,
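
Because append_slots now returns pairs directly, the scheduler's nested dict traversal collapses into a single extend. Both forms below produce the same blocks_to_copy; the values are illustrative:

from typing import Dict, List, Tuple

# Old shape: a src -> [dst, ...] dict needed a nested loop to flatten.
old_cows: Dict[int, List[int]] = {2: [3, 5]}
blocks_to_copy_old: List[Tuple[int, int]] = []
for src, dests in old_cows.items():
    for dest in dests:
        blocks_to_copy_old.append((src, dest))

# New shape: the pairs arrive pre-flattened, so extend() suffices.
new_cows: List[Tuple[int, int]] = [(2, 3), (2, 5)]
blocks_to_copy_new: List[Tuple[int, int]] = []
blocks_to_copy_new.extend(new_cows)

assert blocks_to_copy_old == blocks_to_copy_new
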