Address comments

Signed-off-by: Woosuk Kwon <[email protected]>
vllm-project · Nov 28, 2024 · 89d77d6 · 89d77d6
1 parent 14411af
commit 89d77d6
Showing 1 changed file with 6 additions and 0 deletions.
diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
@@ -138,8 +138,11 @@ def append_slots(
                 # Should not exceed the maximum number of blocks per request.
                 # This is especially because the block table has the shape
                 # [..., max_num_blocks_per_req].
+                # TODO(woosuk): Check and reject requests if
+                # num_prompt_tokens + max_tokens > max_model_len.
                 self.max_num_blocks_per_req - len(req_blocks),
             )
+            assert num_new_blocks > 0
 
             new_blocks = self._get_new_blocks(num_new_blocks)
             req_blocks.extend(new_blocks)
@@ -222,8 +225,11 @@ def allocate_slots(
             # Should not exceed the maximum number of blocks per request.
             # This is especially because the block table has the shape
             # [..., max_num_blocks_per_req].
+            # TODO(woosuk): Check and reject requests if
+            # num_prompt_tokens + max_tokens > max_model_len.
             self.max_num_blocks_per_req - len(computed_blocks),
         )
+        assert num_new_blocks > 0
 
         # Concatenate the computed block IDs and the new block IDs.
         new_blocks = self._get_new_blocks(num_new_blocks)