Skip to content

Commit

Permalink
⏪ revert quick budget check
Browse files Browse the repository at this point in the history
It turns out we need to add seqs to SequenceStatus.FINISHED_IGNORED
if num_new_tokens > prompt_limit, so the quick budget check is moved back to run after that handling instead of before it.

Signed-off-by: Prashant Gupta <[email protected]>
  • Loading branch information
prashantgupta24 committed Nov 22, 2024
1 parent 4f1c322 commit cb8fc93
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,12 +1019,6 @@ def _schedule_prefills(
budget,
partial_prefill_metadata=partial_prefill_metadata)

num_new_seqs = seq_group.get_max_num_running_seqs()
# quick budget check
if num_new_tokens == 0 or not budget.can_schedule(
num_new_tokens=num_new_tokens, num_new_seqs=num_new_seqs):
break

if not enable_chunking:
num_prompt_tokens = waiting_seqs[0].get_len()
assert num_new_tokens == num_prompt_tokens
Expand Down Expand Up @@ -1075,6 +1069,12 @@ def _schedule_prefills(
waiting_queue.popleft()
continue

num_new_seqs = seq_group.get_max_num_running_seqs()
# quick budget check
if num_new_tokens == 0 or not budget.can_schedule(
num_new_tokens=num_new_tokens, num_new_seqs=num_new_seqs):
break

# Can schedule this request.
if curr_loras is not None and lora_int_id > 0:
curr_loras.add(lora_int_id)
Expand Down

0 comments on commit cb8fc93

Please sign in to comment.