Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
robertgshaw2-neuralmagic committed Oct 27, 2024
1 parent a089cd1 commit 974aa06
Showing 1 changed file with 11 additions and 13 deletions.
24 changes: 11 additions & 13 deletions vllm/v1/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def update_from_output(
scheduler_output: "SchedulerOutput",
model_runner_output: "ModelRunnerOutput",
) -> List[EngineCoreOutput]:
# NOTE(robertgshaw2): This method should probably be in EngineCore.
# NOTE(robertgshaw2): Should this method be in EngineCore instead?
# NOTE(woosuk): This method doesn't consider speculative decoding.
sampled_token_ids = model_runner_output.sampled_token_ids_cpu.tolist()
num_scheduled_tokens = scheduler_output.num_scheduled_tokens
Expand All @@ -252,20 +252,18 @@ def update_from_output(

# TODO: Update the KV cache manager for prefix caching.

# Check for stop and update request state. This must
# be called before we make the EngineCoreOutput.
# Check for stop and update request state.
# This must be called before we make the EngineCoreOutput.
stopped = self._check_stop(request)

# Make EngineCoreOutput
engine_core_outputs.append(
EngineCoreOutput(
request_id=req_id,
new_token_ids=request.
output_token_ids[-num_new_tokens:],
finished=request.is_finished(),
finish_reason=request.get_finished_reason(),
stop_reason=request.stop_reason,
))
# Add EngineCoreOutput for this Request.
output = EngineCoreOutput(
request_id=req_id,
new_token_ids=request.output_token_ids[-num_new_tokens:],
finished=request.is_finished(),
finish_reason=request.get_finished_reason(),
stop_reason=request.stop_reason)
engine_core_outputs.append(output)

# Break out of the loop.
if stopped:
Expand Down

0 comments on commit 974aa06

Please sign in to comment.