Skip to content

Commit

Permalink
minor fix
Browse files Browse the repository at this point in the history
  • Loading branch information
comaniac committed Jul 17, 2024
1 parent e76466d commit 17a2c83
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions vllm/spec_decode/draft_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@

logger = init_logger(__name__)

# A flag to enable debug prints for the updated input tensors
# before each step.
debug_advance_input = False
enable_gpu_advance_step = True
# A flag that allows the draft model runner to use the GPU advance step.
# Set it to False to disable the GPU advance step when debugging.
allow_gpu_advance_step = True


class TP1DraftModelRunner(ModelRunner):
Expand Down Expand Up @@ -196,7 +200,7 @@ def supports_gpu_multi_step(self, execute_model_req: ExecuteModelRequest):
3. No LORA
4. No prompt_adapter_config
"""
if not enable_gpu_advance_step:
if not allow_gpu_advance_step:
return False

# We allow multi-step GPU only in decode mode
Expand Down

0 comments on commit 17a2c83

Please sign in to comment.