From 17a2c8303740f74d5eee6a4a533ce5e873202cd6 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Wed, 17 Jul 2024 15:14:07 -0700 Subject: [PATCH] minor fix --- vllm/spec_decode/draft_model_runner.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/spec_decode/draft_model_runner.py b/vllm/spec_decode/draft_model_runner.py index 3cb7ec58da4c1..d2c7e6e3710a8 100644 --- a/vllm/spec_decode/draft_model_runner.py +++ b/vllm/spec_decode/draft_model_runner.py @@ -15,8 +15,12 @@ logger = init_logger(__name__) +# A flag to enable debug prints for the updated input tensors +# before each step. debug_advance_input = False -enable_gpu_advance_step = True +# A flag to allow GPU advance step for draft model runner. +# Set to False for debugging. +allow_gpu_advance_step = True class TP1DraftModelRunner(ModelRunner): @@ -196,7 +200,7 @@ def supports_gpu_multi_step(self, execute_model_req: ExecuteModelRequest): 3. No LORA 4. No prompt_adapter_config """ - if not enable_gpu_advance_step: + if not allow_gpu_advance_step: return False # We allow multi-step GPU only in decode mode