From a19e8d372651abad75dc6a3939c18f23a1ae8d40 Mon Sep 17 00:00:00 2001
From: shangmingc
Date: Wed, 17 Jul 2024 15:17:07 +0800
Subject: [PATCH] [Misc][Speculative decoding] Typos and typing fixes (#6467)

Co-authored-by: caishangming.csm
---
 vllm/spec_decode/multi_step_worker.py    | 2 +-
 vllm/spec_decode/ngram_worker.py         | 4 ++--
 vllm/spec_decode/proposer_worker_base.py | 2 +-
 vllm/spec_decode/spec_decode_worker.py   | 2 +-
 vllm/spec_decode/top1_proposer.py        | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/vllm/spec_decode/multi_step_worker.py b/vllm/spec_decode/multi_step_worker.py
index 09a77f9e870fb..11e99882e3f0b 100644
--- a/vllm/spec_decode/multi_step_worker.py
+++ b/vllm/spec_decode/multi_step_worker.py
@@ -43,7 +43,7 @@ def init_device(self) -> None:
         )
 
     def set_include_gpu_probs_tensor(self) -> None:
-        # Need include_gpu_probs_tensor for multi_step_worker
+        # Need include_gpu_probs_tensor for MultiStepWorker
         self.model_runner.model.sampler.include_gpu_probs_tensor = True
 
     @torch.inference_mode()
diff --git a/vllm/spec_decode/ngram_worker.py b/vllm/spec_decode/ngram_worker.py
index 07991df52e655..a21222fec269b 100644
--- a/vllm/spec_decode/ngram_worker.py
+++ b/vllm/spec_decode/ngram_worker.py
@@ -13,7 +13,7 @@
 class NGramWorker(NonLLMProposerWorkerBase, LoraNotSupportedWorkerBase):
     """NGramWorker provides a light drafter without need for model.
 
-    Current NGramWorker only implement prompt lookup decoding,
+    Current NGramWorker only implements prompt lookup decoding,
     and in future we may also do RAG type drafter and other scenarios
     which don't rely on LLM model to give proposals.
     """
@@ -37,7 +37,7 @@ def init_device(self):
         self.device = torch.device(f"cuda:{self.local_rank}")
         self.load_model = lambda *args, **kwargs: None
 
-        # Current only support Top1Proposer
+        # Current NGramWorker only supports Top1Proposer
         self._proposer = Top1Proposer(
             weakref.proxy(self),  # type: ignore[arg-type]
             device=self.device,
diff --git a/vllm/spec_decode/proposer_worker_base.py b/vllm/spec_decode/proposer_worker_base.py
index fffa557121e17..51cefc0cbca8b 100644
--- a/vllm/spec_decode/proposer_worker_base.py
+++ b/vllm/spec_decode/proposer_worker_base.py
@@ -24,7 +24,7 @@ def sampler_output(
     ) -> Tuple[Optional[List[SamplerOutput]], bool]:
         raise NotImplementedError
 
-    def set_include_gpu_probs_tensor(self):
+    def set_include_gpu_probs_tensor(self) -> None:
         """Implementation optional"""
         pass
 
diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py
index 3c8e3dee46831..903264aad7a15 100644
--- a/vllm/spec_decode/spec_decode_worker.py
+++ b/vllm/spec_decode/spec_decode_worker.py
@@ -206,7 +206,7 @@ def __init__(
         self.probs_dtype = self.spec_decode_sampler.probs_dtype
         self.token_id_dtype = self.spec_decode_sampler.token_id_dtype
 
-        # Lazy initiazliation.
+        # Lazy initialization.
         self.scorer: SpeculativeScorer
 
         # Hidden states from target model to pass to proposer
diff --git a/vllm/spec_decode/top1_proposer.py b/vllm/spec_decode/top1_proposer.py
index 7b34b5d34208b..ade293c2c0757 100644
--- a/vllm/spec_decode/top1_proposer.py
+++ b/vllm/spec_decode/top1_proposer.py
@@ -138,7 +138,7 @@ def _split_by_proposal_len(
 
             # Currently only proposal lens of 0 or the global batch proposal len
             # are supported.
-            # If max_proposal_len is defined, then we shall no exccess this
+            # If max_proposal_len is defined, then we shall no exceed this
             # quota for nonzero_proposal
             new_k = 0
             if (self.max_proposal_len is None
@@ -219,7 +219,7 @@ def _merge_outputs(
         proposal_lens: List[int],
         nonzero_proposal_len_indices: List[int],
         sampler_transposed: bool,
-    ) -> Tuple[torch.Tensor, torch.tensor, torch.Tensor]:
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """After speculations are produced, merge the speculation results
        with the skipped sequences.
        """