diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index 233a9e664d845..891637dafbb14 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -995,7 +995,7 @@ def __init__(
         # Python can be expensive. To optimize this, we cache the block table
         # in numpy and only copy the actual input content at every iteration.
         # The shape of the cached block table will be
-        # (max batch size to capture, max context len to capture / block size).
+        # (max batch size to capture, max seq len to capture / block size).
         self.graph_block_tables = np.zeros(
             (self.max_batchsize_to_capture, self.get_max_block_per_batch()),
             dtype=np.int32)