From 5a4b59c28886c20398f3555222941a9f07afe8da Mon Sep 17 00:00:00 2001
From: Roger Wang
Date: Wed, 30 Oct 2024 20:42:01 -0700
Subject: [PATCH] comment

Signed-off-by: Roger Wang
---
 vllm/worker/model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index 233a9e664d845..891637dafbb14 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -995,7 +995,7 @@ def __init__(
         # Python can be expensive. To optimize this, we cache the block table
         # in numpy and only copy the actual input content at every iteration.
         # The shape of the cached block table will be
-        # (max batch size to capture, max context len to capture / block size).
+        # (max batch size to capture, max seq len to capture / block size).
         self.graph_block_tables = np.zeros(
             (self.max_batchsize_to_capture, self.get_max_block_per_batch()),
             dtype=np.int32)