File tree Expand file tree Collapse file tree 1 file changed +7
-1
lines changed
tensorrt_llm/_torch/attention_backend Expand file tree Collapse file tree 1 file changed +7
-1
lines changed Original file line number Diff line number Diff line change @@ -765,8 +765,14 @@ def prepare(self) -> None:
765765 self .kv_cache_block_offsets [:, :self .num_seqs ].copy_ (
766766 self .host_kv_cache_block_offsets [:, :self .num_seqs ],
767767 non_blocking = True )
768+
769+ error_message = (
770+ f"The max KV cache length of input sequences ({ self .kv_lens [:self .num_seqs ].max ()} ) "
771+ f"exceeds the KV cache manager's maximum supported length "
772+ f"({ self .kv_cache_manager .max_seq_len } )." )
773+
768774 assert self .kv_lens [:self .num_seqs ].max (
769- ) <= self .kv_cache_manager .max_seq_len , f"Please set max_seq_len to at least { self . kv_lens [: self . num_seqs ]. max () } for kv cache manager."
775+ ) <= self .kv_cache_manager .max_seq_len , error_message
770776
771777 self .kv_lens_cuda_runtime = self .kv_lens_cuda [:self .num_seqs ]
772778 self .kv_lens_runtime = self .kv_lens [:self .num_seqs ]
You can’t perform that action at this time.
0 commit comments