From 3c86a032214e985d7be1d2b292fa8382f234912c Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Wed, 19 Jun 2024 14:39:49 +0000 Subject: [PATCH] Fix 8k context-length limit by raising the max_seq_len_to_capture default from 8192 to 32768 --- vllm/engine/arg_utils.py | 2 +- vllm/entrypoints/llm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 9fe0d0bb0a301..04b9e8032c0cf 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -56,7 +56,7 @@ class EngineArgs: quantization: Optional[str] = None enforce_eager: bool = False max_context_len_to_capture: Optional[int] = None - max_seq_len_to_capture: int = 8192 + max_seq_len_to_capture: int = 32768 disable_custom_all_reduce: bool = False tokenizer_pool_size: int = 0 tokenizer_pool_type: str = "ray" diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 6e971ae73f5d0..bbb5d31f0606a 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -115,7 +115,7 @@ def __init__( swap_space: int = 4, enforce_eager: bool = False, max_context_len_to_capture: Optional[int] = None, - max_seq_len_to_capture: int = 8192, + max_seq_len_to_capture: int = 32768, disable_custom_all_reduce: bool = False, **kwargs, ) -> None: