From e59c8e44a1c9c1ea44695ecf99581e5f37109471 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Tue, 22 Oct 2024 06:23:01 +0000
Subject: [PATCH] Add fixme

---
 tests/lora/test_long_context.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tests/lora/test_long_context.py b/tests/lora/test_long_context.py
index 7ac00d54a6a52..9d51591d705ce 100644
--- a/tests/lora/test_long_context.py
+++ b/tests/lora/test_long_context.py
@@ -108,15 +108,17 @@ def lora_llm(long_context_infos):
         for info in long_context_infos.values()
     ]
 
-    llm = vllm.LLM("meta-llama/Llama-2-13b-chat-hf",
-                   enable_lora=True,
-                   max_num_seqs=16,
-                   max_loras=2,
-                   long_lora_scaling_factors=tuple(scaling_factors),
-                   max_num_batched_tokens=4096 * 8,
-                   tensor_parallel_size=4,
-                   disable_async_output_proc=True,
-                   distributed_executor_backend="mp")
+    llm = vllm.LLM(
+        "meta-llama/Llama-2-13b-chat-hf",
+        enable_lora=True,
+        max_num_seqs=16,
+        max_loras=2,
+        long_lora_scaling_factors=tuple(scaling_factors),
+        max_num_batched_tokens=4096 * 8,
+        tensor_parallel_size=4,
+        # FIXME enable async output processor
+        disable_async_output_proc=True,
+        distributed_executor_backend="mp")
 
     yield llm
     del llm