Commit 8917620

update free memory fraction
Signed-off-by: Ivy Zhang <[email protected]>
1 parent e526c60 commit 8917620

1 file changed (+5 -2 lines)

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 5 additions & 2 deletions
@@ -237,7 +237,8 @@ def test_eagle3(self, eagle3_one_model):
             disable_overlap_scheduler=True,
             cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
         )
-        kv_cache_config = KvCacheConfig(enable_block_reuse=False)
+        kv_cache_config = KvCacheConfig(enable_block_reuse=False,
+                                        free_gpu_memory_fraction=0.7)
 
         eagle_model_dir = f"{llm_models_root()}/EAGLE3-LLaMA3.1-Instruct-8B"
         target_model_dir = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"

@@ -374,12 +375,14 @@ def test_auto_dtype_tp8(self):
     def test_eagle3_tp8(self, eagle3_one_model):
         model_path = f"{llm_models_root()}/llama-3.3-models/Llama-3.3-70B-Instruct"
         eagle_model_dir = f"{llm_models_root()}/EAGLE3-LLaMA3.3-Instruct-70B"
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7)
         spec_config = EagleDecodingConfig(max_draft_len=4,
                                           speculative_model_dir=eagle_model_dir,
                                           eagle3_one_model=eagle3_one_model)
         with LLM(model_path,
                  tensor_parallel_size=8,
-                 speculative_config=spec_config) as llm:
+                 speculative_config=spec_config,
+                 kv_cache_config=kv_cache_config) as llm:
             task = MMLU(self.MODEL_NAME)
             task.evaluate(llm)
             task = GSM8K(self.MODEL_NAME)

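For reference, the setting this commit touches is KvCacheConfig's free_gpu_memory_fraction, which caps how much of the remaining GPU memory the KV cache may claim; lowering it to 0.7 presumably leaves headroom for the EAGLE3 draft model alongside the target model. Below is a minimal standalone sketch of the same pattern, assuming the usual TensorRT-LLM LLM API import paths and a hypothetical model identifier (neither is part of this diff):

# Sketch only, not part of the commit: cap the KV cache at 70% of free GPU
# memory so other allocations (e.g. a speculative-decoding draft model) fit.
from tensorrt_llm import LLM
from tensorrt_llm.llmapi import KvCacheConfig

kv_cache_config = KvCacheConfig(enable_block_reuse=False,
                                free_gpu_memory_fraction=0.7)

llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model id
          kv_cache_config=kv_cache_config)

for output in llm.generate(["The capital of France is"]):
    print(output.outputs[0].text)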