From 74dedbb79ccdb5f9d0c0fdf9d261cb38c41472a6 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Wed, 30 Oct 2024 16:34:22 -0700 Subject: [PATCH] [torch.compile] upgrade tests (#9858) Signed-off-by: youkaichao Signed-off-by: Loc Huynh --- tests/compile/test_basic_correctness.py | 26 +++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/compile/test_basic_correctness.py b/tests/compile/test_basic_correctness.py index 6aa27b24b4a6e..2f92ff73845f5 100644 --- a/tests/compile/test_basic_correctness.py +++ b/tests/compile/test_basic_correctness.py @@ -30,18 +30,20 @@ def test_compile_correctness(model, model_args, pp_size, tp_size, attn_backend, pytest.skip("Not correct CUDA devices for the test.") import os os.environ["VLLM_ATTENTION_BACKEND"] = attn_backend - if not fullgraph: - os.environ["VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE"] = "0" - all_args = [["--enforce-eager"] + model_args + ["--max_model_len", "1024"] - + ["-pp", str(pp_size)] + ["-tp", str(tp_size)]] * 3 + all_args = [["--enforce-eager"] + model_args + ["-pp", str(pp_size)] + + ["-tp", str(tp_size)]] * 3 # don't test VLLM_TORCH_COMPILE_LEVEL == 3 case # inductor will change the output, so we cannot compare them. - all_envs: List[Optional[Dict[str, str]]] = [{ - "VLLM_TORCH_COMPILE_LEVEL": - str(level) - } for level in [ - CompilationLevel.NO_COMPILATION, - CompilationLevel.DYNAMO_AS_IS, - CompilationLevel.DYNAMO_ONCE, - ]] + all_envs: List[Optional[Dict[str, str]]] = [] + for level in [ + CompilationLevel.NO_COMPILATION, + CompilationLevel.DYNAMO_AS_IS, + CompilationLevel.DYNAMO_ONCE, + ]: + all_envs.append({"VLLM_TORCH_COMPILE_LEVEL": str(level)}) + if level != CompilationLevel.DYNAMO_ONCE and not fullgraph: + # "DYNAMO_ONCE" will always use fullgraph + all_envs[-1][ + "VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE"] = "0" # type: ignore + compare_all_settings(model, all_args, all_envs, method=method)