Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: AlibabaPAI/llumnix
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 4e67e50f7c3c580b3844da38037d5a9cc9986458
Choose a base ref
..
head repository: AlibabaPAI/llumnix
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 3e840db47cdd0ada4e7720d346f3a8ca2793d0b7
Choose a head ref
Showing with 13 additions and 10 deletions.
  1. +1 −1 Makefile
  2. +2 −1 tests/e2e_test/test_bench.py
  3. +5 −3 tests/e2e_test/test_e2e.py
  4. +1 −1 tests/e2e_test/test_migration.py
  5. +1 −1 tests/e2e_test/utils.py
  6. +1 −1 tools/bench_test.sh
  7. +1 −1 tools/e2e_test.sh
  8. +1 −1 tools/migration_test.sh
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ lint: check_pylint_installed check_pytest_installed

.PHONY: test
test: check_pytest_installed
@pytest -x --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings
@pytest -x -v --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings

#################### pygloo install for gloo migration backend begin ####################

3 changes: 2 additions & 1 deletion tests/e2e_test/test_bench.py
Original file line number Diff line number Diff line change
@@ -100,6 +100,7 @@ def parse_log_file():
return to_markdown_table(data)

@pytest.mark.asyncio
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="at least 1 gpus required for simple benchmark")
@pytest.mark.parametrize("model", ['/mnt/model/Qwen-7B'])
async def test_simple_benchmark(model):
device_count = torch.cuda.device_count()
@@ -118,7 +119,7 @@ async def run_bench_command(command):

tasks = []
for i in range(device_count):
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port+i}", model=model, num_prompts=30,
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port+i}", model=model, num_prompts=300,
dataset_type="sharegpt",
dataset_path="/mnt/dataset/sharegpt_gpt4/sharegpt_gpt4.jsonl" ,
qps=30,
8 changes: 5 additions & 3 deletions tests/e2e_test/test_e2e.py
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@
import pytest
import aiohttp
import ray
import torch

from vllm import LLM, SamplingParams

@@ -106,6 +107,7 @@ def run_vllm(model, max_model_len, sampling_params):
return vllm_output

@pytest.mark.asyncio
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="at least 1 gpus required for e2e test")
@pytest.mark.parametrize("model", ['/mnt/model/Qwen-7B'])
@pytest.mark.parametrize("migration_backend", ['rpc', 'gloo', 'nccl'])
async def test_e2e(model, migration_backend):
@@ -130,11 +132,11 @@ async def test_e2e(model, migration_backend):
timeout=60*5)
llumnix_output[prompt] = response['text'][0]

# compare
vllm_output = ray.get(run_vllm.remote())

shutdown_llumnix_service()

vllm_output = ray.get(run_vllm.remote(model, max_model_len, sampling_params))
clear_ray_state()

# compare
for prompt in prompts:
assert llumnix_output[prompt] == vllm_output[prompt]
2 changes: 1 addition & 1 deletion tests/e2e_test/test_migration.py
Original file line number Diff line number Diff line change
@@ -74,7 +74,7 @@ async def run_bench_command(command):
await process.wait()
assert process.returncode == 0

bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port}", model=model, num_prompts=30,
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port}", model=model, num_prompts=300,
dataset_type="sharegpt",
dataset_path="/mnt/dataset/sharegpt_gpt4/sharegpt_gpt4.jsonl" ,
qps=30)
2 changes: 1 addition & 1 deletion tests/e2e_test/utils.py
Original file line number Diff line number Diff line change
@@ -25,5 +25,5 @@ def to_markdown_table(data):
data_row = " | ".join(f"{str(item):<{col_widths[i]}}" for i, item in enumerate(row))
data_rows.append(data_row)

table = f"{header_row}\n{separator_row}\n" + "\n".join(data_rows)
table = f"{header_row}\n{separator_row}\n" + "\n".join(data_rows) + "\n\n"
return table
2 changes: 1 addition & 1 deletion tools/bench_test.sh
Original file line number Diff line number Diff line change
@@ -3,4 +3,4 @@ set -ex

nvidia-docker run --rm -t --net host --ipc host -v ${PWD}:/workspace -v /mnt:/mnt -w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && pytest -s ./tests/e2e_test/test_bench.py"
bash -c "pip install -e . > /dev/null && pytest -v ./tests/e2e_test/test_bench.py"
2 changes: 1 addition & 1 deletion tools/e2e_test.sh
Original file line number Diff line number Diff line change
@@ -3,4 +3,4 @@ set -ex

nvidia-docker run --rm -t --net host --ipc host -v ${PWD}:/workspace -v /mnt:/mnt -w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && pytest ./tests/e2e_test/test_e2e.py"
bash -c "pip install -e . > /dev/null && pytest -v ./tests/e2e_test/test_e2e.py"
2 changes: 1 addition & 1 deletion tools/migration_test.sh
Original file line number Diff line number Diff line change
@@ -3,4 +3,4 @@ set -ex

nvidia-docker run --rm -t --net host --ipc host -v ${PWD}:/workspace -v /mnt:/mnt -w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && pytest -s ./tests/e2e_test/test_migration.py"
bash -c "pip install -e . > /dev/null && pytest -v ./tests/e2e_test/test_migration.py"