Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: AlibabaPAI/llumnix
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 4e67e50f7c3c580b3844da38037d5a9cc9986458
Choose a base ref
..
head repository: AlibabaPAI/llumnix
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 3e840db47cdd0ada4e7720d346f3a8ca2793d0b7
Choose a head ref
Showing with 13 additions and 10 deletions.
  1. +1 −1 Makefile
  2. +2 −1 tests/e2e_test/test_bench.py
  3. +5 −3 tests/e2e_test/test_e2e.py
  4. +1 −1 tests/e2e_test/test_migration.py
  5. +1 −1 tests/e2e_test/utils.py
  6. +1 −1 tools/bench_test.sh
  7. +1 −1 tools/e2e_test.sh
  8. +1 −1 tools/migration_test.sh
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ lint: check_pylint_installed check_pytest_installed

.PHONY: test
test: check_pytest_installed
@pytest -x --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings
@pytest -x -v --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings

#################### pygloo install for gloo migration backend begin ####################

3 changes: 2 additions & 1 deletion tests/e2e_test/test_bench.py
Original file line number Diff line number Diff line change
@@ -100,6 +100,7 @@ def parse_log_file():
return to_markdown_table(data)

@pytest.mark.asyncio
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="at least 1 gpus required for simple benchmark")
@pytest.mark.parametrize("model", ['/mnt/model/Qwen-7B'])
async def test_simple_benchmark(model):
device_count = torch.cuda.device_count()
@@ -118,7 +119,7 @@ async def run_bench_command(command):

tasks = []
for i in range(device_count):
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port+i}", model=model, num_prompts=30,
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port+i}", model=model, num_prompts=300,
dataset_type="sharegpt",
dataset_path="/mnt/dataset/sharegpt_gpt4/sharegpt_gpt4.jsonl" ,
qps=30,
8 changes: 5 additions & 3 deletions tests/e2e_test/test_e2e.py
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@
import pytest
import aiohttp
import ray
import torch

from vllm import LLM, SamplingParams

@@ -106,6 +107,7 @@ def run_vllm(model, max_model_len, sampling_params):
return vllm_output

@pytest.mark.asyncio
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="at least 1 gpus required for e2e test")
@pytest.mark.parametrize("model", ['/mnt/model/Qwen-7B'])
@pytest.mark.parametrize("migration_backend", ['rpc', 'gloo', 'nccl'])
async def test_e2e(model, migration_backend):
@@ -130,11 +132,11 @@ async def test_e2e(model, migration_backend):
timeout=60*5)
llumnix_output[prompt] = response['text'][0]

# compare
vllm_output = ray.get(run_vllm.remote())

shutdown_llumnix_service()

vllm_output = ray.get(run_vllm.remote(model, max_model_len, sampling_params))
clear_ray_state()

# compare
for prompt in prompts:
assert llumnix_output[prompt] == vllm_output[prompt]
2 changes: 1 addition & 1 deletion tests/e2e_test/test_migration.py
Original file line number Diff line number Diff line change
@@ -74,7 +74,7 @@ async def run_bench_command(command):
await process.wait()
assert process.returncode == 0

bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port}", model=model, num_prompts=30,
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port}", model=model, num_prompts=300,
dataset_type="sharegpt",
dataset_path="/mnt/dataset/sharegpt_gpt4/sharegpt_gpt4.jsonl" ,
qps=30)
2 changes: 1 addition & 1 deletion tests/e2e_test/utils.py
Original file line number Diff line number Diff line change
@@ -25,5 +25,5 @@ def to_markdown_table(data):
data_row = " | ".join(f"{str(item):<{col_widths[i]}}" for i, item in enumerate(row))
data_rows.append(data_row)

table = f"{header_row}\n{separator_row}\n" + "\n".join(data_rows)
table = f"{header_row}\n{separator_row}\n" + "\n".join(data_rows) + "\n\n"
return table
2 changes: 1 addition & 1 deletion tools/bench_test.sh
Original file line number Diff line number Diff line change
@@ -3,4 +3,4 @@ set -ex

nvidia-docker run --rm -t --net host --ipc host -v ${PWD}:/workspace -v /mnt:/mnt -w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && pytest -s ./tests/e2e_test/test_bench.py"
bash -c "pip install -e . > /dev/null && pytest -v ./tests/e2e_test/test_bench.py"
2 changes: 1 addition & 1 deletion tools/e2e_test.sh
Original file line number Diff line number Diff line change
@@ -3,4 +3,4 @@ set -ex

nvidia-docker run --rm -t --net host --ipc host -v ${PWD}:/workspace -v /mnt:/mnt -w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && pytest ./tests/e2e_test/test_e2e.py"
bash -c "pip install -e . > /dev/null && pytest -v ./tests/e2e_test/test_e2e.py"
2 changes: 1 addition & 1 deletion tools/migration_test.sh
Original file line number Diff line number Diff line change
@@ -3,4 +3,4 @@ set -ex

nvidia-docker run --rm -t --net host --ipc host -v ${PWD}:/workspace -v /mnt:/mnt -w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && pytest -s ./tests/e2e_test/test_migration.py"
bash -c "pip install -e . > /dev/null && pytest -v ./tests/e2e_test/test_migration.py"