Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Xinyi-ECNU committed Nov 5, 2024
1 parent 297ff91 commit f6c496c
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 4 deletions.
8 changes: 7 additions & 1 deletion docs/Arguments.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ usage: -m llumnix.entrypoints.vllm.api_server [-h]
[--migration-num-layers MIGRATION_NUM_LAYERS]
[--last-stage-max-blocks LAST_STAGE_MAX_BLOCKS]
[--max-stages MAX_STAGES]
[--enable-pd-disagg ENABLE_PD_DISAGG]
[--enable-pd-disagg]
[--num-dispatch-instances NUM_DISPATCH_INSTANCES]
[--log-request-timestamps]
Expand Down Expand Up @@ -170,6 +170,12 @@ usage: -m llumnix.entrypoints.vllm.api_server [-h]
`--log-request-timestamps`
- Enable logging request timestamps.

`--enable-pd-disagg`
- Enable prefill-decoding disaggregation.

`--num-dispatch-instances`
- Number of instances available for dispatch.

# Unsupported vLLM feature options

`--device`
Expand Down
2 changes: 1 addition & 1 deletion llumnix/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def add_cli_args(
type=int,
help='drop migration if the number of stages > max_stages')
parser.add_argument('--enable-pd-disagg',
type=bool,
action='store_true',
help='enable prefill decoding disaggregation')
parser.add_argument('--num-dispatch-instances',
type=int,
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e_test/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def generate_launch_command(result_filename: str = "", launch_ray_cluster: bool
f"--tensor-parallel-size 1 "
f"--request-output-queue-port {1234+port} "
f"{'--enable-pd-disagg ' if enable_pd_disagg else ''} "
f"{'--num-dispatch-instances ' if num_dispatch_instances!=math.inf else ''} "
f"{'--num-dispatch-instances {num_dispatch_instances} ' if num_dispatch_instances!=math.inf else ''} "
f"{'--launch-ray-cluster ' if launch_ray_cluster else ''}"
f"{'> instance_'+result_filename if len(result_filename)> 0 else ''} 2>&1 &"
)
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e_test/test_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def parse_manager_log_file(log_file):
@pytest.mark.asyncio
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="at least 2 gpus required for migration bench")
@pytest.mark.parametrize("model", ['/mnt/model/Qwen-7B'])
@pytest.mark.parametrize("migration_backend", ['rpc'])# 'gloo', 'nccl'])
@pytest.mark.parametrize("migration_backend", ['rpc', 'gloo', 'nccl'])
@pytest.mark.parametrize("enable_pd_disagg", [False, True])
async def test_migration_benchmark(model, migration_backend, enable_pd_disagg):
base_port = 37037
Expand Down

0 comments on commit f6c496c

Please sign in to comment.