diff --git a/Makefile b/Makefile index a071a8d0..13294888 100644 --- a/Makefile +++ b/Makefile @@ -21,15 +21,15 @@ install: .PHONY: lint lint: check_pylint_installed check_pytest_installed - @pylint --rcfile=.pylintrc -s n --jobs=128 ./llumnix - + @pylint --rcfile=.pylintrc -s n --jobs=128 ./llumnix + @pylint --rcfile=.pylintrc \ --disable=protected-access,super-init-not-called,unused-argument,redefined-outer-name,invalid-name \ -s n --jobs=128 ./tests .PHONY: test test: check_pytest_installed - @pytest -v --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings + @pytest -v --ignore=third_party --ignore=tests/e2e_test --disable-warnings @python examlpes/offline_inference.py @pytest -v -x -s --tb=long ./tests/e2e_test/test_e2e.py @pytest -v -x -s --tb=long ./tests/e2e_test/test_bench.py @@ -37,7 +37,7 @@ test: check_pytest_installed .PHONY: unit_test unit_test: check_pytest_installed - @pytest -v --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings + @pytest -v --ignore=third_party --ignore=tests/e2e_test --disable-warnings .PHONY: offline_test offline_test: diff --git a/llumnix/backends/utils.py b/llumnix/backends/utils.py index 16e4da4d..ba5c2b57 100644 --- a/llumnix/backends/utils.py +++ b/llumnix/backends/utils.py @@ -40,10 +40,7 @@ def initialize_placement_group( """Initialize the distributed cluster probably with Ray. Args: - parallel_config: The configurations for parallel execution. - engine_use_ray: Whether to use Ray for async engine. - ray_address: The address of the Ray cluster. If None, uses - the default Ray cluster address. + world_size: The number of workers in Llumlet. Returns: A tuple of (`distributed_init_method`, `placement_group`). 
The diff --git a/tests/unit_test/entrypoints/vllm/api_server_manager_service.py b/tests/unit_test/entrypoints/vllm/api_server_manager_service.py index 6671bb80..10f802a0 100644 --- a/tests/unit_test/entrypoints/vllm/api_server_manager_service.py +++ b/tests/unit_test/entrypoints/vllm/api_server_manager_service.py @@ -12,6 +12,7 @@ # limitations under the License. import argparse +import time import uvicorn import ray from ray.util.queue import Queue as RayQueue @@ -32,7 +33,6 @@ engine_manager = None MANAGER_ACTOR_NAME = llumnix.llm_engine_manager.MANAGER_ACTOR_NAME ENTRYPOINTS_ACTOR_NAME = "entrypoints" -request_output_queue = RayQueue() @ray.remote(num_cpus=0, lifetime="detached") @@ -110,8 +110,11 @@ def stats() -> Response: parser = EngineManagerArgs.add_cli_args(parser) args = parser.parse_args() + # magic actor, without this actor, FastAPIServer cannot initialize correctly. + # If this actor is placed globally, pylint will hang if testing api_server_manager and api_server_service concurrently (--jobs > 1). + request_output_queue = RayQueue() + request_output_queue_type = QueueType(args.request_output_queue_type) engine_manager = init_manager_service(request_output_queue_type, args) - - import time - time.sleep(5) + + time.sleep(2)