Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Entrypoints][Refactor] Refactor llumnix entrypoints to be more modular #55

Merged
merged 12 commits into from
Oct 17, 2024
2 changes: 1 addition & 1 deletion configs/base.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SERVER:
HOST: '127.0.0.1'
PORT: 37000
PORT: 1234
QUEUE_TYPE: "rayqueue"

RAY:
Expand Down
2 changes: 1 addition & 1 deletion examlpes/offline_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
ray_cluster_port=6379

# Note: launch_ray_cluster will stop current ray cluster first, then init a new one.
launch_ray_cluster(ray_cluster_port=ray_cluster_port)
launch_ray_cluster(port=ray_cluster_port)
connect_to_ray_cluster(port=ray_cluster_port)

# Set manager args and engine args.
Expand Down
6 changes: 4 additions & 2 deletions llumnix/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
from vllm import *

from llumnix.server_info import ServerInfo
from llumnix.entrypoints.llumnix_utils import (launch_ray_cluster, connect_to_ray_cluster,
init_manager, init_llumlets)
from llumnix.entrypoints.utils import (launch_ray_cluster,
connect_to_ray_cluster,
init_manager,
init_llumlets)
from llumnix.arg_utils import EngineManagerArgs
from llumnix.llm_engine_manager import LLMEngineManager
from llumnix.llumlet.llumlet import Llumlet
Expand Down
79 changes: 79 additions & 0 deletions llumnix/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,84 @@
from llumnix.config import LlumnixConfig, get_llumnix_config
from llumnix.config.default import _C


class LlumnixArgumentParser(argparse.ArgumentParser):
    """Argument parser that forbids CLI-level defaults for llumnix arguments.

    While the current namespace is ``'llumnix'``, every argument added must
    leave ``default`` unset (or set it to ``None``). ``store_true`` flags get
    an explicit ``None`` default so that "flag not given" can be told apart
    from an explicit ``False``. Arguments added under any other namespace are
    passed through to argparse untouched.
    """

    def __init__(self, *args, **kwargs):
        # Set before super().__init__(): argparse registers -h/--help through
        # add_argument() during construction, which reads cur_namespace.
        self.cur_namespace = "llumnix"
        super().__init__(*args, **kwargs)

    def set_namespace(self, namespace: str):
        """Switch the namespace that subsequently added arguments belong to."""
        self.cur_namespace = namespace

    def add_argument(self, *args, **kwargs):
        """Add an argument, enforcing the no-default rule for llumnix arguments.

        Returns the ``argparse.Action`` created by the base class, matching
        the contract of ``argparse.ArgumentParser.add_argument``.

        Raises:
            AssertionError: if a non-``None`` default is supplied while the
                current namespace is ``'llumnix'`` (the help option is exempt).
        """
        if self.cur_namespace == 'llumnix' and "--help" not in args:
            assert 'default' not in kwargs or kwargs['default'] is None, \
                f"Do not set the default value for '{args[0]}' in CLI, or set default value to None. " \
                f"The default value will be retrieved from config/default.py in get_llumnix_config."
            if kwargs.get('action') == 'store_true':
                # argparse would otherwise default a store_true flag to False.
                kwargs['default'] = None
        return super().add_argument(*args, **kwargs)


# All the default values of llumnix arguments are set in default.py. So all the arguments here are set to None.

@dataclass
class LlumnixEntrypointsArgs:
    """Entrypoint-level arguments; unset (``None``) fields fall back to ``_C.SERVER``."""

    launch_ray_cluster: bool = None
    ray_cluster_port: int = None
    queue_type: str = None
    request_output_queue_port: int = None
    disable_log_requests_server: bool = None
    log_request_timestamps: bool = None
    # Path to a config file (the CLI option is declared with type=str).
    config_file: str = None

    def __post_init__(self):
        # Fill every field left as None from the upper-cased key of the same
        # name in the default SERVER config.
        for attr in dataclasses.fields(self):
            if getattr(self, attr.name) is None:
                setattr(self, attr.name, getattr(_C.SERVER, attr.name.upper()))

    @classmethod
    def from_llumnix_config(cls, cfg: LlumnixConfig = get_llumnix_config()) -> 'LlumnixEntrypointsArgs':
        """Build an instance from the ``SERVER`` section of ``cfg``."""
        # Get the list of attributes of this dataclass.
        attrs = [attr.name for attr in dataclasses.fields(cls)]
        # Set the attributes from the parsed arguments.
        # The default values of attributes are defined in default.py.
        llumnix_entrypoints_args = cls(**{attr: getattr(cfg.SERVER, attr.upper()) for attr in attrs})
        return llumnix_entrypoints_args

    @classmethod
    def check_args(cls, args: 'LlumnixEntrypointsArgs', parser: argparse.ArgumentParser):
        """Assert that each value in ``args`` respects the parser's ``choices``.

        Raises:
            AssertionError: if an attribute of ``args`` is not among the
                choices declared for the parser action with the same ``dest``.
        """
        # pylint: disable=protected-access
        for action in parser._optionals._actions:
            if action.choices is not None and hasattr(args, action.dest):
                assert getattr(args, action.dest) in action.choices, f"{action.dest} should be one of {action.choices}."

    @staticmethod
    def add_cli_args(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        """Register the entrypoint options on ``parser`` and return it.

        No defaults are given here; they are resolved from the config.
        """
        parser.add_argument('--launch-ray-cluster',
                            action='store_true',
                            help='if launch ray cluster in api server')
        parser.add_argument("--ray-cluster-port",
                            type=int,
                            help='ray cluster port')
        parser.add_argument("--queue-type",
                            type=str,
                            choices=['rayqueue', 'zmq'],
                            help='queue type for request output queue')
        parser.add_argument("--request-output-queue-port",
                            type=int,
                            help='port for zmq')
        parser.add_argument('--disable-log-requests-server',
                            action='store_true',
                            help='disable logging requests in server')
        parser.add_argument("--log-request-timestamps",
                            action='store_true',
                            help='if log request timestamps')
        parser.add_argument("--config-file",
                            type=str,
                            help="path to config file")
        return parser

@dataclass
class EngineManagerArgs:
disable_init_instance_by_manager: bool = None
Expand Down Expand Up @@ -106,6 +184,7 @@ def from_llumnix_config(cls, cfg: LlumnixConfig = get_llumnix_config()) -> 'Engi
# Get the list of attributes of this dataclass.
attrs = [attr.name for attr in dataclasses.fields(cls)]
# Set the attributes from the parsed arguments.
# The default values of attributes are defined in default.py.
engine_manager_args = cls(**{attr: getattr(cfg.MANAGER, attr.upper()) for attr in attrs})
return engine_manager_args

Expand Down
6 changes: 3 additions & 3 deletions llumnix/backends/vllm/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from llumnix.server_info import ServerInfo
from llumnix.internal_config import MigrationConfig
from llumnix.queue.queue_client_base import QueueClientBase
from llumnix.queue.utils import get_output_queue_client, QueueType
from llumnix.queue.utils import init_output_queue_client, QueueType

logger = init_logger(__name__)

Expand All @@ -48,7 +48,7 @@ class AsyncPutQueueActor:
def __init__(self, instance_id, output_queue_type: QueueType):
self.instance_id = instance_id
self.output_queue_type = output_queue_type
self.request_output_queue_client: QueueClientBase = get_output_queue_client(output_queue_type)
self.request_output_queue_client: QueueClientBase = init_output_queue_client(output_queue_type)
self.engine_actor_handle = None

async def put_nowait_to_servers(self,
Expand Down Expand Up @@ -225,7 +225,7 @@ def step(self) -> None:
tot_blocks = set(tot_blocks)
instance_info.num_blocks_last_running_request = len(tot_blocks)
if request_outputs:
self.put_queue_args_queue.put((request_outputs, server_infos))
self.put_queue_args_queue.put_nowait((request_outputs, server_infos))
self.instance_info = instance_info
for request_output in request_outputs:
if hasattr(request_output, 'request_timestamps'):
Expand Down
3 changes: 3 additions & 0 deletions llumnix/backends/vllm/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def __init__(self, request_id, server_info, expected_steps: int, *args, **kwargs
def prompt_len(self) -> int:
return self.get_seqs()[0].get_prompt_len()

def is_finished(self) -> bool:
    """Return whether the first sequence from ``get_seqs()`` is finished."""
    first_seq = self.get_seqs()[0]
    return first_seq.is_finished()

@property
def request_len(self) -> int:
    """Return ``get_len()`` of the first sequence from ``get_seqs()``."""
    first_seq = self.get_seqs()[0]
    return first_seq.get_len()
Expand Down
19 changes: 10 additions & 9 deletions llumnix/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,34 +21,35 @@
_C = LC()

# -----------------------------------------------------------------------------
# API SERVER CONFIGURATION
# SERVER CONFIGURATION
# -----------------------------------------------------------------------------
_C.SERVER = LC()
# Hostname for the server
_C.SERVER.HOST = "localhost"
# Port number for the server
_C.SERVER.PORT = 8000
# Queue type for request output queue
_C.SERVER.QUEUE_TYPE = "rayqueue"
# Port number for the request output queue
_C.SERVER.REQUEST_OUTPUT_QUEUE_PORT = 1234
# Path to SSL key file for secure connections
_C.SERVER.SSL_KEYFILE = None
# Path to SSL certificate file for secure connections
_C.SERVER.SSL_CERTFILE = None
# Queue type for request output queue
_C.SERVER.QUEUE_TYPE = "rayqueue"
# Port number for the request output queue
_C.SERVER.REQUEST_OUTPUT_QUEUE_PORT = 1234
# Disable logging requests in server
_C.SERVER.DISABLE_LOG_REQUESTS_SERVER = False
# Enable logging request timestamp
_C.SERVER.LOG_REQUEST_TIMESTAMPS = False
# Config file of Llumnix arguments
_C.SERVER.CONFIG_FILE = None
s5u13b marked this conversation as resolved.
Show resolved Hide resolved

# -----------------------------------------------------------------------------
# RAY CONFIGURATION
# -----------------------------------------------------------------------------
_C.RAY = LC()
# Port number for the Ray cluster
_C.RAY.RAY_CLUSTER_PORT = 6379
# If True, launch Ray cluster in API server
_C.RAY.LAUNCH_RAY_CLUSTER = False
_C.SERVER.LAUNCH_RAY_CLUSTER = False
# Port number for the Ray cluster
_C.SERVER.RAY_CLUSTER_PORT = 6379

# -----------------------------------------------------------------------------
# MANAGER CONFIGURATION
Expand Down
Loading