[Fix] Add node id to BackendSim (#64)
ZeldaHuang authored Nov 4, 2024
1 parent 8d32ee9 commit 188b08e
Showing 2 changed files with 23 additions and 1 deletion.
4 changes: 3 additions & 1 deletion llumnix/backends/vllm/simulator.py
@@ -36,14 +36,16 @@ def __init__(
         migration_config: MigrationConfig,
         profiling_result_file_path: str,
         engine_args: EngineArgs,
+        node_id: str = None,
     ) -> None:
         # multi-instance args
         latency_mem = self._get_lantecy_mem(profiling_result_file_path, engine_args)
         self.engine: LLMEngineLlumnix = LLMEngineLlumnix.from_engine_args(engine_args=engine_args,
                                                                           output_queue_type=output_queue_type,
                                                                           migration_config=migration_config,
                                                                           instance_id=instance_id,
-                                                                          latency_mem=latency_mem)
+                                                                          latency_mem=latency_mem,
+                                                                          node_id=node_id)
         self.engine.scheduler = SchedulerLlumnix(self.engine.scheduler_config, self.engine.cache_config, self.engine.lora_config)
         self.engine.scheduler.add_update_instance_info_callback(self.engine.update_instance_info)
         self.engine.output_processor.scheduler = self.engine.scheduler
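
The simulated backend now takes node_id with the same default as the regular vLLM backend and simply forwards it to LLMEngineLlumnix.from_engine_args, so callers such as the manager's init_llumlets can hand the simulator a Ray node id without hitting an unexpected-keyword error. The sketch below is a minimal illustration of the usual reason a node id is threaded through like this in a Ray program: pinning an actor to the node that produced the id. The EngineActor class and the NodeAffinitySchedulingStrategy usage are assumptions for illustration only, not code from this commit, and may not match how LLMEngineLlumnix actually consumes node_id.

# Illustrative sketch only (assumption: the node id ends up driving Ray actor
# placement; the diff above only forwards it).
import ray
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy

ray.init()

@ray.remote
class EngineActor:  # hypothetical stand-in for an engine/worker actor
    def where_am_i(self):
        return ray.get_runtime_context().get_node_id()

node_id = ray.get_runtime_context().get_node_id()  # same call the new test uses
actor = EngineActor.options(
    scheduling_strategy=NodeAffinitySchedulingStrategy(node_id=node_id, soft=False)
).remote()
assert ray.get(actor.where_am_i.remote()) == node_id
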
20 changes: 20 additions & 0 deletions tests/unit_test/global_scheduler/test_llm_engine_manager.py
@@ -26,6 +26,7 @@
 from llumnix.server_info import ServerInfo
 from llumnix.queue.queue_type import QueueType
 from llumnix.global_scheduler.scaling_scheduler import InstanceType
+from llumnix.backends.vllm.simulator import BackendSimVLLM
 
 # pylint: disable=unused-import
 from tests.conftest import setup_ray_env
@@ -81,6 +82,13 @@ def migrate_out(self, src_instance_name, dst_instance_name):
     def get_num_migrate_out(self):
         return self.num_migrate_out
 
+class MockBackendSim(BackendSimVLLM):
+
+    def _get_lantecy_mem(self, *args, **kwargs):
+        latency_mem = LatencyMemData({}, {}, {})
+        latency_mem.prefill_model_params = (0,0)
+        latency_mem.decode_model_params = (0,0,0)
+        return latency_mem
+
 def init_manager():
     try:
@@ -138,6 +146,18 @@ def test_init_llumlets(setup_ray_env, engine_manager):
     engine_manager_args = EngineManagerArgs()
     assert num_instances == engine_manager_args.initial_instances
 
+def test_init_llumlets_sim(setup_ray_env, engine_manager):
+    engine_manager.profiling_result_file_path="//"
+    # pylint: disable=import-outside-toplevel
+    import llumnix.backends.vllm.simulator
+    llumnix.backends.vllm.simulator.BackendSimVLLM = MockBackendSim
+    engine_args = EngineArgs(model="facebook/opt-125m", worker_use_ray=True)
+    node_id = ray.get_runtime_context().get_node_id()
+    instance_ids, llumlets = ray.get(engine_manager.init_llumlets.remote(engine_args, node_id, QueueType("rayqueue")))
+    num_instances = ray.get(engine_manager.scale_up.remote(instance_ids, llumlets))
+    engine_manager_args = EngineManagerArgs()
+    assert num_instances == engine_manager_args.initial_instances
+
 def test_scale_up_and_down(setup_ray_env, engine_manager):
     initial_instances = 4
     instance_ids, llumlets = init_llumlets(initial_instances)
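
The new test_init_llumlets_sim exercises the simulator path end to end: it patches the module attribute llumnix.backends.vllm.simulator.BackendSimVLLM with MockBackendSim, which takes effect as long as the code that builds the backend looks the class up through that module at call time rather than holding a direct reference, and it sets profiling_result_file_path to a placeholder since MockBackendSim never reads a real profiling database. To run only this test, something along the following lines should work (the -q flag is just one choice; a plain command-line pytest invocation is equivalent):

import pytest

# Run just the new simulator-path test; the setup_ray_env fixture from
# tests.conftest provides the Ray environment it needs.
pytest.main([
    "-q",
    "tests/unit_test/global_scheduler/test_llm_engine_manager.py::test_init_llumlets_sim",
])
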
