diff --git a/llumnix/backends/profiling.py b/llumnix/backends/profiling.py index ce88ec08..7fef37df 100644 --- a/llumnix/backends/profiling.py +++ b/llumnix/backends/profiling.py @@ -182,7 +182,7 @@ def model_decode(x, a, b, c): def get_latency_mem(backend_type: BackendType, profiling_database: ProfilingDatabase, gpu_type: str, **backend_args): assert BackendType.is_sim_backend(backend_type) if backend_type == BackendType.SIM_VLLM: - # TODO(ziming) support multi-lora, more device, vision language model + # TODO(ZeldaHuang): support multi-lora, more device, vision language model model_config = backend_args.get("model_config") _ = backend_args.get("cache_config") parallel_config = backend_args.get("parallel_config") diff --git a/llumnix/backends/vllm/executor.py b/llumnix/backends/vllm/executor.py index c3fd183a..411b8553 100644 --- a/llumnix/backends/vllm/executor.py +++ b/llumnix/backends/vllm/executor.py @@ -160,7 +160,7 @@ def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.last_inference_latency = 0 self.migration_bandwidth = self.latency_mem.migration_bandwidth - # TODO(ziming) add swap bandwidth + # TODO(ZeldaHuang): add swap bandwidth self.cache_block_size = get_cache_block_size( self.cache_config.block_size, self.model_config, self.parallel_config) diff --git a/llumnix/backends/vllm/scheduler.py b/llumnix/backends/vllm/scheduler.py index de4175f9..36a216c1 100644 --- a/llumnix/backends/vllm/scheduler.py +++ b/llumnix/backends/vllm/scheduler.py @@ -29,7 +29,7 @@ logger = init_logger(__name__) -# TODO(ziming) adapt prefix cache and sliding window, now use v1 manager +# TODO(ZeldaHuang): adapt prefix cache and sliding window, now use v1 manager class BlockManagerLlumnix(BlockSpaceManagerV1): def get_free_blocks(self, num_required_blocks: int) -> BlockTable: num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks() @@ -87,7 +87,7 @@ def get_last_running_request(self) -> Optional[MigratingRequest]: @scheduler_lock def get_longest_running_request(self) -> Optional[MigratingRequest]: - # TODO(ziming) use for loop find request + # TODO(ZeldaHuang): use for loop find request sorted_running = sorted(self.running, key=lambda seq_group: seq_group.get_seqs()[0].get_len()) for seq_group in reversed(sorted_running): if seq_group not in self.prefilling_seq_groups: diff --git a/llumnix/entrypoints/llumnix_utils.py b/llumnix/entrypoints/llumnix_utils.py index cb86daf0..437adbf7 100644 --- a/llumnix/entrypoints/llumnix_utils.py +++ b/llumnix/entrypoints/llumnix_utils.py @@ -19,7 +19,7 @@ logger = init_logger(__name__) -# TODO(yiwang): Set the values through tests. +# TODO(s5u13b): Set the values through tests. MAX_RESTARTS = 30 RESTART_INTERVALS = 1 MAX_TASK_RETRIES = 300 diff --git a/llumnix/entrypoints/vllm/api_server.py b/llumnix/entrypoints/vllm/api_server.py index 344b879b..112043f7 100644 --- a/llumnix/entrypoints/vllm/api_server.py +++ b/llumnix/entrypoints/vllm/api_server.py @@ -187,7 +187,7 @@ async def generate_benchmark(request: Request) -> Response: global num_finished_request if log_requests: - # TODO(yiwang): Use logger. + # TODO(s5u13b): Use logger. print(f"Finished request {request_id}.") num_finished_request += 1 print(f"num_finished_request {num_finished_request}.") diff --git a/llumnix/global_scheduler/scale_scheduler.py b/llumnix/global_scheduler/scale_scheduler.py index 1610917d..ee37cc12 100644 --- a/llumnix/global_scheduler/scale_scheduler.py +++ b/llumnix/global_scheduler/scale_scheduler.py @@ -72,7 +72,7 @@ def get_empty_instance_info(self) -> InstanceInfo: dummy_intance_info = InstanceInfo() dummy_intance_info.instance_id = -1 dummy_intance_info.step_id = -1 - # TODO(yiwang): Should be changed for proactive auto-scaling. + # TODO(s5u13b): Should be changed for proactive auto-scaling. dummy_intance_info.num_total_gpu_block = np.inf dummy_intance_info.num_available_gpu_block = np.inf dummy_intance_info.num_free_gpu_block = np.inf diff --git a/llumnix/llumlet/local_migration_scheduler.py b/llumnix/llumlet/local_migration_scheduler.py index a3cf2370..f59c5a64 100644 --- a/llumnix/llumlet/local_migration_scheduler.py +++ b/llumnix/llumlet/local_migration_scheduler.py @@ -21,7 +21,7 @@ def __init__(self, migrate_policy: str, backend_engine: BackendInterface) -> Non self.backend_engine = backend_engine def get_migrate_out_request(self) -> Optional[MigratingRequest]: - # TODO(yiwang): remove the if-else codes + # TODO(s5u13b): remove the if-else codes migrate_out_request: MigratingRequest = None if self.migrate_policy == 'LCFS': migrate_out_request = self.backend_engine.get_last_running_request() diff --git a/llumnix/llumlet/migration_coordinator.py b/llumnix/llumlet/migration_coordinator.py index 2c5438b4..dc12b8fa 100644 --- a/llumnix/llumlet/migration_coordinator.py +++ b/llumnix/llumlet/migration_coordinator.py @@ -79,7 +79,7 @@ def migrate_out_onestage(self, migrate_in_ray_actor: "ray.actor.ActorHandle", m # do stage send/recv migrate_out_request.stage_timestamps.append(time.time()) migrate_out_request.stage_num_blocks_list.append(stage_block_num) - # TODO(ziming) send_blocks in migrate_in_pre_alloc/migrate_in_last_stage + # TODO(ZeldaHuang): send_blocks in migrate_in_pre_alloc/migrate_in_last_stage self.backend_engine.send_blocks(migrate_in_ray_actor, src_blocks, dst_blocks) if not is_last_stage and self.backend_engine.should_abort_migration(migrate_out_request.backend_request, \ migrate_out_request.stage_timestamps[-1]):