Skip to content

Commit

Permalink
Change TODO id
Browse files (browse the repository at this point in the history)
  • Loading branch information
s5u13b committed Jul 26, 2024
1 parent 986a617 commit fd47e87
Show file tree
Hide file tree
Showing 8 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion llumnix/backends/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def model_decode(x, a, b, c):
def get_latency_mem(backend_type: BackendType, profiling_database: ProfilingDatabase, gpu_type: str, **backend_args):
assert BackendType.is_sim_backend(backend_type)
if backend_type == BackendType.SIM_VLLM:
# TODO(ziming) support multi-lora, more device, vision language model
# TODO(ZeldaHuang): support multi-lora, more device, vision language model
model_config = backend_args.get("model_config")
_ = backend_args.get("cache_config")
parallel_config = backend_args.get("parallel_config")
Expand Down
2 changes: 1 addition & 1 deletion llumnix/backends/vllm/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.last_inference_latency = 0
self.migration_bandwidth = self.latency_mem.migration_bandwidth
# TODO(ziming) add swap bandwidth
# TODO(ZeldaHuang): add swap bandwidth

self.cache_block_size = get_cache_block_size(
self.cache_config.block_size, self.model_config, self.parallel_config)
Expand Down
4 changes: 2 additions & 2 deletions llumnix/backends/vllm/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

logger = init_logger(__name__)

# TODO(ziming) adapt prefix cache and sliding window, now use v1 manager
# TODO(ZeldaHuang): adapt prefix cache and sliding window, now use v1 manager
class BlockManagerLlumnix(BlockSpaceManagerV1):
def get_free_blocks(self, num_required_blocks: int) -> BlockTable:
num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()
Expand Down Expand Up @@ -87,7 +87,7 @@ def get_last_running_request(self) -> Optional[MigratingRequest]:

@scheduler_lock
def get_longest_running_request(self) -> Optional[MigratingRequest]:
# TODO(ziming) use for loop find request
# TODO(ZeldaHuang): use for loop find request
sorted_running = sorted(self.running, key=lambda seq_group: seq_group.get_seqs()[0].get_len())
for seq_group in reversed(sorted_running):
if seq_group not in self.prefilling_seq_groups:
Expand Down
2 changes: 1 addition & 1 deletion llumnix/entrypoints/llumnix_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

logger = init_logger(__name__)

# TODO(yiwang): Set the values through tests.
# TODO(s5u13b): Set the values through tests.
MAX_RESTARTS = 30
RESTART_INTERVALS = 1
MAX_TASK_RETRIES = 300
Expand Down
2 changes: 1 addition & 1 deletion llumnix/entrypoints/vllm/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ async def generate_benchmark(request: Request) -> Response:

global num_finished_request
if log_requests:
# TODO(yiwang): Use logger.
# TODO(s5u13b): Use logger.
print(f"Finished request {request_id}.")
num_finished_request += 1
print(f"num_finished_request {num_finished_request}.")
Expand Down
2 changes: 1 addition & 1 deletion llumnix/global_scheduler/scale_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def get_empty_instance_info(self) -> InstanceInfo:
dummy_intance_info = InstanceInfo()
dummy_intance_info.instance_id = -1
dummy_intance_info.step_id = -1
# TODO(yiwang): Should be changed for proactive auto-scaling.
# TODO(s5u13b): Should be changed for proactive auto-scaling.
dummy_intance_info.num_total_gpu_block = np.inf
dummy_intance_info.num_available_gpu_block = np.inf
dummy_intance_info.num_free_gpu_block = np.inf
Expand Down
2 changes: 1 addition & 1 deletion llumnix/llumlet/local_migration_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __init__(self, migrate_policy: str, backend_engine: BackendInterface) -> Non
self.backend_engine = backend_engine

def get_migrate_out_request(self) -> Optional[MigratingRequest]:
# TODO(yiwang): remove the if-else codes
# TODO(s5u13b): remove the if-else codes
migrate_out_request: MigratingRequest = None
if self.migrate_policy == 'LCFS':
migrate_out_request = self.backend_engine.get_last_running_request()
Expand Down
2 changes: 1 addition & 1 deletion llumnix/llumlet/migration_coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def migrate_out_onestage(self, migrate_in_ray_actor: "ray.actor.ActorHandle", m
# do stage send/recv
migrate_out_request.stage_timestamps.append(time.time())
migrate_out_request.stage_num_blocks_list.append(stage_block_num)
# TODO(ziming) send_blocks in migrate_in_pre_alloc/migrate_in_last_stage
# TODO(ZeldaHuang): send_blocks in migrate_in_pre_alloc/migrate_in_last_stage
self.backend_engine.send_blocks(migrate_in_ray_actor, src_blocks, dst_blocks)
if not is_last_stage and self.backend_engine.should_abort_migration(migrate_out_request.backend_request, \
migrate_out_request.stage_timestamps[-1]):
Expand Down

0 comments on commit fd47e87

Please sign in to comment.