From 1b4224ca76c78000cf42e7a720e11ded7f6217e3 Mon Sep 17 00:00:00 2001 From: s5u13b Date: Thu, 7 Nov 2024 08:41:39 +0000 Subject: [PATCH] Fix index error when FCWSR --- llumnix/llm_engine_manager.py | 5 +++-- llumnix/llumlet/local_migration_scheduler.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/llumnix/llm_engine_manager.py b/llumnix/llm_engine_manager.py index 59bcec7b..df77f7a1 100644 --- a/llumnix/llm_engine_manager.py +++ b/llumnix/llm_engine_manager.py @@ -230,14 +230,15 @@ async def migrate_done_callback(ret, migrate_instance_pair: Tuple[str, str]) -> self.instance_migrating[migrate_instance_pair[0]] = False if migrate_instance_pair[1] in self.instance_migrating: self.instance_migrating[migrate_instance_pair[1]] = False - if isinstance(ret, (ray.exceptions.RayActorError, KeyError)): + # TODO(s5u13b): Add more exception types for failover. + if isinstance(ret, (ray.exceptions.RayActorError, ray.exceptions.RayTaskError, KeyError)): has_error_pair = await self._check_instance_error(migrate_instance_pair) for i, has_error in enumerate(has_error_pair): # Instance without error should clear migration states. if not has_error: try: await self.instances[migrate_instance_pair[i]].clear_migration_states.remote(is_migrate_in=bool(i)) - except (ray.exceptions.RayActorError, KeyError): + except (ray.exceptions.RayActorError, ray.exceptions.RayTaskError, KeyError): has_error = True for i, has_error in enumerate(has_error_pair): if has_error: diff --git a/llumnix/llumlet/local_migration_scheduler.py b/llumnix/llumlet/local_migration_scheduler.py index a6191606..4f30f850 100644 --- a/llumnix/llumlet/local_migration_scheduler.py +++ b/llumnix/llumlet/local_migration_scheduler.py @@ -97,5 +97,6 @@ def _get_first_waiting_request(self, min_request_len, max_request_len) -> List[L def _get_first_waiting_and_shortest_running_requests(self, min_request_len, max_request_len) -> List[LlumnixRequest]: waiting_requests = self._get_first_waiting_request(min_request_len, max_request_len) running_requests = self._get_shortest_running_request(min_request_len, max_request_len) - waiting_requests[0].eom = True + if waiting_requests: + waiting_requests[0].eom = True return waiting_requests + running_requests