Skip to content

Commit

Permalink
Fix error handling in service update process (skypilot-org#4034)
Browse files Browse the repository at this point in the history
* Fix error handling in service update process

Fixes skypilot-org#4030

Address error handling inconsistency in service update process.

* **sky/serve/controller.py**
  - Modify `/controller/update_service` endpoint to return appropriate HTTP status codes.
  - Return 400 for client errors and 500 for server errors.
  - Use `responses.JSONResponse` for returning responses.

* **sky/serve/serve_utils.py**
  - Update `update_service_encoded` function to handle different status codes.
  - Raise exceptions based on the response body for 400 and 500 status codes.

* **sky/utils/subprocess_utils.py**
  - Add `stream_logs` parameter in the comment to reflect the code.

* format

* apply to load_balancer_sync for consistency
  • Loading branch information
andylizf authored Oct 6, 2024
1 parent b0a1ea2 commit d5b6d89
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 7 deletions.
21 changes: 14 additions & 7 deletions sky/serve/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Any, Dict, List

import fastapi
from fastapi import responses
import uvicorn

from sky import serve
Expand Down Expand Up @@ -96,26 +97,30 @@ def _run_autoscaler(self):
def run(self) -> None:

@self._app.post('/controller/load_balancer_sync')
async def load_balancer_sync(request: fastapi.Request):
async def load_balancer_sync(
request: fastapi.Request) -> fastapi.Response:
request_data = await request.json()
# TODO(MaoZiming): Check aggregator type.
request_aggregator: Dict[str, Any] = request_data.get(
'request_aggregator', {})
timestamps: List[int] = request_aggregator.get('timestamps', [])
logger.info(f'Received {len(timestamps)} inflight requests.')
self._autoscaler.collect_request_information(request_aggregator)
return {
return responses.JSONResponse(content={
'ready_replica_urls':
self._replica_manager.get_active_replica_urls()
}
},
status_code=200)

@self._app.post('/controller/update_service')
async def update_service(request: fastapi.Request):
async def update_service(request: fastapi.Request) -> fastapi.Response:
request_data = await request.json()
try:
version = request_data.get('version', None)
if version is None:
return {'message': 'Error: version is not specified.'}
return responses.JSONResponse(
content={'message': 'Error: version is not specified.'},
status_code=400)
update_mode_str = request_data.get(
'mode', serve_utils.DEFAULT_UPDATE_MODE.value)
update_mode = serve_utils.UpdateMode(update_mode_str)
Expand Down Expand Up @@ -144,11 +149,13 @@ async def update_service(request: fastapi.Request):
self._autoscaler.update_version(version,
service,
update_mode=update_mode)
return {'message': 'Success'}
return responses.JSONResponse(content={'message': 'Success'},
status_code=200)
except Exception as e: # pylint: disable=broad-except
logger.error(f'Error in update_service: '
f'{common_utils.format_exception(e)}')
return {'message': 'Error'}
return responses.JSONResponse(content={'message': 'Error'},
status_code=500)

threading.Thread(target=self._run_autoscaler).start()

Expand Down
4 changes: 4 additions & 0 deletions sky/serve/serve_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,10 @@ def update_service_encoded(service_name: str, version: int, mode: str) -> str:
raise ValueError('The service is up-ed in an old version and does not '
'support update. Please `sky serve down` '
'it first and relaunch the service. ')
elif resp.status_code == 400:
raise ValueError(f'Client error during service update: {resp.text}')
elif resp.status_code == 500:
raise RuntimeError(f'Server error during service update: {resp.text}')
elif resp.status_code != 200:
raise ValueError(f'Failed to update service: {resp.text}')

Expand Down
1 change: 1 addition & 0 deletions sky/utils/subprocess_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def handle_returncode(returncode: int,
command: The command that was run.
error_msg: The error message to print.
stderr: The stderr of the command.
stream_logs: Whether to stream logs.
"""
echo = logger.error if stream_logs else logger.debug
if returncode != 0:
Expand Down

0 comments on commit d5b6d89

Please sign in to comment.