[Frontend] Adjust try/except blocks in API impl
These were changed to separate blocks in vllm-project#9759, but I feel it's cleaner/clearer as a single block. It actually doesn't matter which parts of the block raise the specific exceptions caught in the except clauses; we still want to handle them in the same way.
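To illustrate the reasoning above, here is a minimal, self-contained sketch of the single-block pattern (not vLLM code; `result_generator` and `build_response` are hypothetical stand-ins introduced only for this example): whether an exception is raised while consuming the generator or while building the response, the same except clauses handle it.

```python
import asyncio


# Hypothetical stand-ins for the result generator and the response-building
# step; they exist only to make the sketch runnable.
async def result_generator(n):
    for i in range(n):
        yield i, f"result-{i}"


def build_response(batch):
    if any(item is None for item in batch):
        raise ValueError("incomplete result batch")
    return {"results": batch}


async def create_completion_sketch(n=3):
    final_res_batch = [None] * n
    # A single try block: it does not matter whether CancelledError or
    # ValueError is raised while consuming the generator or while building
    # the response; either way they are handled identically below.
    try:
        async for i, res in result_generator(n):
            final_res_batch[i] = res
        return build_response(final_res_batch)
    except asyncio.CancelledError:
        return {"error": "Client disconnected"}
    except ValueError as e:
        return {"error": str(e)}


print(asyncio.run(create_completion_sketch()))
```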
njhill committed Nov 5, 2024
1 parent 235366f commit eade5d6
Showing 3 changed files with 13 additions and 14 deletions.
11 changes: 8 additions & 3 deletions benchmarks/backend_request_func.py
@@ -230,6 +230,8 @@ async def async_request_openai_completions(
         ("completions", "profile")
     ), "OpenAI Completions API URL must end with 'completions' or 'profile'."
 
+    stream = False
+
     async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session:
         payload = {
             "model": request_func_input.model,
@@ -238,7 +240,7 @@
             "best_of": request_func_input.best_of,
             "max_tokens": request_func_input.output_len,
             "logprobs": request_func_input.logprobs,
-            "stream": True,
+            "stream": stream,
             "ignore_eos": request_func_input.ignore_eos,
         }
         headers = {
@@ -263,9 +265,10 @@
 
                         chunk = remove_prefix(chunk_bytes.decode("utf-8"),
                                               "data: ")
-                        if chunk == "[DONE]":
+                        stream_is_done = stream and chunk == "[DONE]"
+                        if not stream or stream_is_done:
                             latency = time.perf_counter() - st
-                        else:
+                        if not stream_is_done:
                             data = json.loads(chunk)
 
                             # NOTE: Some completion API might have a last
@@ -379,10 +382,12 @@ async def async_request_openai_chat_completions(
                 else:
                     output.error = response.reason or ""
                     output.success = False
+                    print("Error reason", response.reason)
         except Exception:
             output.success = False
             exc_info = sys.exc_info()
             output.error = "".join(traceback.format_exception(*exc_info))
+            traceback.print_exc()
 
         if pbar:
             pbar.update(1)
8 changes: 2 additions & 6 deletions vllm/entrypoints/openai/serving_completion.py
@@ -189,13 +189,7 @@ async def create_completion(
         try:
             async for i, res in result_generator:
                 final_res_batch[i] = res
-        except asyncio.CancelledError:
-            return self.create_error_response("Client disconnected")
-        except ValueError as e:
-            # TODO: Use a vllm-specific Validation Error
-            return self.create_error_response(str(e))
 
-        try:
             for i, final_res in enumerate(final_res_batch):
                 assert final_res is not None
 
@@ -217,6 +211,8 @@
                 tokenizer,
                 request_metadata,
             )
+        except asyncio.CancelledError:
+            return self.create_error_response("Client disconnected")
         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))
8 changes: 3 additions & 5 deletions vllm/entrypoints/openai/serving_embedding.py
@@ -205,19 +205,17 @@ async def create_embedding(
         try:
             async for i, res in result_generator:
                 final_res_batch[i] = res
-        except asyncio.CancelledError:
-            return self.create_error_response("Client disconnected")
 
-        try:
-            for final_res in final_res_batch:
-                assert final_res is not None
+            assert all(final_res is not None for final_res in final_res_batch)
 
             final_res_batch_checked = cast(List[EmbeddingRequestOutput],
                                            final_res_batch)
 
             response = request_output_to_embedding_response(
                 final_res_batch_checked, request_id, created_time, model_name,
                 encoding_format)
+        except asyncio.CancelledError:
+            return self.create_error_response("Client disconnected")
         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))
