feat: return output_token_ids in generate api
jimpang committed Nov 22, 2023
1 parent a74a2c5 commit a722522
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions vllm/entrypoints/api_server.py
@@ -2,9 +2,9 @@
 import json
 from typing import AsyncGenerator
 
+import uvicorn
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response, StreamingResponse
-import uvicorn
 
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -55,7 +55,9 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
             text_outputs = [
                 output.text for output in request_output.outputs
             ]
-            ret = {"text": text_outputs}
+            output_tokens = [output.token_ids for output in request_output.outputs]
+
+            ret = {"text": text_outputs, "output_token_ids": output_tokens}
             yield (json.dumps(ret) + "\0").encode("utf-8")
 
     if stream:
@@ -72,7 +74,8 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
 
     assert final_output is not None
     text_outputs = [output.text for output in final_output.outputs]
-    ret = {"text": text_outputs}
+    output_tokens = [output.token_ids for output in final_output.outputs]
+    ret = {"text": text_outputs, "output_token_ids": output_tokens}
     return JSONResponse(ret)
 
 
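The diff adds an "output_token_ids" field alongside "text" in both the streaming and non-streaming responses of the /generate endpoint. Below is a minimal client sketch, not part of the commit, assuming the server is launched via vllm.entrypoints.api_server on the default localhost:8000 and that sampling parameters such as max_tokens are passed through the JSON body; the prompt text and parameter values are illustrative.

# Minimal client sketch (assumptions noted above): call /generate and read the
# new "output_token_ids" field in both non-streaming and streaming modes.
import json

import requests

API_URL = "http://localhost:8000/generate"  # default api_server host/port assumed

# Non-streaming: the handler returns a single JSONResponse.
payload = {"prompt": "Hello, my name is", "max_tokens": 16, "stream": False}
resp = requests.post(API_URL, json=payload, timeout=60)
resp.raise_for_status()
data = resp.json()
print(data["text"])              # list of generated strings, one per sequence
print(data["output_token_ids"])  # list of token-id lists, parallel to "text"

# Streaming: chunks are JSON objects separated by NUL bytes, mirroring
# yield (json.dumps(ret) + "\0").encode("utf-8") in the diff above.
payload["stream"] = True
with requests.post(API_URL, json=payload, stream=True, timeout=60) as stream_resp:
    for chunk in stream_resp.iter_lines(delimiter=b"\0"):
        if not chunk:
            continue
        partial = json.loads(chunk.decode("utf-8"))
        print(partial["text"], partial["output_token_ids"])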
