feat: return output_token_ids in generate api
jimpang committed Nov 22, 2023
1 parent a74a2c5 commit a722522
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions vllm/entrypoints/api_server.py
@@ -2,9 +2,9 @@
 import json
 from typing import AsyncGenerator
 
+import uvicorn
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response, StreamingResponse
-import uvicorn
 
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -55,7 +55,9 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
             text_outputs = [
                 output.text for output in request_output.outputs
             ]
-            ret = {"text": text_outputs}
+            output_tokens = [output.token_ids for output in request_output.outputs]
+
+            ret = {"text": text_outputs, "output_token_ids": output_tokens}
             yield (json.dumps(ret) + "\0").encode("utf-8")
 
     if stream:
@@ -72,7 +74,8 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
 
     assert final_output is not None
     text_outputs = [output.text for output in final_output.outputs]
-    ret = {"text": text_outputs}
+    output_tokens = [output.token_ids for output in final_output.outputs]
+    ret = {"text": text_outputs, "output_token_ids": output_tokens}
     return JSONResponse(ret)
 
 
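The diff adds an "output_token_ids" field alongside "text" in both the streaming and non-streaming responses of the /generate endpoint. Below is a minimal client sketch, not part of the commit, assuming the server is launched via vllm.entrypoints.api_server on the default localhost:8000 and that sampling parameters such as max_tokens are passed through the JSON body; the prompt text and parameter values are illustrative.

# Minimal client sketch (assumptions noted above): call /generate and read the
# new "output_token_ids" field in both non-streaming and streaming modes.
import json

import requests

API_URL = "http://localhost:8000/generate"  # default api_server host/port assumed

# Non-streaming: the handler returns a single JSONResponse.
payload = {"prompt": "Hello, my name is", "max_tokens": 16, "stream": False}
resp = requests.post(API_URL, json=payload, timeout=60)
resp.raise_for_status()
data = resp.json()
print(data["text"])              # list of generated strings, one per sequence
print(data["output_token_ids"])  # list of token-id lists, parallel to "text"

# Streaming: chunks are JSON objects separated by NUL bytes, mirroring
# yield (json.dumps(ret) + "\0").encode("utf-8") in the diff above.
payload["stream"] = True
with requests.post(API_URL, json=payload, stream=True, timeout=60) as stream_resp:
    for chunk in stream_resp.iter_lines(delimiter=b"\0"):
        if not chunk:
            continue
        partial = json.loads(chunk.decode("utf-8"))
        print(partial["text"], partial["output_token_ids"])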
