Update swagger (#20)
* add swagger examples

* remove duplicate examples

* remove useless import

* Upgrade vllm version to 0.4.0.post1 and update prometheus client (#19)

* Upgrade vllm version and update prometheus client

* Update tests following prometheus client update

* update libraries version for mypy

* update libraries version for mypy

* update libraries version for mypy

* update image version for build and test workflows

* update image version for build and test workflows

* update image version for build and test workflows

* update image version for build and test workflows

* update image version for build and test workflows

* update image version for build and test workflows

* update image version for build and test workflows

* Remove torch dependencies and update workflow name

* Update workflow image

* Test with the latest release of vllm

---------

Co-authored-by: Maxime Fournioux <[email protected]>
Co-authored-by: Gautier Solard <[email protected]>

* remove useless import

* remove duplicate examples

* order imports

* Add Body to functional

* reorder examples and update sampling params

* fix mypy errors

* fix mypy errors

* update MANIFEST.in

---------

Co-authored-by: Maxime Fournioux <[email protected]>
Co-authored-by: Gautier Solard <[email protected]>
Co-authored-by: Marc de Ponnat <[email protected]>
4 people committed Jun 27, 2024
1 parent 60dbd81 commit fc12f65
Showing 7 changed files with 373 additions and 23 deletions.
4 changes: 3 additions & 1 deletion MANIFEST.in
@@ -1 +1,3 @@
-include version.txt
+include version.txt
+include src/happy_vllm/routers/schemas/examples/request.json
+include src/happy_vllm/routers/schemas/examples/response.json
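Listing the example files in MANIFEST.in makes them part of the source distribution; because functional.py resolves them relative to its own __file__, they also need to reach the installed package (typically via include_package_data or package_data in the build configuration). A quick sanity-check sketch, assuming happy_vllm is installed and that happy_vllm.routers.schemas imports as a package:

# Sketch: confirm the packaged example files are present after installation.
# Assumption: happy_vllm.routers.schemas is an importable package.
import os

import happy_vllm.routers.schemas as schemas

examples_dir = os.path.join(os.path.dirname(schemas.__file__), "examples")
for name in ("request.json", "response.json"):
    path = os.path.join(examples_dir, name)
    print(path, "->", "found" if os.path.isfile(path) else "MISSING")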
63 changes: 48 additions & 15 deletions src/happy_vllm/routers/functional.py
@@ -14,15 +14,16 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
 
+import os
 import json
-from fastapi import APIRouter
 from vllm.utils import random_uuid
+from fastapi import APIRouter, Body
 from pydantic import BaseModel, Field
 from starlette.requests import Request
-from typing import AsyncGenerator, Tuple
 from vllm.sampling_params import SamplingParams
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from lmformatenforcer import TokenEnforcerTokenizerData
+from typing import Annotated, AsyncGenerator, Tuple, List
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 from starlette.responses import JSONResponse, Response, StreamingResponse

@@ -32,6 +33,14 @@
 
 from ..model.model_base import Model
 from ..core.resources import RESOURCE_MODEL, RESOURCES
+from .schemas.functional import ResponseGenerate, RequestGenerate, ResponseTokenizer, RequestTokenizer, ResponseDecode, RequestDecode, ResponseSplitText, RequestSplitText, ResponseMetadata, RequestMetadata
+
+
+# Load the request examples
+directory = os.path.dirname(os.path.abspath(__file__))
+request_examples_path = os.path.join(directory, "schemas", "examples", "request.json")
+with open(request_examples_path, 'r') as file:
+    request_openapi_examples = json.load(file)
 
 # Functional router
 router = APIRouter()
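For context, openapi_examples is standard FastAPI (available since 0.103): each key of the dict passed to Body becomes a named, selectable example in Swagger UI. A minimal, self-contained sketch of the pattern, using a simplified stand-in for the real RequestGenerate schema:

# Minimal sketch of the Body(openapi_examples=...) pattern used above.
# The schema below is a simplified stand-in, not the real RequestGenerate.
from typing import Annotated

from fastapi import Body, FastAPI
from pydantic import BaseModel

app = FastAPI()


class RequestGenerate(BaseModel):
    prompt: str


examples = {
    "case1": {
        "summary": "Case 1",
        "description": "This is case 1 description",
        "value": {"prompt": "This is a prompt example"},
    },
}


@app.post("/generate")
async def generate(
    request_type: Annotated[RequestGenerate, Body(openapi_examples=examples)]
):
    # Each key of `examples` appears as a selectable example in /docs.
    return {"prompt": request_type.prompt}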
@@ -108,8 +117,13 @@ def parse_generate_parameters(request_dict: dict, model: AsyncLLMEngine, tokeniz
     return prompt, prompt_in_response, sampling_params
 
 
-@router.post("/generate")
-async def generate(request: Request) -> Response:
+@router.post("/generate", response_model=ResponseGenerate)
+async def generate(
+    request: Request,
+    request_type: Annotated[
+        RequestGenerate,
+        Body(openapi_examples=request_openapi_examples["generate"])] = None
+) -> Response:
     """Generate completion for the request.
 
     The request should be a JSON object with the following fields:
@@ -145,8 +159,12 @@ async def generate(request: Request) -> Response:
     return JSONResponse(ret)
 
 
-@router.post("/generate_stream")
-async def generate_stream(request: Request) -> StreamingResponse:
+@router.post("/generate_stream", response_model=ResponseGenerate)
+async def generate_stream(request: Request,
+                          request_type: Annotated[
+                              RequestGenerate,
+                              Body(openapi_examples=request_openapi_examples["generate_stream"])] = None
+                          ) -> StreamingResponse:
     """Generate completion for the request.
 
     The request should be a JSON object with the following fields:
@@ -176,8 +194,12 @@ async def stream_results() -> AsyncGenerator[str, None]:
     return StreamingResponse(stream_results())
 
 
-@router.post("/tokenizer")
-async def tokenizer(request: Request) -> Response:
+@router.post("/tokenizer", response_model=ResponseTokenizer)
+async def tokenizer(request: Request,
+                    request_type: Annotated[
+                        RequestTokenizer,
+                        Body(openapi_examples=request_openapi_examples["tokenizer"])] = None
+                    ) -> Response:
     """Tokenizes a text
 
     The request should be a JSON object with the following fields:
@@ -207,8 +229,12 @@ async def tokenizer(request: Request) -> Response:
     return JSONResponse(ret)
 
 
-@router.post("/decode")
-async def decode(request: Request) -> Response:
+@router.post("/decode", response_model=ResponseDecode)
+async def decode(request: Request,
+                 request_type: Annotated[
+                     RequestDecode,
+                     Body(openapi_examples=request_openapi_examples["decode"])] = None
+                 ) -> Response:
     """Decodes token ids
 
     The request should be a JSON object with the following fields:
@@ -238,8 +264,12 @@ async def decode(request: Request) -> Response:
     return JSONResponse(ret)
 
 
-@router.post("/split_text")
-async def split_text(request: Request):
+@router.post("/split_text", response_model=ResponseSplitText)
+async def split_text(request: Request,
+                     request_type: Annotated[
+                         RequestSplitText,
+                         Body(openapi_examples=request_openapi_examples["split_text"])] = None
+                     ):
     """Splits a text
 
     The request should be a JSON object with the following fields:
@@ -256,8 +286,11 @@ async def split_text(request: Request):
     return JSONResponse(response)
 
 
-@router.post("/metadata_text")
-async def metadata_text(request: Request):
+@router.post("/metadata_text", response_model=ResponseMetadata)
+async def metadata_text(request: Request,
+                        request_type: Annotated[
+                            RequestMetadata,
+                            Body(openapi_examples=request_openapi_examples["metadata_text"])] = None):
     """Gives meta data on a text
 
     The request should be a JSON object with the following fields:
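Note that the added request_type parameters default to None and only feed the OpenAPI schema; each handler still reads the raw Request body, so existing clients keep working. A hypothetical call against a locally running server (host, port, and payload fields are assumptions mirroring the examples below):

# Hypothetical client call; http://localhost:5000 and the payload are assumptions.
import requests

resp = requests.post(
    "http://localhost:5000/generate",
    json={"prompt": "This is a prompt example"},
)
# Expected shape per ResponseGenerate: responses, finish_reasons, prompt.
print(resp.json())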
123 changes: 123 additions & 0 deletions src/happy_vllm/routers/schemas/examples/request.json
@@ -0,0 +1,123 @@
{
    "generate": {
        "case1": {
            "summary": "Case 1",
            "description": "This is case 1 description",
            "value": {
                "prompt": "This is a prompt example",
                "sampling_param": "This is a sampling param example"
            }
        },
        "case2": {
            "summary": "Case 2",
            "description": "This is case 2 description",
            "value": {
                "prompt": "This is a second prompt example",
                "sampling_param": "This is a second sampling param example"
            }
        }
    },
    "generate_stream": {
        "case1": {
            "summary": "Case 1",
            "description": "This is case 1 description",
            "value": {
                "prompt": "This is a prompt example",
                "sampling_param": "This is a sampling param example"
            }
        },
        "case2": {
            "summary": "Case 2",
            "description": "This is case 2 description",
            "value": {
                "prompt": "This is a second prompt example",
                "sampling_param": "This is a second sampling param example"
            }
        }
    },
    "tokenizer": {
        "case1": {
            "summary": "Case 1",
            "description": "This is case 1 description",
            "value": {
                "text": "This is a text example",
                "with_token_str": true,
                "vanilla": true
            }
        },
        "case2": {
            "summary": "Case 2",
            "description": "This is case 2 description",
            "value": {
                "text": "This is a second text example",
                "with_token_str": false,
                "vanilla": false
            }
        }
    },
    "decode": {
        "case1": {
            "summary": "Case 1",
            "description": "This is case 1 description",
            "value": {
                "token_ids": "This is a token_ids example",
                "with_token_str": true,
                "vanilla": false
            }
        },
        "case2": {
            "summary": "Case 2",
            "description": "This is case 2 description",
            "value": {
                "token_ids": "This is a second token_ids example",
                "with_token_str": false,
                "vanilla": true
            }
        }
    },
    "split_text": {
        "case1": {
            "summary": "Case 1",
            "description": "This is case 1 description",
            "value": {
                "text": "This is a text example",
                "num_tokens_in_chunk": 42,
                "separators": [
                    ","
                ]
            }
        },
        "case2": {
            "summary": "Case 2",
            "description": "This is case 2 description",
            "value": {
                "text": "This is a second text example",
                "num_tokens_in_chunk": 420,
                "separators": [
                    ".",
                    "!"
                ]
            }
        }
    },
    "metadata_text": {
        "case1": {
            "summary": "Case 1",
            "description": "This is case 1 description",
            "value": {
                "text": "This is a text example",
                "truncation_side": "This is a truncation_side example",
                "max_length": 42
            }
        },
        "case2": {
            "summary": "Case 2",
            "description": "This is case 2 description",
            "value": {
                "text": "This is a second text example",
                "truncation_side": "This is a second truncation_side example",
                "max_length": 420
            }
        }
    }
}
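Each endpoint entry follows the summary/description/value layout FastAPI expects for openapi_examples. A small sketch, run from the repository root, to check that every case keeps that shape:

# Sketch: verify each example case carries the keys openapi_examples expects.
import json

with open("src/happy_vllm/routers/schemas/examples/request.json") as f:
    examples = json.load(f)

for endpoint, cases in examples.items():
    for case_name, case in cases.items():
        missing = {"summary", "description", "value"} - case.keys()
        assert not missing, f"{endpoint}/{case_name} is missing {missing}"
print("all example cases are well-formed")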
48 changes: 48 additions & 0 deletions src/happy_vllm/routers/schemas/examples/response.json
@@ -0,0 +1,48 @@
{
    "generate": {
        "responses": ["response example"],
        "finish_reasons": ["finish_reason example"],
        "prompt": "prompt example"
    },
    "generate_stream": {
        "responses": ["response example"],
        "finish_reasons": ["finish_reason example"],
        "prompt": "prompt example"
    },
    "tokenizer": {
        "tokens_ids": [1,45021,3],
        "tokens_nb": 3,
        "tokens_str": "tokens_str example"
    },
    "decode": {
        "decoded_string": "decoded_string example",
        "tokens_str": ["decoded", "_string", " example"]
    },
    "split_text": {
        "split_text": ["split_text example"]
    },
    "metadata_text": {
        "tokens_nb": 42,
        "truncated_text": "truncated_text example"
    },
    "liveness": {
        "alive": "alive example"
    },
    "readiness": {
        "ready": "ready example"
    },
    "information": {
        "application": "application example",
        "version": "version example",
        "model_name": "model_name example",
        "truncation_side": "truncation_side example",
        "max_length": 100
    },
    "live_metrics": {
        "requests_running": 5,
        "requests_swapped": 3,
        "requests_pending": 2,
        "gpu_cache_usage": 0.24,
        "cpu_cache_usage": 0.36
    }
}
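response.json also covers endpoints served by other routers (liveness, readiness, information, live_metrics). One way such canned payloads can surface in OpenAPI is through a schema-level example; the sketch below is an illustration with a stand-in model, not necessarily how happy_vllm wires response.json in:

# Illustration: attaching a canned response example to a Pydantic v2 model.
# Stand-in model; field names mirror the "tokenizer" entry above.
from typing import List, Optional

from pydantic import BaseModel


class ResponseTokenizer(BaseModel):
    tokens_ids: List[int]
    tokens_nb: int
    tokens_str: Optional[str] = None

    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "tokens_ids": [1, 45021, 3],
                    "tokens_nb": 3,
                    "tokens_str": "tokens_str example",
                }
            ]
        }
    }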