Adds truncate_prompt_tokens param for embeddings creation
Signed-off-by: Flavia Beo <[email protected]>
flaviabeo committed Oct 1, 2024
1 parent 1b6de83 commit 551c33b
Showing 2 changed files with 16 additions and 0 deletions.
vllm/entrypoints/openai/protocol.py (1 addition, 0 deletions)

@@ -614,6 +614,7 @@ class EmbeddingRequest(OpenAIBaseModel):
     encoding_format: Literal["float", "base64"] = "float"
     dimensions: Optional[int] = None
     user: Optional[str] = None
+    truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
 
     # doc: begin-embedding-pooling-params
     additional_data: Optional[Any] = None
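With the new request field in place, the parameter can be sent through any OpenAI-compatible client. A minimal usage sketch, assuming a vLLM server with this patch running at http://localhost:8000; the model name and the truncation value below are illustrative:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# truncate_prompt_tokens is a vLLM extension, not part of the OpenAI
# spec, so it is passed via extra_body rather than as a named argument.
response = client.embeddings.create(
    model="intfloat/e5-mistral-7b-instruct",
    input="A long document that might exceed the model's context window.",
    extra_body={"truncate_prompt_tokens": 32},
)
print(len(response.data[0].embedding))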
vllm/entrypoints/openai/serving_embedding.py (15 additions, 0 deletions)

@@ -110,6 +110,20 @@ async def create_embedding(
         request_id = f"embd-{random_uuid()}"
         created_time = int(time.monotonic())
 
+        truncate_prompt_tokens = None
+
+        if request.truncate_prompt_tokens is not None:
+            if request.truncate_prompt_tokens < self.max_model_len:
+                truncate_prompt_tokens = request.truncate_prompt_tokens
+            elif request.truncate_prompt_tokens > self.max_model_len:
+                raise ValueError(
+                    "truncate_prompt_tokens value is greater than "
+                    "max_model_len. Please select a smaller truncation size.")
+            else:
+                logger.warning(
+                    "Truncating input tokens to max_model_len")
+                truncate_prompt_tokens = self.max_model_len
+
         # Schedule the request and get the result generator.
         generators: List[AsyncGenerator[EmbeddingRequestOutput, None]] = []
         try:
@@ -128,6 +142,7 @@ async def create_embedding(
                     request,
                     tokenizer,
                     request.input,
+                    truncate_prompt_tokens,
                 ))
 
         for i, prompt_inputs in enumerate(prompts):
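The three-way check above is easier to read in isolation. A standalone sketch of the same branch logic; the helper name resolve_truncation is illustrative, not part of the vLLM API:

from typing import Optional


def resolve_truncation(requested: Optional[int],
                       max_model_len: int) -> Optional[int]:
    """Mirror the branching in create_embedding above."""
    if requested is None:
        return None  # no truncation requested
    if requested < max_model_len:
        return requested  # valid: use the requested size
    if requested > max_model_len:
        raise ValueError(
            "truncate_prompt_tokens value is greater than "
            "max_model_len. Please select a smaller truncation size.")
    return max_model_len  # equal: clamp to the model's limit


assert resolve_truncation(None, 4096) is None
assert resolve_truncation(512, 4096) == 512
assert resolve_truncation(4096, 4096) == 4096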
