Skip to content

Commit

Permalink
♻️ use request index instead of a map
Browse files Browse the repository at this point in the history
This assumes that the request index remains a valid way to correlate each request with its response, which holds because vLLM itself correlates them that way.

Signed-off-by: Prashant Gupta <[email protected]>
  • Loading branch information
prashantgupta24 committed Aug 19, 2024
1 parent 4af8578 commit 282bfc9
Showing 1 changed file with 2 additions and 6 deletions.
8 changes: 2 additions & 6 deletions src/vllm_tgis_adapter/grpc/grpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,6 @@ async def Generate(

generators = []
max_is_token_limit = [False] * request_count
# map for storing prompts for requests
request_prompt_map = {}

for i, req in enumerate(request.requests):
input_ids, max_is_token_limit[i] = await self._validate_prompt_and_tokenize(
Expand All @@ -262,13 +260,11 @@ async def Generate(
kwargs["trace_headers"] = extract_trace_headers(headers)
elif contains_trace_headers(headers):
log_tracing_disabled_warning()
unique_request_id = f"{request_id}-{i}"
request_prompt_map[unique_request_id] = req.text
generators.append(
self.engine.generate(
inputs=inputs,
sampling_params=sampling_params,
request_id=unique_request_id,
request_id=f"{request_id}-{i}",
**adapter_kwargs,
**kwargs,
),
Expand All @@ -288,7 +284,7 @@ async def Generate(
# await self.engine.abort(f"{request_id}-{i}")
# return self.create_error_response("Client disconnected")
if res.prompt is None:
res.prompt = request_prompt_map[res.request_id]
res.prompt = request.requests[i].text
responses[i] = res
service_metrics.observe_queue_time(res)

Expand Down

0 comments on commit 282bfc9

Please sign in to comment.