diff --git a/tests/entrypoints/openai/test_metrics.py b/tests/entrypoints/openai/test_metrics.py
index 54e1e0fdb9e94..87a54897de85e 100644
--- a/tests/entrypoints/openai/test_metrics.py
+++ b/tests/entrypoints/openai/test_metrics.py
@@ -75,8 +75,9 @@ async def client(server):
                                        ("_count", _NUM_REQUESTS)],
     "vllm:prompt_tokens": [("_total",
                             _NUM_REQUESTS * _NUM_PROMPT_TOKENS_PER_REQUEST)],
-    "vllm:generation_tokens":
-    [("_total", _NUM_REQUESTS * _NUM_PROMPT_TOKENS_PER_REQUEST)],
+    "vllm:generation_tokens": [
+        ("_total", _NUM_REQUESTS * _NUM_PROMPT_TOKENS_PER_REQUEST)
+    ],
     "vllm:request_success": [("_total", _NUM_REQUESTS)],
 }
 
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 9079857b66236..1028f1424aa63 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1793,7 +1793,8 @@ def _get_stats(self,
                 ])
                 if seq_group.sampling_params is not None:
                     n_requests.append(seq_group.sampling_params.n)
-                    max_tokens_requests.append(seq_group.sampling_params.max_tokens)
+                    max_tokens_requests.append(
+                        seq_group.sampling_params.max_tokens)
                 finished_reason_requests.extend([
                     SequenceStatus.get_finished_reason(seq.status)
                     for seq in seq_group.get_finished_seqs()
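Both hunks are pure line-wrapping changes to fit the project's line-length style; behavior is unchanged. For context on what the `EXPECTED_VALUES` table in the first hunk is used for, here is a minimal sketch of asserting Prometheus counter totals against a running OpenAI-compatible server. The `requests` and `prometheus_client` calls are real APIs, but `check_metrics`, the base URL, and the constant values are illustrative assumptions, not the repository's exact test code.

```python
# Hypothetical sketch, not vLLM's actual test: scrape /metrics and check
# counter totals of the kind EXPECTED_VALUES in the diff describes.
import requests
from prometheus_client.parser import text_string_to_metric_families

# Constants mirror the names in the hunk; the values here are made up.
_NUM_REQUESTS = 10
_NUM_PROMPT_TOKENS_PER_REQUEST = 10

EXPECTED_VALUES = {
    "vllm:prompt_tokens": [
        ("_total", _NUM_REQUESTS * _NUM_PROMPT_TOKENS_PER_REQUEST)
    ],
    "vllm:request_success": [("_total", _NUM_REQUESTS)],
}


def check_metrics(base_url: str) -> None:
    response = requests.get(f"{base_url}/metrics")
    response.raise_for_status()

    # Flatten every exported sample into {sample_name: value}. A counter
    # family named "vllm:prompt_tokens" exposes its value under the sample
    # "vllm:prompt_tokens_total"; values are summed across label sets.
    samples: dict[str, float] = {}
    for family in text_string_to_metric_families(response.text):
        for sample in family.samples:
            samples[sample.name] = samples.get(sample.name, 0.0) + sample.value

    for metric_name, expected in EXPECTED_VALUES.items():
        for suffix, value in expected:
            sample_name = metric_name + suffix
            assert samples.get(sample_name) == value, (
                f"{sample_name}: expected {value}, "
                f"got {samples.get(sample_name)}")
```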