Add server metrics promql scraping #804

Merged · 3 commits · Sep 12, 2024

Changes from 1 commit
@@ -10,9 +10,13 @@
from datetime import datetime
import json
import random
import requests
import time
from typing import AsyncGenerator, List, Tuple

import google.auth
import google.auth.transport.requests

import aiohttp
import numpy as np
from transformers import AutoTokenizer
@@ -302,6 +306,54 @@ def save_json_results(args: argparse.Namespace, benchmark_result):
  with open(file_name, "w", encoding="utf-8") as outfile:
    json.dump(final_json, outfile)

def metrics_to_scrape(backend: str) -> List[str]:
  if backend == "vllm":
    return ["vllm:gpu_cache_usage_perc", "vllm:num_requests_waiting"]
  elif backend == "jetstream":
    return ["jetstream_slots_used_percentage", "jetstream_prefill_backlog_size"]
  else:
    return []
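These metric names get substituted into the aggregation templates in `print_metrics` below. As a quick illustration (the 120-second window is a placeholder standing in for the measured benchmark duration), the first vLLM metric produces a query string like this:

```python
# Illustration only: the window value is a placeholder; in the script it comes
# from the measured benchmark duration passed to print_metrics().
metric, backend, duration = "vllm:gpu_cache_usage_perc", "vllm", "120"
query = "avg_over_time(%s{job='%s-podmonitoring'}[%ss])" % (metric, backend, duration)
print(query)  # avg_over_time(vllm:gpu_cache_usage_perc{job='vllm-podmonitoring'}[120s])
```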

def print_metrics(metrics: List[str], duration: str, backend: str) -> None:
  # Create a credentials object from the default service account file.
  # Assumes the script has Application Default Credentials set up, ref:
  # https://googleapis.dev/python/google-auth/latest/user-guide.html#application-default-credentials
  credentials, project_id = google.auth.default()
  # Prepare an authentication request - helps format the request auth token
  auth_req = google.auth.transport.requests.Request()

  for metric in metrics:
    print("Metric Name: %s" % (metric))

    # Each query aggregates the metric over the last $DURATION seconds, as scraped via the
    # backend's PodMonitoring spec, which is assumed to be named "$BACKEND-podmonitoring".
    queries = {
      "Mean": "avg_over_time(%s{job='%s-podmonitoring'}[%ss])" % (metric, backend, duration),
      "Median": "quantile_over_time(0.5, %s{job='%s-podmonitoring'}[%ss])" % (metric, backend, duration),
      "Min": "min_over_time(%s{job='%s-podmonitoring'}[%ss])" % (metric, backend, duration),
      "Max": "max_over_time(%s{job='%s-podmonitoring'}[%ss])" % (metric, backend, duration),
      "P90": "quantile_over_time(0.9, %s{job='%s-podmonitoring'}[%ss])" % (metric, backend, duration),
      "P99": "quantile_over_time(0.99, %s{job='%s-podmonitoring'}[%ss])" % (metric, backend, duration),
    }
    for query_name, query in queries.items():
      # Refresh the access token before each request
      credentials.refresh(auth_req)

      # Build the Cloud Monitoring PromQL query request
      url = 'https://monitoring.googleapis.com/v1/projects/%s/location/global/prometheus/api/v1/query' % (project_id)
      headers_api = {'Authorization': 'Bearer ' + credentials.token}
      params = {'query': query}
      response = requests.get(url=url, headers=headers_api, params=params)

      # Handle the response; the endpoint returns a Prometheus HTTP API JSON body
      if response.ok:
        body = response.json()
        if body["status"] == "success":
          print("%s: %s" % (query_name, body["data"]["result"][0]["value"][1]))
        else:
          print("Cloud Monitoring PromQL Error: %s" % (body["error"]))
      else:
        print("HTTP Error: %s" % (response.text))


def main(args: argparse.Namespace):
  print(args)
@@ -420,6 +472,15 @@ def main(args: argparse.Namespace):
  )
  benchmark_result['avg_output_len'] = avg_output_len

  '''
  TODO: Add a flag for enabling model server scraping.
  Scrape and print model server metrics:
    1. Map the model server backend to its list of metrics.
    2. Loop through the metrics list, run the same PromQL queries on each metric, and print the returned data.
  '''
  metrics = metrics_to_scrape(args.backend)
  print_metrics(metrics, benchmark_time, args.backend)

  if args.save_json_results:
    save_json_results(args, benchmark_result)
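On the TODO above about gating the scraping behind a flag: a minimal sketch of what that could look like, assuming a hypothetical `--scrape-server-metrics` argparse flag (the flag name is an assumption, not part of this change):

```python
import argparse

# Hypothetical flag; the name --scrape-server-metrics is an assumption, not part of this PR.
parser = argparse.ArgumentParser()
parser.add_argument("--backend", type=str, default="vllm")
parser.add_argument(
    "--scrape-server-metrics",
    action="store_true",
    help="Scrape and print model server metrics via Cloud Monitoring PromQL.",
)
args = parser.parse_args()

if args.scrape_server_metrics:
    # In main() the guarded block would be the two calls added in this diff:
    #   metrics = metrics_to_scrape(args.backend)
    #   print_metrics(metrics, benchmark_time, args.backend)
    print("Server metric scraping enabled for backend: %s" % args.backend)
```

Either way, the scraping path needs Application Default Credentials available to `google.auth.default()`, for example via `gcloud auth application-default login` locally or Workload Identity on a GKE cluster.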

@@ -34,4 +34,5 @@ pydantic >= 2.0 # Required for OpenAI server.
aioprometheus[starlette]
pynvml == 11.5.0
accelerate
aiohttp
google-auth