Commit

Merge branch 'main' into kind-setup-related-updates
tdoublep authored Jun 25, 2024
2 parents e972961 + 3abbcd1 commit 73611a0
Showing 11 changed files with 51 additions and 19 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/black.yml
@@ -0,0 +1,14 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable
+        with:
+          options: "--check --verbose"
+          version: "~= 24.0"
+          jupyter: true
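For reference, the check this job performs can be reproduced locally before pushing; a minimal sketch in Python, assuming Black ~=24.0 with the jupyter extra is installed in the local environment (to match the version pin and `jupyter: true` above):

import subprocess
import sys

# Mirror the CI job: check formatting without rewriting any files.
# Assumes `black[jupyter]~=24.0` is installed for this interpreter.
subprocess.run(
    [sys.executable, "-m", "black", "--check", "--verbose", "."],
    check=True,  # raises CalledProcessError if any file would be reformatted
)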
5 changes: 4 additions & 1 deletion examples/example_benchmark.py
@@ -1,6 +1,7 @@
"""
This script runs benchmarking on TGIS server.
"""

import os
from pathlib import Path
import uuid
@@ -30,7 +31,9 @@ def initialize_kubernetes(location):
     config.verify_ssl = False
     apiclient = client.ApiClient(config)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    cluster = Cluster(name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"])
+    cluster = Cluster(
+        name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"]
+    )
     model_pvcs = [("my-models-pvc", "/models")]
     workload_pvc_name = "my-workload-pvc"
     cluster_gpu_name = "NVIDIA-A100-SXM4-80GB"
4 changes: 3 additions & 1 deletion examples/example_energy.py
@@ -26,7 +26,9 @@ def initialize_kubernetes(location):
     config.verify_ssl = False
     apiclient = client.ApiClient(config)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    cluster = Cluster(name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"])
+    cluster = Cluster(
+        name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"]
+    )
     model_pvcs = [("my-models-pvc", "/models")]
     workload_pvc_name = "my-workload-pvc"
     cluster_gpu_name = "NVIDIA-A100-SXM4-80GB"
6 changes: 4 additions & 2 deletions examples/example_llama.py
@@ -30,7 +30,9 @@ def initialize_kubernetes(location):
     config.verify_ssl = False
     apiclient = client.ApiClient(config)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    cluster = Cluster(name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"])
+    cluster = Cluster(
+        name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"]
+    )
     model_pvcs = [("my-models-pvc", "/models")]
     workload_pvc_name = "my-workload-pvc"
     cluster_gpu_name = "NVIDIA-A100-SXM4-80GB"
@@ -84,7 +86,7 @@ def initialize_kubernetes(location):
"/models/llama/7B",
shortname="llama-7b",
dtype="float16",
max_model_len=1024, # this is needed if you have a V100 gpu. otherwise use max_model_len = None
max_model_len=1024, # this is needed if you have a V100 gpu. otherwise use max_model_len = None
image="vllm/vllm-openai:latest",
pvcs=model_pvcs,
cluster_gpu_name=cluster_gpu_name,
4 changes: 3 additions & 1 deletion examples/example_vllm.py
@@ -30,7 +30,9 @@ def initialize_kubernetes(location):
     config.verify_ssl = False
     apiclient = client.ApiClient(config)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    cluster = Cluster(name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"])
+    cluster = Cluster(
+        name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"]
+    )
     model_pvcs = [("my-models-pvc", "/models")]
     workload_pvc_name = "my-workload-pvc"
     cluster_gpu_name = "NVIDIA-A100-SXM4-80GB"
2 changes: 1 addition & 1 deletion fmperf/WorkloadSpecs.py
@@ -65,7 +65,6 @@ def __init__(

         super().__init__(1, image, pvc_name, overwrite)
 
-
     @classmethod
     def from_yaml(cls, file: str):
         return super().from_yaml(file)
@@ -103,6 +102,7 @@ def get_env(
         ]
         return env
 
+
 class HeterogeneousWorkloadSpec(WorkloadSpec):
     def __init__(
         self,
6 changes: 5 additions & 1 deletion fmperf/__init__.py
@@ -1,3 +1,7 @@
 from fmperf.ModelSpecs import TGISModelSpec, vLLMModelSpec
 from fmperf.Cluster import Cluster, WorkloadSpec, DeployedModel
-from fmperf.WorkloadSpecs import HeterogeneousWorkloadSpec, HomogeneousWorkloadSpec, RealisticWorkloadSpec
+from fmperf.WorkloadSpecs import (
+    HeterogeneousWorkloadSpec,
+    HomogeneousWorkloadSpec,
+    RealisticWorkloadSpec,
+)
23 changes: 14 additions & 9 deletions fmperf/loadgen/collect_energy.py
@@ -10,10 +10,13 @@

 urllib3.disable_warnings(InsecureRequestWarning)
 
-metrics = ["DCGM_FI_DEV_POWER_USAGE",
-           "kepler_container_gpu_joules_total",
-           "kepler_container_package_joules_total",
-           "kepler_container_dram_joules_total"]
+metrics = [
+    "DCGM_FI_DEV_POWER_USAGE",
+    "kepler_container_gpu_joules_total",
+    "kepler_container_package_joules_total",
+    "kepler_container_dram_joules_total",
+]
 
+
 class MetricData:
     def __init__(self, metric: str, start: str, end: str, pod: str, data: {}):
@@ -125,29 +128,31 @@ def get_file_prefix(start_ts: str):

     return fprefix
 
-# get target metrics from a file specified by TARGET_METRICS_LIST env variable
+
+# get target metrics from a file specified by TARGET_METRICS_LIST env variable
 def get_target_metrics():
     global metrics
-    metric_list = os.environ.get('TARGET_METRICS_LIST', 'default_metrics.yaml')
+    metric_list = os.environ.get("TARGET_METRICS_LIST", "default_metrics.yaml")
     if metric_list is not None:
-        with open(metric_list, 'r') as yml:
+        with open(metric_list, "r") as yml:
             try:
                 config = yaml.safe_load(yml)
                 mlist = config["metrics"]
                 if len(mlist) > 0:
                     metrics.extend(mlist)
-                    #remove redundant metrics
+                    # remove redundant metrics
                     metrics = list(dict.fromkeys(metrics))
             except Exception as e:
                 print("catch Exception: ", e)
     return metrics
 
+
 # read metrics files and concatenate them to integrate performance data
 def summarize_energy(start_ts: str):
     global metrics
     all_df = pd.DataFrame(dtype=float)
     # target metrics
-    metrics = get_target_metrics()
+    metrics = get_target_metrics()
 
     try:
         dirpath = os.environ.get("METRICS_DIR", "/requests")
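For context, get_target_metrics above expects the file named by TARGET_METRICS_LIST to be a YAML document with a top-level "metrics" list, whose entries are appended to the built-in defaults. A minimal sketch of writing and reading such a file; the extra metric name here is illustrative and not part of this commit:

import yaml

# Hypothetical contents for the file pointed to by TARGET_METRICS_LIST;
# only the top-level "metrics" key is read by get_target_metrics().
extra_metrics = {"metrics": ["DCGM_FI_DEV_GPU_UTIL"]}

with open("default_metrics.yaml", "w") as f:
    yaml.safe_dump(extra_metrics, f)

with open("default_metrics.yaml") as f:
    print(yaml.safe_load(f)["metrics"])  # ['DCGM_FI_DEV_GPU_UTIL']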
2 changes: 1 addition & 1 deletion fmperf/loadgen/generate-input.py
@@ -38,7 +38,7 @@ def get_streaming_response(response: requests.Response):
         data = chunk.decode("utf-8").strip().split("data: ")[1]
         out = json.loads(data)["choices"][0]
         finished = out["finish_reason"] is not None
-        if not (out['text'] == ''): # filter empty tokens
+        if not (out["text"] == ""):  # filter empty tokens
             yield out


2 changes: 1 addition & 1 deletion fmperf/loadgen/run.py
@@ -49,7 +49,7 @@ def get_streaming_response_vllm(response):
             data = chunk.decode("utf-8").strip().split("data: ")[1]
             out = json.loads(data)["choices"][0]
             stop = out["finish_reason"] is not None
-            if not (out['text'] == ''): # filter empty tokens
+            if not (out["text"] == ""):  # filter empty tokens
                 yield out, 1, timestamp, True, None
     except Exception as e:
         timestamp = time.time_ns()
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
 kubernetes==24.2.0
-black==23.1.0
+black~=24.0
 pandas
 pyyaml
 typing
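As a side note, ~=24.0 is a compatible-release specifier: any 24.x release of Black satisfies it, which keeps the pinned formatter in step with the version: "~= 24.0" setting in the new workflow. A small illustration using the third-party packaging library (not itself listed in requirements.txt):

from packaging.specifiers import SpecifierSet

# "~=24.0" means ">=24.0, ==24.*": any 24.x release, but not the next major one.
spec = SpecifierSet("~=24.0")
print("24.4.2" in spec)  # True
print("25.1.0" in spec)  # False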
