Commit

Merge branch 'main' into kind-setup-related-updates
tdoublep authored Jun 25, 2024
2 parents e972961 + 3abbcd1 commit 73611a0
Showing 11 changed files with 51 additions and 19 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/black.yml
@@ -0,0 +1,14 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable
+        with:
+          options: "--check --verbose"
+          version: "~= 24.0"
+          jupyter: true
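For reference, the check this job performs can be reproduced locally before pushing; a minimal sketch in Python, assuming Black ~=24.0 with the jupyter extra is installed in the local environment (to match the version pin and `jupyter: true` above):

import subprocess
import sys

# Mirror the CI job: check formatting without rewriting any files.
# Assumes `black[jupyter]~=24.0` is installed for this interpreter.
subprocess.run(
    [sys.executable, "-m", "black", "--check", "--verbose", "."],
    check=True,  # raises CalledProcessError if any file would be reformatted
)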
5 changes: 4 additions & 1 deletion examples/example_benchmark.py
@@ -1,6 +1,7 @@
"""
This script runs benchmarking on TGIS server.
"""

import os
from pathlib import Path
import uuid
@@ -30,7 +31,9 @@ def initialize_kubernetes(location):
     config.verify_ssl = False
     apiclient = client.ApiClient(config)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    cluster = Cluster(name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"])
+    cluster = Cluster(
+        name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"]
+    )
     model_pvcs = [("my-models-pvc", "/models")]
     workload_pvc_name = "my-workload-pvc"
     cluster_gpu_name = "NVIDIA-A100-SXM4-80GB"
4 changes: 3 additions & 1 deletion examples/example_energy.py
@@ -26,7 +26,9 @@ def initialize_kubernetes(location):
     config.verify_ssl = False
     apiclient = client.ApiClient(config)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    cluster = Cluster(name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"])
+    cluster = Cluster(
+        name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"]
+    )
     model_pvcs = [("my-models-pvc", "/models")]
     workload_pvc_name = "my-workload-pvc"
     cluster_gpu_name = "NVIDIA-A100-SXM4-80GB"
6 changes: 4 additions & 2 deletions examples/example_llama.py
@@ -30,7 +30,9 @@ def initialize_kubernetes(location):
     config.verify_ssl = False
     apiclient = client.ApiClient(config)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    cluster = Cluster(name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"])
+    cluster = Cluster(
+        name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"]
+    )
     model_pvcs = [("my-models-pvc", "/models")]
     workload_pvc_name = "my-workload-pvc"
     cluster_gpu_name = "NVIDIA-A100-SXM4-80GB"
@@ -84,7 +86,7 @@ def initialize_kubernetes(location):
"/models/llama/7B",
shortname="llama-7b",
dtype="float16",
max_model_len=1024, # this is needed if you have a V100 gpu. otherwise use max_model_len = None
max_model_len=1024, # this is needed if you have a V100 gpu. otherwise use max_model_len = None
image="vllm/vllm-openai:latest",
pvcs=model_pvcs,
cluster_gpu_name=cluster_gpu_name,
4 changes: 3 additions & 1 deletion examples/example_vllm.py
@@ -30,7 +30,9 @@ def initialize_kubernetes(location):
     config.verify_ssl = False
     apiclient = client.ApiClient(config)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    cluster = Cluster(name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"])
+    cluster = Cluster(
+        name="llm", apiclient=apiclient, namespace=os.environ["OPENSHIFT_NAMESPACE"]
+    )
     model_pvcs = [("my-models-pvc", "/models")]
     workload_pvc_name = "my-workload-pvc"
     cluster_gpu_name = "NVIDIA-A100-SXM4-80GB"
2 changes: 1 addition & 1 deletion fmperf/WorkloadSpecs.py
@@ -65,7 +65,6 @@ def __init__(

         super().__init__(1, image, pvc_name, overwrite)
 
-
     @classmethod
     def from_yaml(cls, file: str):
         return super().from_yaml(file)
@@ -103,6 +102,7 @@ def get_env(
         ]
         return env
 
+
 class HeterogeneousWorkloadSpec(WorkloadSpec):
     def __init__(
         self,
6 changes: 5 additions & 1 deletion fmperf/__init__.py
@@ -1,3 +1,7 @@
 from fmperf.ModelSpecs import TGISModelSpec, vLLMModelSpec
 from fmperf.Cluster import Cluster, WorkloadSpec, DeployedModel
-from fmperf.WorkloadSpecs import HeterogeneousWorkloadSpec, HomogeneousWorkloadSpec, RealisticWorkloadSpec
+from fmperf.WorkloadSpecs import (
+    HeterogeneousWorkloadSpec,
+    HomogeneousWorkloadSpec,
+    RealisticWorkloadSpec,
+)
23 changes: 14 additions & 9 deletions fmperf/loadgen/collect_energy.py
@@ -10,10 +10,13 @@

 urllib3.disable_warnings(InsecureRequestWarning)
 
-metrics = ["DCGM_FI_DEV_POWER_USAGE",
-           "kepler_container_gpu_joules_total",
-           "kepler_container_package_joules_total",
-           "kepler_container_dram_joules_total"]
+metrics = [
+    "DCGM_FI_DEV_POWER_USAGE",
+    "kepler_container_gpu_joules_total",
+    "kepler_container_package_joules_total",
+    "kepler_container_dram_joules_total",
+]
 
+
 class MetricData:
     def __init__(self, metric: str, start: str, end: str, pod: str, data: {}):
@@ -125,29 +128,31 @@ def get_file_prefix(start_ts: str):

     return fprefix
 
-# get target metrics from a file specified by TARGET_METRICS_LIST env variable
+
+# get target metrics from a file specified by TARGET_METRICS_LIST env variable
 def get_target_metrics():
     global metrics
-    metric_list = os.environ.get('TARGET_METRICS_LIST', 'default_metrics.yaml')
+    metric_list = os.environ.get("TARGET_METRICS_LIST", "default_metrics.yaml")
     if metric_list is not None:
-        with open(metric_list, 'r') as yml:
+        with open(metric_list, "r") as yml:
             try:
                 config = yaml.safe_load(yml)
                 mlist = config["metrics"]
                 if len(mlist) > 0:
                     metrics.extend(mlist)
-                    #remove redundant metrics
+                    # remove redundant metrics
                     metrics = list(dict.fromkeys(metrics))
             except Exception as e:
                 print("catch Exception: ", e)
     return metrics
 
+
 # read metrics files and concatenate them to integrate performance data
 def summarize_energy(start_ts: str):
     global metrics
     all_df = pd.DataFrame(dtype=float)
     # target metrics
-    metrics = get_target_metrics()
+    metrics = get_target_metrics()
 
     try:
         dirpath = os.environ.get("METRICS_DIR", "/requests")
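For context, get_target_metrics above expects the file named by TARGET_METRICS_LIST to be a YAML document with a top-level "metrics" list, whose entries are appended to the built-in defaults. A minimal sketch of writing and reading such a file; the extra metric name here is illustrative and not part of this commit:

import yaml

# Hypothetical contents for the file pointed to by TARGET_METRICS_LIST;
# only the top-level "metrics" key is read by get_target_metrics().
extra_metrics = {"metrics": ["DCGM_FI_DEV_GPU_UTIL"]}

with open("default_metrics.yaml", "w") as f:
    yaml.safe_dump(extra_metrics, f)

with open("default_metrics.yaml") as f:
    print(yaml.safe_load(f)["metrics"])  # ['DCGM_FI_DEV_GPU_UTIL']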
2 changes: 1 addition & 1 deletion fmperf/loadgen/generate-input.py
@@ -38,7 +38,7 @@ def get_streaming_response(response: requests.Response):
         data = chunk.decode("utf-8").strip().split("data: ")[1]
         out = json.loads(data)["choices"][0]
         finished = out["finish_reason"] is not None
-        if not (out['text'] == ''): # filter empty tokens
+        if not (out["text"] == ""):  # filter empty tokens
             yield out


2 changes: 1 addition & 1 deletion fmperf/loadgen/run.py
@@ -49,7 +49,7 @@ def get_streaming_response_vllm(response):
             data = chunk.decode("utf-8").strip().split("data: ")[1]
             out = json.loads(data)["choices"][0]
             stop = out["finish_reason"] is not None
-            if not (out['text'] == ''): # filter empty tokens
+            if not (out["text"] == ""):  # filter empty tokens
                 yield out, 1, timestamp, True, None
     except Exception as e:
         timestamp = time.time_ns()
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
 kubernetes==24.2.0
-black==23.1.0
+black~=24.0
 pandas
 pyyaml
 typing
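As a side note, ~=24.0 is a compatible-release specifier: any 24.x release of Black satisfies it, which keeps the pinned formatter in step with the version: "~= 24.0" setting in the new workflow. A small illustration using the third-party packaging library (not itself listed in requirements.txt):

from packaging.specifiers import SpecifierSet

# "~=24.0" means ">=24.0, ==24.*": any 24.x release, but not the next major one.
spec = SpecifierSet("~=24.0")
print("24.4.2" in spec)  # True
print("25.1.0" in spec)  # False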
