Commit

fix
KuilongCui committed Sep 12, 2024
1 parent c6326b1 commit 0dd1aa5
Showing 6 changed files with 70 additions and 34 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/bench.yml
@@ -29,3 +29,16 @@ jobs:
[[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
- name: Build And Test
run: ./tools/bench_test.sh
- name: Create comment from file
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
const filePath = 'performance.txt';
const commentBody = fs.readFileSync(filePath, 'utf8');
await github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: commentBody
});
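
The new step posts the contents of performance.txt as a comment on the triggering pull request. For readers who want the same behavior outside Actions, a minimal Python sketch of the equivalent REST call (assuming the requests package, a GITHUB_TOKEN environment variable, and placeholder owner/repo/PR values):

import os

import requests

def post_pr_comment(owner: str, repo: str, pr_number: int, path: str = "performance.txt") -> None:
    # PR comments are created through the issues endpoint,
    # mirroring github.rest.issues.createComment in the workflow step
    with open(path, "r", encoding="utf-8") as f:
        body = f.read()
    resp = requests.post(
        f"https://api.github.com/repos/{owner}/{repo}/issues/{pr_number}/comments",
        headers={"Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}"},
        json={"body": body},
    )
    resp.raise_for_status()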
9 changes: 0 additions & 9 deletions .github/workflows/pylint.yml
@@ -30,12 +30,3 @@ jobs:
-w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && make lint"
- uses: actions/github-script@v7
with:
script: |
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: '**ok**'
})
12 changes: 0 additions & 12 deletions requirements.txt

This file was deleted.

30 changes: 19 additions & 11 deletions tests/e2e_test/test_bench.py
@@ -18,9 +18,10 @@
import pytest
import ray
import torch

import numpy as np

from .test_e2e import generate_launch_command
from .utils import to_markdown_table

def launch_llumnix_service(command):
    subprocess.run(command, shell=True, check=True)
@@ -74,23 +75,29 @@ def clear_ray_state():
def parse_log_file():
    json_files = [f for f in os.listdir('.') if f.endswith('_latency_info.json')]

    request_latencies = []
    prefill_latencies = []
    decode_latencies = []

    for json_file in json_files:
        with open(json_file, 'r', encoding="utf-8") as file:
            data = json.load(file)[0]

            # flatten each file's latency list into one overall series
            request_latencies.extend(data.get('request_latencies', []))
            prefill_latencies.extend(data.get('prefill_latencies', []))
            decode_latencies.extend(data.get('decode_latencies', []))

    request_mean = np.mean(request_latencies)
    prefill_mean = np.mean(prefill_latencies)
    decode_mean = np.mean(decode_latencies)
    latencies_array = np.array(decode_latencies)

    p25 = np.percentile(latencies_array, 25)
    p50 = np.percentile(latencies_array, 50)
    p75 = np.percentile(latencies_array, 75)
    p95 = np.percentile(latencies_array, 95)
    p99 = np.percentile(latencies_array, 99)
    mean = np.mean(latencies_array)

    data = [
        ["decode", "p25", "p50", "p75", "p95", "p99", "mean"],
        ["latency(ms)", f"{p25:.2f}", f"{p50:.2f}", f"{p75:.2f}", f"{p95:.2f}", f"{p99:.2f}", f"{mean:.2f}"]
    ]

    print(f"request_mean: {request_mean}, prefill_mean: {prefill_mean}, decode_mean: {decode_mean}")
    return to_markdown_table(data)
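
For a quick sanity check of the table construction above outside the test harness, a minimal standalone sketch (latency values made up; the import path for to_markdown_table is assumed):

import numpy as np

from tests.e2e_test.utils import to_markdown_table  # import path assumed

decode_latencies = [12.1, 15.4, 18.9, 22.3, 40.0]  # fake data
arr = np.array(decode_latencies)
percentiles = [f"{np.percentile(arr, q):.2f}" for q in (25, 50, 75, 95, 99)]
print(to_markdown_table([
    ["decode", "p25", "p50", "p75", "p95", "p99", "mean"],
    ["latency(ms)"] + percentiles + [f"{np.mean(arr):.2f}"],
]))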

@pytest.mark.asyncio
@pytest.mark.parametrize("model", ['/mnt/model/Qwen-7B'])
@@ -111,7 +118,7 @@ async def run_bench_command(command):

tasks = []
for i in range(device_count):
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port+i}", model=model, num_prompts=300,
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port+i}", model=model, num_prompts=30,
dataset_type="sharegpt",
                                              dataset_path="/mnt/dataset/sharegpt_gpt4/sharegpt_gpt4.jsonl",
qps=30,
Expand All @@ -120,7 +127,8 @@ async def run_bench_command(command):

await asyncio.wait(tasks, timeout=60*30)

parse_log_file()
with open("performance.txt", "w", encoding="utf-8") as f:
f.write(parse_log_file())

shutdown_llumnix_service()
clear_ray_state()
11 changes: 9 additions & 2 deletions tests/e2e_test/test_migration.py
@@ -20,6 +20,7 @@

from .test_e2e import generate_launch_command
from .test_bench import generate_bench_command, clear_ray_state, shutdown_llumnix_service
from .utils import to_markdown_table

size_pattern = re.compile(r'total_kv_cache_size:\s*([\d.]+)\s*(B|KB|MB|GB|TB)')
speed_pattern = re.compile(r'speed:\s*([\d.]+)GB/s')
@@ -72,16 +73,22 @@ async def run_bench_command(command):
    await process.wait()
    assert process.returncode == 0

bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port}", model=model, num_prompts=300,
bench_command = generate_bench_command(ip_ports=f"127.0.0.1:{base_port}", model=model, num_prompts=30,
dataset_type="sharegpt",
                                              dataset_path="/mnt/dataset/sharegpt_gpt4/sharegpt_gpt4.jsonl",
qps=30)
await asyncio.wait_for(run_bench_command(bench_command), timeout=60*30)

average_speed = parse_log_file(instance_output_logs)

print(average_speed)
# build each row by list concatenation; list.extend() returns None
data = [
    ['migration_size'] + list(average_speed.keys()),
    ['speed(GB/s)'] + [f"{value:.2f}" for value in average_speed.values()]
]

with open("performance.txt", "w", encoding="utf-8") as f:
    f.write(to_markdown_table(data))

shutdown_llumnix_service()
clear_ray_state()
await asyncio.sleep(3)
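
A note on the row construction above: list.extend() mutates in place and returns None, so rows must be built with list concatenation instead. A minimal illustration (values made up):

# extend() returns None, so a row built this way would be None
bad = ['migration_size'].extend(['1GB', '2GB'])
assert bad is None

# concatenation returns the combined row
good = ['migration_size'] + ['1GB', '2GB']
assert good == ['migration_size', '1GB', '2GB']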
29 changes: 29 additions & 0 deletions tests/e2e_test/utils.py
@@ -0,0 +1,29 @@
# Copyright (c) 2024, Alibaba Group;
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

def to_markdown_table(data):
    headers = data[0]
    rows = data[1:]

    # column width = longest cell in that column (header or data)
    col_widths = [max(len(str(item)) for item in col) for col in zip(*data)]

    header_row = " | ".join(f"{str(item):<{col_widths[i]}}" for i, item in enumerate(headers))
    separator_row = " | ".join('-' * col_widths[i] for i in range(len(headers)))

    data_rows = []
    for row in rows:
        data_row = " | ".join(f"{str(item):<{col_widths[i]}}" for i, item in enumerate(row))
        data_rows.append(data_row)

    table = f"{header_row}\n{separator_row}\n" + "\n".join(data_rows)
    return table
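
For reference, a small usage sketch of to_markdown_table with made-up values, showing the layout it produces:

table = to_markdown_table([
    ["decode", "p25", "p50"],
    ["latency(ms)", "12.10", "15.40"],
])
print(table)
# decode      | p25   | p50
# ----------- | ----- | -----
# latency(ms) | 12.10 | 15.40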
