From 707455f5d62f0433adc995346bfeca9ce4c9f152 Mon Sep 17 00:00:00 2001
From: Bangtian Liu <liubangtian@gmail.com>
Date: Mon, 13 Jan 2025 11:15:16 -0600
Subject: [PATCH] Add time units to log messages and format the code

Signed-off-by: Bangtian Liu <liubangtian@gmail.com>
---
 tuner/tuner/libtuner.py      | 33 +++++++++++++++------------------
 tuner/tuner/libtuner_test.py |  4 ++--
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/tuner/tuner/libtuner.py b/tuner/tuner/libtuner.py
index 5d848d02f..ce19b5944 100644
--- a/tuner/tuner/libtuner.py
+++ b/tuner/tuner/libtuner.py
@@ -234,16 +234,15 @@ def get_valid_benchmark_results(
     return filtered_benchmark_results
 
 
-def check_baseline_devices_uniqueness(baseline_results: list[BenchmarkResult]) -> bool:
-    seen = set()
-    for result in baseline_results:
-        if result.device_id in seen:
-            return False
-        seen.add(result.device_id)
-    return True
+def are_baseline_devices_unique(baseline_results: list[BenchmarkResult]) -> bool:
+    return len(baseline_results) == len(
+        set(map(lambda r: r.device_id, baseline_results))
+    )
 
 
 def map_baseline_by_device(baseline_results: list[BenchmarkResult]) -> dict[str, float]:
+    if not are_baseline_devices_unique(baseline_results):
+        logging.warning("Duplicate device IDs detected in the baseline results.")
     return {r.device_id: r.time for r in baseline_results}
 
 
@@ -253,6 +252,7 @@ def detect_baseline_regression(
 ) -> list[str]:
     """
     Detects performance regressions between two sets of baseline results.
+    Returns a list of device IDs where performance regressions are detected.
     """
     regression_device_ids = []
     first_baseline_by_device = map_baseline_by_device(first_baseline_results)
@@ -260,16 +260,16 @@ def detect_baseline_regression(
     for device_id in first_baseline_by_device:
         if device_id not in second_baseline_by_device:
             continue
-        first_baseline_time = first_baseline_by_device[device_id]
-        second_baseline_time = second_baseline_by_device[device_id]
+        first_baseline_ms = first_baseline_by_device[device_id]
+        second_baseline_ms = second_baseline_by_device[device_id]
 
-        if second_baseline_time > first_baseline_time * 1.03:
+        if second_baseline_ms > first_baseline_ms * 1.03:
             percentage_slower = (
-                (second_baseline_time - first_baseline_time) / first_baseline_time
+                (second_baseline_ms - first_baseline_ms) / first_baseline_ms
             ) * 100
             logging.warning(
                 f"Performance regression detected on device {device_id}: "
-                f"Baseline time = {first_baseline_time}, Post-baseline time = {second_baseline_time}, "
+                f"First baseline time = {first_baseline_ms} ms, Second baseline time = {second_baseline_ms} ms, "
                 f"Slower by {percentage_slower:.3f}%"
             )
             regression_device_ids.append(device_id)
@@ -618,7 +618,7 @@ def run_iree_benchmark_module_command(benchmark_pack: BenchmarkPack):
 
     mean_benchmark_time = sum(times) / float(len(times))
     logging.debug(
-        f"Benchmark time of candidate {candidate_id}: {mean_benchmark_time:.2f}"
+        f"Benchmark time of candidate {candidate_id}: {mean_benchmark_time:.2f} ms"
     )
     return BenchmarkResult(
         candidate_id=candidate_id,
@@ -956,7 +956,7 @@ def get_speedup(result: BenchmarkResult) -> float:
             speedup = f"{round(get_speedup(r) * 100, 2)}% of baseline"
         else:
             speedup = "baseline unavailable"
-        logging.info(f"Candidate {r.candidate_id} time: {r.time:.2f} ({speedup})")
+        logging.info(f"Candidate {r.candidate_id} time: {r.time:.2f} ms ({speedup})")
     return best_results
 
 
@@ -981,7 +981,7 @@ def benchmark(
         tuning_client=tuning_client,
         candidate_trackers=candidate_trackers,
     )
-    if not check_baseline_devices_uniqueness(baseline_results):
+    if not are_baseline_devices_unique(baseline_results):
         logging.warning("Duplicate device IDs detected in the first baseline results.")
 
     candidate_indices = [i for i in compiled_candidates if i != 0]
@@ -1002,9 +1002,6 @@ def benchmark(
         candidate_trackers=candidate_trackers,
     )
 
-    if not check_baseline_devices_uniqueness(post_baseline_results):
-        logging.warning("Duplicate device IDs detected in the second baseline results.")
-
     first_baseline_by_device = map_baseline_by_device(baseline_results)
     second_baseline_by_device = map_baseline_by_device(post_baseline_results)
     if first_baseline_by_device.keys() != second_baseline_by_device.keys():
diff --git a/tuner/tuner/libtuner_test.py b/tuner/tuner/libtuner_test.py
index 00c661a42..f262e6dfb 100644
--- a/tuner/tuner/libtuner_test.py
+++ b/tuner/tuner/libtuner_test.py
@@ -258,14 +258,14 @@ def test_check_baseline_devices_uniqueness():
         libtuner.BenchmarkResult(0, 2000.0, "hip://1"),
         libtuner.BenchmarkResult(0, 3000.0, "hip://2"),
     ]
-    assert libtuner.check_baseline_devices_uniqueness(baseline_results)
+    assert libtuner.are_baseline_devices_unique(baseline_results)
 
     baseline_results = [
         libtuner.BenchmarkResult(0, 1000.0, "hip://0"),
         libtuner.BenchmarkResult(0, 2000.0, "hip://0"),
         libtuner.BenchmarkResult(0, 3000.0, "hip://2"),
     ]
-    assert not libtuner.check_baseline_devices_uniqueness(baseline_results)
+    assert not libtuner.are_baseline_devices_unique(baseline_results)
 
 
 def test_detect_baseline_regression():