From 707455f5d62f0433adc995346bfeca9ce4c9f152 Mon Sep 17 00:00:00 2001 From: Bangtian Liu Date: Mon, 13 Jan 2025 11:15:16 -0600 Subject: [PATCH] add time unit and format the code Signed-off-by: Bangtian Liu --- tuner/tuner/libtuner.py | 33 +++++++++++++++------------------ tuner/tuner/libtuner_test.py | 4 ++-- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/tuner/tuner/libtuner.py b/tuner/tuner/libtuner.py index 5d848d02f..ce19b5944 100644 --- a/tuner/tuner/libtuner.py +++ b/tuner/tuner/libtuner.py @@ -234,16 +234,15 @@ def get_valid_benchmark_results( return filtered_benchmark_results -def check_baseline_devices_uniqueness(baseline_results: list[BenchmarkResult]) -> bool: - seen = set() - for result in baseline_results: - if result.device_id in seen: - return False - seen.add(result.device_id) - return True +def are_baseline_devices_unique(baseline_results: list[BenchmarkResult]) -> bool: + return len(baseline_results) == len( + set(map(lambda r: r.device_id, baseline_results)) + ) def map_baseline_by_device(baseline_results: list[BenchmarkResult]) -> dict[str, float]: + if not are_baseline_devices_unique(baseline_results): + logging.warning("Duplicate device IDs detected in the baseline results.") return {r.device_id: r.time for r in baseline_results} @@ -253,6 +252,7 @@ def detect_baseline_regression( ) -> list[str]: """ Detects performance regressions between two sets of baseline results. + Returns a list of device IDs where performance regressions are detected. """ regression_device_ids = [] first_baseline_by_device = map_baseline_by_device(first_baseline_results) @@ -260,16 +260,16 @@ def detect_baseline_regression( for device_id in first_baseline_by_device: if device_id not in second_baseline_by_device: continue - first_baseline_time = first_baseline_by_device[device_id] - second_baseline_time = second_baseline_by_device[device_id] + first_baseline_ms = first_baseline_by_device[device_id] + second_baseline_ms = second_baseline_by_device[device_id] - if second_baseline_time > first_baseline_time * 1.03: + if second_baseline_ms > first_baseline_ms * 1.03: percentage_slower = ( - (second_baseline_time - first_baseline_time) / first_baseline_time + (second_baseline_ms - first_baseline_ms) / first_baseline_ms ) * 100 logging.warning( f"Performance regression detected on device {device_id}: " - f"Baseline time = {first_baseline_time}, Post-baseline time = {second_baseline_time}, " + f"First baseline time = {first_baseline_ms} ms, Second baseline time = {second_baseline_ms} ms, " f"Slower by {percentage_slower:.3f}%" ) regression_device_ids.append(device_id) @@ -618,7 +618,7 @@ def run_iree_benchmark_module_command(benchmark_pack: BenchmarkPack): mean_benchmark_time = sum(times) / float(len(times)) logging.debug( - f"Benchmark time of candidate {candidate_id}: {mean_benchmark_time:.2f}" + f"Benchmark time of candidate {candidate_id}: {mean_benchmark_time:.2f} ms" ) return BenchmarkResult( candidate_id=candidate_id, @@ -956,7 +956,7 @@ def get_speedup(result: BenchmarkResult) -> float: speedup = f"{round(get_speedup(r) * 100, 2)}% of baseline" else: speedup = "baseline unavailable" - logging.info(f"Candidate {r.candidate_id} time: {r.time:.2f} ({speedup})") + logging.info(f"Candidate {r.candidate_id} time: {r.time:.2f} ms ({speedup})") return best_results @@ -981,7 +981,7 @@ def benchmark( tuning_client=tuning_client, candidate_trackers=candidate_trackers, ) - if not check_baseline_devices_uniqueness(baseline_results): + if not are_baseline_devices_unique(baseline_results): logging.warning("Duplicate device IDs detected in the first baseline results.") candidate_indices = [i for i in compiled_candidates if i != 0] @@ -1002,9 +1002,6 @@ def benchmark( candidate_trackers=candidate_trackers, ) - if not check_baseline_devices_uniqueness(post_baseline_results): - logging.warning("Duplicate device IDs detected in the second baseline results.") - first_baseline_by_device = map_baseline_by_device(baseline_results) second_baseline_by_device = map_baseline_by_device(post_baseline_results) if first_baseline_by_device.keys() != second_baseline_by_device.keys(): diff --git a/tuner/tuner/libtuner_test.py b/tuner/tuner/libtuner_test.py index 00c661a42..f262e6dfb 100644 --- a/tuner/tuner/libtuner_test.py +++ b/tuner/tuner/libtuner_test.py @@ -258,14 +258,14 @@ def test_check_baseline_devices_uniqueness(): libtuner.BenchmarkResult(0, 2000.0, "hip://1"), libtuner.BenchmarkResult(0, 3000.0, "hip://2"), ] - assert libtuner.check_baseline_devices_uniqueness(baseline_results) + assert libtuner.are_baseline_devices_unique(baseline_results) baseline_results = [ libtuner.BenchmarkResult(0, 1000.0, "hip://0"), libtuner.BenchmarkResult(0, 2000.0, "hip://0"), libtuner.BenchmarkResult(0, 3000.0, "hip://2"), ] - assert not libtuner.check_baseline_devices_uniqueness(baseline_results) + assert not libtuner.are_baseline_devices_unique(baseline_results) def test_detect_baseline_regression():