diff --git a/benchmark/tools/compare.py b/benchmark/tools/compare.py
new file mode 100755
index 00000000000..f6ac5ae321a
--- /dev/null
+++ b/benchmark/tools/compare.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors
+# SPDX-License-Identifier: BSD-3-Clause
+import sys
+import json
+import argparse
+import math
+import pandas as pd
+import tabulate  # for pandas markdown output
+from frozendict import frozendict
+
+
+keys = {"stencil", "size", "filename", "n", "r", "k", "m"}
+comparison_keys = {"time", "storage", "iterations"}
+suffix = ".ratio"
+
+
+def sorted_key_intersection(a: dict, b: dict) -> list:
+    return sorted(set(a.keys()).intersection(b.keys()), key=str)
+
+
+def parse_json_matrix(filename: str) -> dict:
+    """Parse a JSON file into a key -> test_case dict"""
+    with open(filename) as file:
+        parsed = json.load(file)
+    result = {}
+    assert isinstance(parsed, list)
+    for case in parsed:
+        assert isinstance(case, dict)
+        assert not keys.isdisjoint(case.keys())
+        dict_key = frozendict(
+            {key: case[key] for key in keys.intersection(case.keys())}
+        )
+        if dict_key in result.keys():
+            print(
+                f"WARNING: Duplicate key {json.dumps(dict_key)}",
+                file=sys.stderr,
+            )
+        result[dict_key] = case
+    return result
+
+
+def warn_on_inconsistent_keys(baseline: dict, comparison: dict, context: str):
+    """Print a warning message for non-matching keys between baseline/comparison using the given context string"""
+    baseline_only = sorted(set(baseline.keys()).difference(comparison.keys()))
+    comparison_only = sorted(set(comparison.keys()).difference(baseline.keys()))
+    for key in baseline_only:
+        print(
+            f"WARNING: Key {json.dumps(key) if isinstance(key, dict) else key} found in baseline only in context {context}",
+            file=sys.stderr,
+        )
+    for key in comparison_only:
+        print(
+            f"WARNING: Key {json.dumps(key) if isinstance(key, dict) else key} found in comparison only in context {context}",
+            file=sys.stderr,
+        )
+    for key in sorted_key_intersection(baseline, comparison):
+        if isinstance(baseline[key], dict):
+            assert isinstance(comparison[key], dict)
+            warn_on_inconsistent_keys(
+                baseline[key], comparison[key], f"{context}/{key}"
+            )
+
+
+def ratio(baseline: int | float, comparison: int | float) -> float:
+    """Computes the ratio between baseline and comparison.
+    For runtimes, this is the speedup."""
+    return baseline / comparison
+
+
+def compare_benchmark(baseline: dict, comparison: dict) -> dict:
+    """Compares a handful of keys and component breakdowns recursively, writing them with a suffix to the output"""
+    result = {}
+    for key in sorted_key_intersection(baseline, comparison):
+        if key == "components":
+            assert isinstance(baseline[key], dict)
+            assert isinstance(comparison[key], dict)
+            result[key + suffix] = {
+                sub_key: ratio(baseline[key][sub_key], comparison[key][sub_key])
+                for sub_key in baseline[key]
+            }
+        elif isinstance(baseline[key], dict):
+            result[key] = compare_benchmark(baseline[key], comparison[key])
+        elif key in comparison_keys:
+            result[key + suffix] = ratio(baseline[key], comparison[key])
+    return result
+
+
+def compare(baseline: dict, comparison: dict) -> dict:
+    """Compares a test case, keeping root-level values and recursing into benchmarks"""
+    result = {}
+    for key in sorted_key_intersection(baseline, comparison):
+        # we don't have lists on the test case root level
+        assert not isinstance(baseline[key], list)
+        if isinstance(baseline[key], dict):
+            benchmark_result = {}
+            for benchmark_name in baseline[key].keys():
+                if isinstance(baseline[key][benchmark_name], dict):
+                    comparison_result = compare_benchmark(
+                        baseline[key][benchmark_name], comparison[key][benchmark_name]
+                    )
+                    if len(comparison_result) > 0:
+                        benchmark_result[benchmark_name] = comparison_result
+            if len(benchmark_result) > 0:
+                result[key] = benchmark_result
+        else:
+            # everything that's not a dict should only depend on the key in the root level
+            if baseline[key] != comparison[key]:
+                print(
+                    f"WARNING: Inconsistent value for {key}: {baseline[key]} != {comparison[key]}",
+                    file=sys.stderr,
+                )
+            result[key] = baseline[key]
+    return result
+
+
+def extract_benchmark_results(
+    input: dict, benchmarks: dict, case_key: tuple, context: str | None
+) -> None:
+    for key, value in input.items():
+        benchmark_name = key if context is None else f"{context}/{key}"
+        if key in map(lambda x: x + suffix, comparison_keys):
+            benchmark_name = benchmark_name[: -len(suffix)]
+            if benchmark_name not in benchmarks.keys():
+                benchmarks[benchmark_name] = []
+            benchmarks[benchmark_name].append((case_key, value))
+        elif isinstance(value, dict):
+            extract_benchmark_results(value, benchmarks, case_key, benchmark_name)
+
+
+def is_outlier(value: float, args) -> bool:
+    """returns true iff the value is more than the outlier threshold away from 1.0"""
+    return math.fabs(math.log(value)) > math.log(1.0 + args.outlier_threshold / 100)
+
+
+def compare_main(args: list):
+    """Runs the comparison script"""
+    parser = argparse.ArgumentParser(description="Compare two Ginkgo benchmark outputs")
+    parser.add_argument(
+        "--outliers", action="store_true", help="List outliers from the results"
+    )
+    parser.add_argument(
+        "--outlier-threshold",
+        type=float,
+        default=10,
+        help="At what percentage of deviation (above or below) should outliers be reported",
+    )
+    parser.add_argument(
+        "--outlier-count",
+        type=int,
+        default=1000,
+        help="How many outliers should be reported per benchmark",
+    )
+    parser.add_argument("--output", choices=["json", "csv", "markdown"], default="json")
+    parser.add_argument("baseline")
+    parser.add_argument("comparison")
+    args = parser.parse_args(args)
+    baseline_json = parse_json_matrix(args.baseline)
+    comparison_json = parse_json_matrix(args.comparison)
+    warn_on_inconsistent_keys(baseline_json, comparison_json, "root")
+
+    results = {}
+
+    for key in 
sorted_key_intersection(baseline_json, comparison_json): + results[key] = compare(baseline_json[key], comparison_json[key]) + + outliers = {} + benchmarks = {} + for key, value in results.items(): + extract_benchmark_results(value, benchmarks, key, None) + if args.outliers: + for benchmark_name, benchmark_results in benchmarks.items(): + outlier = sorted( + [ + (case_key, value) + for case_key, value in benchmark_results + if is_outlier(value, args) + ], + key=lambda x: math.fabs(math.log(x[1])), + reverse=True, + ) + outliers[benchmark_name] = outlier[: min(len(outlier), args.outlier_count)] + + if args.output == "json": + print( + json.dumps( + { + "results": [value for _, value in results.items()], + "outliers": { + key: [ + {"value": ratio_value, **case_key} + for (case_key, ratio_value) in value + ] + for key, value in outliers.items() + if len(value) > 0 + }, + }, + indent=4, + ) + ) + else: + columns = ["benchmark", "testcase", "ratio"] + only_first = args.output == "markdown" + table = pd.DataFrame( + sum( + [ + [ + ( + key if i == 0 or not only_first else "", + json.dumps(value[0]), + value[1], + ) + for i, value in enumerate(values) + ] + for key, values in benchmarks.items() + ], + [], + ), + columns=columns, + ) + if args.output == "csv": + table.to_csv(sys.stdout, index=False) + else: + table.to_markdown(sys.stdout, index=False) + if args.outliers: + outlier_table = pd.DataFrame( + sum( + [ + [ + ( + key if i == 0 or not only_first else "", + json.dumps(value[0]), + value[1], + ) + for i, value in enumerate(values) + ] + for key, values in outliers.items() + ], + [], + ), + columns=columns, + ) + if len(outlier_table) > 0: + print("\n\nOutliers") + if args.output == "csv": + outlier_table.to_csv(sys.stdout, index=False) + else: + outlier_table.to_markdown(sys.stdout, index=False) + print() + + +if __name__ == "__main__": + compare_main(sys.argv) diff --git a/benchmark/tools/compare_test.py b/benchmark/tools/compare_test.py new file mode 100644 index 00000000000..83e2ee5dbda --- /dev/null +++ b/benchmark/tools/compare_test.py @@ -0,0 +1,226 @@ +import json +import compare +import os + +dir_path = os.path.dirname(os.path.realpath(__file__)) + + +def test_mismatch(capsys): + compare.compare_main( + [ + dir_path + "/../test/reference/blas.simple.stdout", + dir_path + "/../test/reference/spmv.matrix.stdout", + ] + ) + captured = capsys.readouterr() + ref_out = {"results": [], "outliers": {}} + + ref_err = """WARNING: Key {"n": 100} found in baseline only in context root +WARNING: Key {"filename": ""} found in comparison only in context root +""" + assert json.loads(captured.out) == ref_out + assert captured.err == ref_err + + +def test_simple(capsys): + compare.compare_main( + [ + dir_path + "/../test/reference/spmv.matrix.stdout", + dir_path + "/../test/reference/spmv.matrix.stdout", + ] + ) + captured = capsys.readouterr() + ref_out = { + "results": [ + { + "cols": 36, + "filename": "", + "nonzeros": 208, + "rows": 36, + "spmv": {"coo": {"storage.ratio": 1.0, "time.ratio": 1.0}}, + } + ], + "outliers": {}, + } + + assert json.loads(captured.out) == ref_out + assert captured.err == "" + + +def test_outliers(capsys): + compare.compare_main( + [ + "--outliers", + dir_path + "/compare_test_input1.json", + dir_path + "/compare_test_input2.json", + ] + ) + captured = capsys.readouterr() + ref_out = { + "results": [ + { + "cols": 36, + "filename": "mtx", + "nonzeros": 208, + "rows": 36, + "spmv": { + "coo": {"storage.ratio": 1.0, "time.ratio": 1.2}, + "csr": {"storage.ratio": 2.0, 
"time.ratio": 0.8}, + "ell": {"storage.ratio": 0.5, "time.ratio": 1.0}, + "sellp": {"storage.ratio": 1.0, "time.ratio": 1.11}, + "hybrid": {"storage.ratio": 1.0, "time.ratio": 1.01}, + }, + } + ], + "outliers": { + "spmv/coo/time": [{"value": 1.2, "filename": "mtx"}], + "spmv/csr/storage": [{"value": 2.0, "filename": "mtx"}], + "spmv/csr/time": [{"value": 0.8, "filename": "mtx"}], + "spmv/ell/storage": [{"value": 0.5, "filename": "mtx"}], + "spmv/sellp/time": [{"value": 1.11, "filename": "mtx"}], + }, + } + + assert json.loads(captured.out) == ref_out + assert captured.err == "" + + +def test_outliers_imited(capsys): + compare.compare_main( + [ + "--outliers", + "--outlier-count", + "0", + dir_path + "/compare_test_input1.json", + dir_path + "/compare_test_input2.json", + ] + ) + captured = capsys.readouterr() + ref_out = { + "results": [ + { + "cols": 36, + "filename": "mtx", + "nonzeros": 208, + "rows": 36, + "spmv": { + "coo": {"storage.ratio": 1.0, "time.ratio": 1.2}, + "csr": {"storage.ratio": 2.0, "time.ratio": 0.8}, + "ell": {"storage.ratio": 0.5, "time.ratio": 1.0}, + "sellp": {"storage.ratio": 1.0, "time.ratio": 1.11}, + "hybrid": {"storage.ratio": 1.0, "time.ratio": 1.01}, + }, + } + ], + "outliers": {}, + } + + assert json.loads(captured.out) == ref_out + assert captured.err == "" + + +def test_csv(capsys): + compare.compare_main( + [ + "--outliers", + "--output", + "csv", + dir_path + "/compare_test_input1.json", + dir_path + "/compare_test_input2.json", + ] + ) + captured = capsys.readouterr() + ref_out = """benchmark,testcase,ratio +spmv/coo/storage,"{""filename"": ""mtx""}",1.0 +spmv/coo/time,"{""filename"": ""mtx""}",1.2 +spmv/csr/storage,"{""filename"": ""mtx""}",2.0 +spmv/csr/time,"{""filename"": ""mtx""}",0.8 +spmv/ell/storage,"{""filename"": ""mtx""}",0.5 +spmv/ell/time,"{""filename"": ""mtx""}",1.0 +spmv/sellp/storage,"{""filename"": ""mtx""}",1.0 +spmv/sellp/time,"{""filename"": ""mtx""}",1.11 +spmv/hybrid/storage,"{""filename"": ""mtx""}",1.0 +spmv/hybrid/time,"{""filename"": ""mtx""}",1.01 + + +Outliers +benchmark,testcase,ratio +spmv/coo/time,"{""filename"": ""mtx""}",1.2 +spmv/csr/storage,"{""filename"": ""mtx""}",2.0 +spmv/csr/time,"{""filename"": ""mtx""}",0.8 +spmv/ell/storage,"{""filename"": ""mtx""}",0.5 +spmv/sellp/time,"{""filename"": ""mtx""}",1.11 + +""" + assert captured.out == ref_out + assert captured.err == "" + + +def test_md(capsys): + compare.compare_main( + [ + "--outliers", + "--output", + "markdown", + dir_path + "/compare_test_input1.json", + dir_path + "/compare_test_input2.json", + ] + ) + captured = capsys.readouterr() + ref_out = """| benchmark | testcase | ratio | +|:--------------------|:--------------------|--------:| +| spmv/coo/storage | {"filename": "mtx"} | 1 | +| spmv/coo/time | {"filename": "mtx"} | 1.2 | +| spmv/csr/storage | {"filename": "mtx"} | 2 | +| spmv/csr/time | {"filename": "mtx"} | 0.8 | +| spmv/ell/storage | {"filename": "mtx"} | 0.5 | +| spmv/ell/time | {"filename": "mtx"} | 1 | +| spmv/sellp/storage | {"filename": "mtx"} | 1 | +| spmv/sellp/time | {"filename": "mtx"} | 1.11 | +| spmv/hybrid/storage | {"filename": "mtx"} | 1 | +| spmv/hybrid/time | {"filename": "mtx"} | 1.01 | + +Outliers +| benchmark | testcase | ratio | +|:-----------------|:--------------------|--------:| +| spmv/coo/time | {"filename": "mtx"} | 1.2 | +| spmv/csr/storage | {"filename": "mtx"} | 2 | +| spmv/csr/time | {"filename": "mtx"} | 0.8 | +| spmv/ell/storage | {"filename": "mtx"} | 0.5 | +| spmv/sellp/time | {"filename": "mtx"} | 1.11 | +""" + 
assert captured.out == ref_out + assert captured.err == "" + + +def test_complex(capsys): + compare.compare_main( + [ + dir_path + "/compare_test_input3.json", + dir_path + "/compare_test_input3.json", + ] + ) + captured = capsys.readouterr() + ref_out = { + "results": [ + { + "filename": "mtx", + "solver": { + "gmres": { + "apply": { + "components.ratio": {"foo": 1.0}, + "iterations.ratio": 1.0, + "time.ratio": 1.0, + }, + "generate": {"time.ratio": 1.0}, + } + }, + }, + {"blas": {"axpy": {"time.ratio": 1.0}}, "k": 2, "m": 3, "n": 1, "r": 4}, + {"size": 100, "spmv": {"csr": {"time.ratio": 1.0}}, "stencil": "7pt"}, + ], + "outliers": {}, + } + + assert json.loads(captured.out) == ref_out + assert captured.err == "" diff --git a/benchmark/tools/compare_test_input1.json b/benchmark/tools/compare_test_input1.json new file mode 100644 index 00000000000..da7b190c270 --- /dev/null +++ b/benchmark/tools/compare_test_input1.json @@ -0,0 +1,48 @@ +[ + { + "filename": "mtx", + "spmv": { + "coo": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.2, + "repetitions": 10, + "completed": true + }, + "csr": { + "storage": 2000, + "max_relative_norm2": 1.0, + "time": 0.8, + "repetitions": 10, + "completed": true + }, + "ell": { + "storage": 500, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.11, + "repetitions": 10, + "completed": true + }, + "hybrid": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.01, + "repetitions": 10, + "completed": true + } + }, + "rows": 36, + "cols": 36, + "nonzeros": 208, + "optimal": { + "spmv": "coo" + } + } +] \ No newline at end of file diff --git a/benchmark/tools/compare_test_input2.json b/benchmark/tools/compare_test_input2.json new file mode 100644 index 00000000000..29a8d348618 --- /dev/null +++ b/benchmark/tools/compare_test_input2.json @@ -0,0 +1,48 @@ +[ + { + "filename": "mtx", + "spmv": { + "coo": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "hybrid": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 36, + "cols": 36, + "nonzeros": 208, + "optimal": { + "spmv": "coo" + } + } +] \ No newline at end of file diff --git a/benchmark/tools/compare_test_input3.json b/benchmark/tools/compare_test_input3.json new file mode 100644 index 00000000000..f317073d12d --- /dev/null +++ b/benchmark/tools/compare_test_input3.json @@ -0,0 +1,39 @@ +[ + { + "stencil": "7pt", + "size": 100, + "spmv": { + "csr": { + "time": 0.5 + } + } + }, + { + "n": 1, + "k": 2, + "m": 3, + "r": 4, + "blas": { + "axpy": { + "time": 100 + } + } + }, + { + "filename": "mtx", + "solver": { + "gmres": { + "apply": { + "time": 1.0, + "components": { + "foo": 2.0 + }, + "iterations": 10 + }, + "generate": { + "time": 2.0 + } + } + } + } +] \ No newline at end of file diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 241d2225938..e0045d8f417 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -289,7 +289,7 @@ void 
backup_results(json& results)
         return;
     }
     std::ofstream ofs(filenames[next]);
-    ofs << results;
+    ofs << std::setw(4) << results;
     next = 1 - next;
 }
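
For reviewers who want to sanity-check the --outlier-threshold semantics, here is a minimal, self-contained sketch (not part of the patch) that mirrors the is_outlier check in compare.py and shows how the tool is driven programmatically, the same way the tests drive it; the JSON file names in the trailing comment are hypothetical.

# Minimal standalone sketch (not part of the patch) of the outlier criterion
# used by compare.py: a ratio r is an outlier when |log(r)| > log(1 + t/100),
# so the check is symmetric for speedups (r > 1) and slowdowns (r < 1).
import math


def is_outlier(ratio: float, threshold_percent: float = 10.0) -> bool:
    return math.fabs(math.log(ratio)) > math.log(1.0 + threshold_percent / 100)


if __name__ == "__main__":
    # With the default 10% threshold this prints False, True, True, False:
    # 1.05 and 0.95 stay within +/-10% in log space, 1.11 and 0.8 do not.
    for r in (1.05, 1.11, 0.8, 0.95):
        print(f"ratio={r}: outlier={is_outlier(r)}")
    # The comparison itself can be invoked the same way the tests do, e.g.
    # (baseline.json / comparison.json are hypothetical file names):
    #   import compare
    #   compare.compare_main(["--outliers", "baseline.json", "comparison.json"])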