Merge pull request #2439 from pbalcer/explicit-benchmark-groups
[benchmarks] add explicit benchmark groups
pbalcer authored Dec 9, 2024
2 parents 5c10466 + 10f499a commit d3b81bf
Showing 5 changed files with 203 additions and 20 deletions.
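In short: a benchmark opts into a named comparison group by overriding `explicit_group()`, the group name is carried on each `Result`, and `output_html.py` turns results sharing a group into one grouped bar chart. A minimal standalone sketch of that flow (hypothetical class and values, not the repo's actual ones):

```python
from dataclasses import dataclass

# Standalone sketch, not the repo's actual classes: how an explicit
# group name travels from a benchmark into its results.

@dataclass
class Result:
    label: str
    value: float
    explicit_group: str = ""  # the field this commit adds

class SubmitKernelBench:
    """Hypothetical benchmark; two variants share one comparison group."""
    def __init__(self, in_order: bool):
        self.in_order = in_order

    def name(self) -> str:
        order = "in order" if self.in_order else "out of order"
        return f"api_overhead SubmitKernel {order}"

    def explicit_group(self) -> str:
        # The same non-empty string across variants -> plotted together
        # in one grouped bar chart; "" keeps a result ungrouped.
        return "SubmitKernel"

    def run(self) -> list[Result]:
        measured = 42.0  # stand-in for a real measurement
        return [Result(label=self.name(), value=measured,
                       explicit_group=self.explicit_group())]

for bench in (SubmitKernelBench(True), SubmitKernelBench(False)):
    print(bench.run()[0])
```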
15 changes: 14 additions & 1 deletion scripts/benchmarks/benches/compute.py
@@ -104,6 +104,9 @@ def extra_env_vars(self) -> dict:
def setup(self):
self.benchmark_bin = os.path.join(self.bench.directory, 'compute-benchmarks-build', 'bin', self.bench_name)

def explicit_group(self):
return ""

def run(self, env_vars) -> list[Result]:
command = [
f"{self.benchmark_bin}",
@@ -120,7 +123,8 @@ def run(self, env_vars) -> list[Result]:
ret = []
for label, median, stddev, unit in parsed_results:
extra_label = " CPU count" if parse_unit_type(unit) == "instr" else ""
ret.append(Result(label=self.name() + extra_label, value=median, stddev=stddev, command=command, env=env_vars, stdout=result, unit=parse_unit_type(unit)))
explicit_group = self.explicit_group() + extra_label if self.explicit_group() != "" else ""
ret.append(Result(label=self.name() + extra_label, explicit_group=explicit_group, value=median, stddev=stddev, command=command, env=env_vars, stdout=result, unit=parse_unit_type(unit)))
return ret

def parse_output(self, output):
@@ -158,6 +162,9 @@ def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_sycl SubmitKernel {order}"

def explicit_group(self):
return "SubmitKernel"

def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
@@ -178,6 +185,9 @@ def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_ur SubmitKernel {order}"

def explicit_group(self):
return "SubmitKernel"

def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
@@ -198,6 +208,9 @@ def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_l0 SubmitKernel {order}"

def explicit_group(self):
return "SubmitKernel"

def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
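One subtlety in the `run()` change above: instruction-count results (`parse_unit_type(unit) == "instr"`) get a " CPU count" suffix on both the label and the group, so they form their own comparison group instead of being charted against time-based results. A tiny sketch of the suffix rule, taking the already-parsed unit type directly (inputs invented):

```python
def group_for(base_group: str, unit_type: str) -> str:
    # Mirrors the logic above: instruction-count results split off
    # into "<group> CPU count"; ungrouped results stay ungrouped.
    extra_label = " CPU count" if unit_type == "instr" else ""
    return base_group + extra_label if base_group != "" else ""

assert group_for("SubmitKernel", "us") == "SubmitKernel"  # "us" is a hypothetical unit type
assert group_for("SubmitKernel", "instr") == "SubmitKernel CPU count"
assert group_for("", "instr") == ""
```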
1 change: 1 addition & 0 deletions scripts/benchmarks/benches/result.py
@@ -18,6 +18,7 @@ class Result:
stdout: str
passed: bool = True
unit: str = ""
explicit_group: str = ""
# stddev can be optionally set by the benchmark,
# if not set, it will be calculated automatically.
stddev: float = 0.0
19 changes: 10 additions & 9 deletions scripts/benchmarks/benches/test.py
@@ -20,30 +20,31 @@ def setup(self):

def benchmarks(self) -> list[Benchmark]:
bench_configs = [
("Memory Bandwidth", 2000, 200),
("Latency", 100, 20),
("Throughput", 1500, 150),
("FLOPS", 3000, 300),
("Cache Miss Rate", 250, 25),
("Memory Bandwidth", 2000, 200, "Foo Group"),
("Latency", 100, 20, "Bar Group"),
("Throughput", 1500, 150, "Foo Group"),
("FLOPS", 3000, 300, "Foo Group"),
("Cache Miss Rate", 250, 25, "Bar Group"),
]

result = []
for base_name, base_value, base_diff in bench_configs:
for base_name, base_value, base_diff, group in bench_configs:
for variant in range(6):
value_multiplier = 1.0 + (variant * 0.2)
name = f"{base_name} {variant+1}"
value = base_value * value_multiplier
diff = base_diff * value_multiplier

result.append(TestBench(name, value, diff))
result.append(TestBench(name, value, diff, group))

return result

class TestBench(Benchmark):
def __init__(self, name, value, diff):
def __init__(self, name, value, diff, group = ''):
self.bname = name
self.value = value
self.diff = diff
self.group = group
super().__init__("")

def name(self):
Expand All @@ -58,7 +59,7 @@ def setup(self):
def run(self, env_vars) -> list[Result]:
random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
return [
Result(label=self.name(), value=random_value, command="", env={"A": "B"}, stdout="no output", unit="ms")
Result(label=self.name(), explicit_group=self.group, value=random_value, command="", env={"A": "B"}, stdout="no output", unit="ms")
]

def teardown(self):
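For reference, each config in the test suite above fans out into six variants whose values scale by `1.0 + variant * 0.2` before random noise is applied; for the "Latency" config (base 100):

```python
base_value = 100  # "Latency" config above
print([base_value * (1.0 + v * 0.2) for v in range(6)])
# [100.0, 120.0, 140.0, 160.0, 180.0, 200.0]
```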
6 changes: 5 additions & 1 deletion scripts/benchmarks/main.py
@@ -183,6 +183,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
# should this be configurable?
history.load(1000)

# remove duplicates. this can happen if e.g., --compare baseline is specified manually.
compare_names = list(dict.fromkeys(compare_names))

for name in compare_names:
compare_result = history.get_compare(name)
if compare_result:
@@ -203,7 +206,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
# Otherwise we might be comparing the results to themselves.
if not options.dry_run:
history.save(saved_name, results, save_name is not None)
compare_names.append(saved_name)
if saved_name not in compare_names:
compare_names.append(saved_name)

if options.output_html:
html_content = generate_html(history.runs, 'oneapi-src/unified-runtime', compare_names)
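The dedupe added above relies on `dict.fromkeys` preserving insertion order (guaranteed since Python 3.7), so the first occurrence of each compare name wins:

```python
compare_names = ["baseline", "my-run", "baseline"]  # invented names
print(list(dict.fromkeys(compare_names)))           # ['baseline', 'my-run']
```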
182 changes: 173 additions & 9 deletions scripts/benchmarks/output_html.py
@@ -10,6 +10,7 @@
from dataclasses import dataclass
import matplotlib.dates as mdates
from benches.result import BenchmarkRun, Result
import numpy as np

@dataclass
class BenchmarkMetadata:
@@ -23,11 +24,14 @@ class BenchmarkSeries:
runs: list[BenchmarkRun]

@dataclass
class BenchmarkTimeSeries:
class BenchmarkChart:
label: str
html: str

def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> list[BenchmarkTimeSeries]:
def tooltip_css() -> str:
return '.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}'

def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> list[BenchmarkChart]:
plt.close('all')

num_benchmarks = len(benchmarks)
@@ -66,7 +70,7 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
for point in sorted_points]

tooltip = mpld3.plugins.PointHTMLTooltip(scatter, tooltip_labels,
css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}',
css=tooltip_css(),
targets=targets)
mpld3.plugins.connect(fig, tooltip)

@@ -94,7 +98,104 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S'))

plt.tight_layout()
html_charts.append(BenchmarkTimeSeries(html=mpld3.fig_to_html(fig), label=benchmark.label))
html_charts.append(BenchmarkChart(html=mpld3.fig_to_html(fig), label=benchmark.label))
plt.close(fig)

return html_charts

@dataclass
class ExplicitGroup:
name: str
nnames: int
metadata: BenchmarkMetadata
runs: dict[str, dict[str, Result]]

def create_explicit_groups(benchmark_runs: list[BenchmarkRun], compare_names: list[str]) -> list[ExplicitGroup]:
groups = {}

for run in benchmark_runs:
if run.name in compare_names:
for res in run.results:
if res.explicit_group != '':
if res.explicit_group not in groups:
groups[res.explicit_group] = ExplicitGroup(name=res.explicit_group, nnames=len(compare_names),
metadata=BenchmarkMetadata(unit=res.unit, lower_is_better=res.lower_is_better),
runs={})

group = groups[res.explicit_group]
if res.label not in group.runs:
group.runs[res.label] = {name: None for name in compare_names}

if group.runs[res.label][run.name] is None:
group.runs[res.label][run.name] = res

return list(groups.values())

def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]:
plt.close('all')

html_charts = []

for group in groups:
fig, ax = plt.subplots(figsize=(10, 6))

x = np.arange(group.nnames)
x_labels = []
width = 0.8 / len(group.runs)

max_height = 0

for i, (run_name, run_results) in enumerate(group.runs.items()):
offset = width * i

positions = x + offset
x_labels = run_results.keys()
valid_data = [r.value if r is not None else 0 for r in run_results.values()]
rects = ax.bar(positions, valid_data, width, label=run_name)
# This is a hack to disable all bar_label. Setting labels to empty doesn't work.
# We create our own labels below for each bar, this works better in mpld3.
ax.bar_label(rects, fmt='')

for rect, run, res in zip(rects, run_results.keys(), run_results.values()):
height = rect.get_height()
if height > max_height:
max_height = height

ax.text(rect.get_x() + rect.get_width()/2., height + 2,
f'{res.value:.1f}',
ha='center', va='bottom', fontsize=9)

tooltip_labels = [
f"Run: {run}\n"
f"Label: {res.label}\n"
f"Value: {res.value:.2f} {res.unit}\n"
]
tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css=tooltip_css())
mpld3.plugins.connect(ax.figure, tooltip)

ax.set_xticks([])
ax.grid(True, axis='y', alpha=0.2)
ax.set_ylabel(f"Value ({group.metadata.unit})")
ax.legend(loc='upper left')
ax.set_title(group.name, pad=20)
performance_indicator = "lower is better" if group.metadata.lower_is_better else "higher is better"
ax.text(0.5, 1.03, f"({performance_indicator})",
ha='center',
transform=ax.transAxes,
style='italic',
fontsize=7,
color='#666666')

for idx, label in enumerate(x_labels):
# this is a hack to get labels to show above the legend
# we normalize the idx to transAxes transform and offset it a little.
x_norm = (idx + 0.3 - ax.get_xlim()[0]) / (ax.get_xlim()[1] - ax.get_xlim()[0])
ax.text(x_norm, 1.00, label,
transform=ax.transAxes,
color='#666666')

plt.tight_layout()
html_charts.append(BenchmarkChart(label=group.name, html=mpld3.fig_to_html(fig)))
plt.close(fig)

return html_charts
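To make the grouping pass concrete: `create_explicit_groups` keys groups by `explicit_group`, then keys rows inside each group by result label, with one slot per compared run name (`None` where a run lacks that label). A plain-dict analogue of the shape it produces, with invented names and values:

```python
# Illustrative only; the real code stores Result objects, not floats.
groups = {
    "SubmitKernel": {
        "api_overhead_benchmark_ur SubmitKernel in order":
            {"baseline": 10.2, "this_pr": 9.8},
        "api_overhead_benchmark_ur SubmitKernel out of order":
            {"baseline": 11.0, "this_pr": None},  # missing in this run
    },
}
```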
@@ -138,6 +239,11 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
timeseries = create_time_series_chart(benchmarks, github_repo)
timeseries_charts_html = '\n'.join(f'<div class="chart" data-label="{ts.label}"><div>{ts.html}</div></div>' for ts in timeseries)

explicit_groups = create_explicit_groups(benchmark_runs, compare_names)

bar_charts = create_grouped_bar_charts(explicit_groups)
bar_charts_html = '\n'.join(f'<div class="chart" data-label="{bc.label}"><div>{bc.html}</div></div>' for bc in bar_charts)

html_template = f"""
<!DOCTYPE html>
<html>
@@ -199,21 +305,72 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
width: 400px;
max-width: 100%;
}}
details {{
margin-bottom: 24px;
}}
summary {{
font-size: 18px;
font-weight: 500;
cursor: pointer;
padding: 12px;
background: #e9ecef;
border-radius: 8px;
user-select: none;
}}
summary:hover {{
background: #dee2e6;
}}
</style>
<script>
function getQueryParam(param) {{
const urlParams = new URLSearchParams(window.location.search);
return urlParams.get(param);
}}
function filterCharts() {{
const regexInput = document.getElementById('bench-filter').value;
const regex = new RegExp(regexInput, 'i');
const charts = document.querySelectorAll('.chart');
let timeseriesVisible = false;
let barChartsVisible = false;
charts.forEach(chart => {{
const label = chart.getAttribute('data-label');
if (regex.test(label)) {{
chart.style.display = '';
if (chart.closest('.timeseries')) {{
timeseriesVisible = true;
}} else if (chart.closest('.bar-charts')) {{
barChartsVisible = true;
}}
}} else {{
chart.style.display = 'none';
}}
}});
updateURL(regexInput);
document.querySelector('.timeseries').open = timeseriesVisible;
document.querySelector('.bar-charts').open = barChartsVisible;
}}
function updateURL(regex) {{
const url = new URL(window.location);
if (regex) {{
url.searchParams.set('regex', regex);
}} else {{
url.searchParams.delete('regex');
}}
history.replaceState(null, '', url);
}}
document.addEventListener('DOMContentLoaded', (event) => {{
const regexParam = getQueryParam('regex');
if (regexParam) {{
document.getElementById('bench-filter').value = regexParam;
filterCharts();
}}
}});
</script>
</head>
<body>
@@ -222,13 +379,20 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
<div class="filter-container">
<input type="text" id="bench-filter" placeholder="Regex..." oninput="filterCharts()">
</div>
<h2>Historical Results</h2>
<div class="charts">
{timeseries_charts_html}
</div>
<details class="timeseries">
<summary>Historical Results</summary>
<div class="charts">
{timeseries_charts_html}
</div>
</details>
<details class="bar-charts">
<summary>Comparisons</summary>
<div class="charts">
{bar_charts_html}
</div>
</details>
</div>
</body>
</html>
"""

return html_template
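Lastly, the bar placement in `create_grouped_bar_charts` is the standard matplotlib grouped-bar recipe: one x slot per compared run (`np.arange(group.nnames)`), with each labelled series offset by `width * i` within a 0.8-wide band. A standalone sketch of just that layout arithmetic, sizes invented:

```python
import numpy as np

nnames = 2              # compared runs -> x-axis slots
nlabels = 3             # labelled series within one group
x = np.arange(nnames)
width = 0.8 / nlabels   # series share a 0.8-wide band per slot
for i in range(nlabels):
    print(f"series {i}: bar positions {x + width * i}")
```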
