improve benchmarks automation
This patch:
 - uses the geometric mean instead of the arithmetic mean when calculating the summary
 - adds an option to run each benchmark several times and pick the median value
 - adds a timeout for benchmarks, set to 10 minutes by default
 - adds an option to filter benchmarks by name
 - adds an option to pick a specific compiler commit to test with
pbalcer committed Jul 26, 2024
1 parent 2baf095 commit ae92874
Showing 11 changed files with 100 additions and 50 deletions.
21 changes: 18 additions & 3 deletions .github/workflows/benchmarks_compute.yml
@@ -34,15 +34,23 @@ on:
type: string
required: false
default: ''
sycl_repo:
description: 'Compiler repo'
type: string
required: true
default: 'intel/llvm'
sycl_commit:
description: 'Compiler commit'
type: string
required: false
default: ''

permissions:
contents: read
pull-requests: write

jobs:
e2e-build-hw:
# Run only on upstream; forks will not have the HW
# if: github.repository == 'oneapi-src/unified-runtime'
name: Build SYCL, UR, run Compute Benchmarks
strategy:
matrix:
@@ -105,12 +113,19 @@ jobs:
- name: Checkout SYCL
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
repository: intel/llvm
repository: ${{inputs.sycl_repo}}
ref: refs/heads/sycl
path: sycl-repo
fetch-depth: 1
fetch-tags: false

- name: Fetch specific SYCL commit
if: inputs.sycl_commit != ''
working-directory: ./sycl-repo
run: |
git fetch --depth=1 origin ${{ inputs.sycl_commit }}
git checkout ${{ inputs.sycl_commit }}
- name: Set CUDA env vars
if: matrix.adapter.str_name == 'cuda'
run: |
3 changes: 3 additions & 0 deletions scripts/benchmarks/benches/SobelFilter.py
@@ -12,7 +12,10 @@
class SobelFilter(VelocityBase):
def __init__(self, vb: VelocityBench):
super().__init__("sobel_filter", "sobel_filter", vb)

def download_deps(self):
self.download_untar("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz")
return

def name(self):
return "Velocity-Bench Sobel Filter"
18 changes: 7 additions & 11 deletions scripts/benchmarks/benches/api_overhead.py
@@ -11,12 +11,15 @@
from .result import Result
from .options import options

## TODO: create a generic ComputeBenchmarks class that specific scenarios can inherit
class APIOverheadSYCL(Benchmark):
def __init__(self, directory):
def __init__(self, ioq, directory):
self.ioq = ioq
super().__init__(directory)

def name(self):
return "api_overhead_benchmark_sycl, mean execution time per 10 kernels"
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_sycl {order}, mean execution time per 10 kernels"

def unit(self):
return "μs"
@@ -39,11 +42,11 @@ def setup(self):
run(f"cmake --build {build_path} -j", add_sycl=True)
self.benchmark_bin = f"{build_path}/bin/api_overhead_benchmark_sycl"

def run_internal(self, ioq, env_vars):
def run(self, env_vars) -> Result:
command = [
f"{self.benchmark_bin}",
"--test=SubmitKernel",
f"--Ioq={ioq}",
f"--Ioq={self.ioq}",
"--DiscardEvents=0",
"--MeasureCompletion=0",
"--iterations=100000",
@@ -57,13 +60,6 @@ def run_internal(self, ioq, env_vars):
(label, mean) = self.parse_output(result)
return Result(label=label, value=mean, command=command, env=env_vars, stdout=result)

def run(self, env_vars) -> list[Result]:
results = []
for ioq in [0, 1]:
results.append(self.run_internal(ioq, env_vars))

return results

def parse_output(self, output):
csv_file = io.StringIO(output)
reader = csv.reader(csv_file)
2 changes: 1 addition & 1 deletion scripts/benchmarks/benches/base.py
@@ -61,7 +61,7 @@ def unit(self):
def setup(self):
raise NotImplementedError()

def run(self, env_vars):
def run(self, env_vars) -> Result:
raise NotImplementedError()

def teardown(self):
2 changes: 2 additions & 0 deletions scripts/benchmarks/benches/easywave.py
@@ -14,6 +14,8 @@
class Easywave(VelocityBase):
def __init__(self, vb: VelocityBench):
super().__init__("easywave", "easyWave_sycl", vb)

def download_deps(self):
self.download_untar("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz")

def name(self):
2 changes: 2 additions & 0 deletions scripts/benchmarks/benches/options.py
@@ -5,6 +5,8 @@ class Options:
sycl: str = ""
rebuild: bool = True
benchmark_cwd: str = "INVALID"
timeout: float = 600
iterations: int = 5

options = Options()

4 changes: 2 additions & 2 deletions scripts/benchmarks/benches/quicksilver.py
@@ -15,10 +15,10 @@ def __init__(self, vb: VelocityBench):
super().__init__("QuickSilver", "qs", vb)
self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering")

def run(self, env_vars) -> list[Result]:
def run(self, env_vars) -> Result:
# TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0':
return []
return None

return super().run(env_vars)

9 changes: 7 additions & 2 deletions scripts/benchmarks/benches/velocity.py
@@ -24,7 +24,12 @@ def __init__(self, name: str, bin_name: str, vb: VelocityBench):
self.bin_name = bin_name
self.code_path = os.path.join(self.vb.repo_path, self.bench_name, 'SYCL')

def download_deps(self):
return

def setup(self):
self.download_deps()

build_path = self.create_build_path(self.bench_name)

configure_command = [
@@ -47,7 +52,7 @@ def extra_env_vars(self) -> dict:
def parse_output(self, stdout: str) -> float:
raise NotImplementedError()

def run(self, env_vars) -> list[Result]:
def run(self, env_vars) -> Result:
env_vars.update(self.extra_env_vars())

command = [
@@ -57,7 +62,7 @@ def run(self, env_vars) -> list[Result]:

result = self.run_bench(command, env_vars)

return [Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result)]
return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result)

def teardown(self):
return
50 changes: 39 additions & 11 deletions scripts/benchmarks/main.py
@@ -5,7 +5,6 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os
from utils.utils import prepare_workdir, load_benchmark_results, save_benchmark_results;
from benches.api_overhead import APIOverheadSYCL
from benches.hashtable import Hashtable
@@ -18,11 +17,12 @@
from benches.options import options
from output import generate_markdown
import argparse
import re

# Update this if you are changing the layout of the results files
INTERNAL_WORKDIR_VERSION = '1.0'
INTERNAL_WORKDIR_VERSION = '1.3'

def main(directory, additional_env_vars, save_name, compare_names):
def main(directory, additional_env_vars, save_name, compare_names, filter):
variants = [
({'UR_L0_USE_IMMEDIATE_COMMANDLISTS': '0'}, "Imm-CmdLists-OFF"),
({'UR_L0_USE_IMMEDIATE_COMMANDLISTS': '1'}, ""),
@@ -33,7 +33,8 @@ def main(directory, additional_env_vars, save_name, compare_names):
vb = VelocityBench(directory)

benchmarks = [
APIOverheadSYCL(directory),
APIOverheadSYCL(0, directory),
APIOverheadSYCL(1, directory),
Hashtable(vb),
Bitcracker(vb),
#CudaSift(vb), TODO: the benchmark is passing, but is outputting "Failed to allocate device data"
@@ -42,22 +43,42 @@ def main(directory, additional_env_vars, save_name, compare_names):
SobelFilter(vb)
]

if filter:
benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())]

for benchmark in benchmarks:
print(f"setting up {benchmark.name()}... ", end='', flush=True)
benchmark.setup()
print("complete.")

results = []
for benchmark in benchmarks:
for env_vars, extra_label in variants:
merged_env_vars = {**env_vars, **additional_env_vars}
bench_results = benchmark.run(merged_env_vars)
for res in bench_results:
res.unit = benchmark.unit()
res.name = benchmark.name()
res.label += f" {extra_label}"
results.append(res)
iteration_results = []
for iter in range(options.iterations):
print(f"running {benchmark.name()} {extra_label}, iteration {iter}... ", end='', flush=True)
bench_results = benchmark.run(merged_env_vars)
if bench_results is not None:
print(f"complete ({bench_results.value} {bench_results.unit}).")
iteration_results.append(bench_results)
else:
print(f"did not finish.")

iteration_results.sort(key=lambda res: res.value)
median_index = len(iteration_results) // 2
median_result = iteration_results[median_index]

median_result.unit = benchmark.unit()
median_result.name = benchmark.name()
median_result.label += f" {extra_label}"

results.append(median_result)

for benchmark in benchmarks:
print(f"tearing down {benchmark.name()}... ", end='', flush=True)
benchmark.teardown()
print("complete.")

chart_data = {"This PR" : results}

@@ -93,11 +114,18 @@ def validate_and_parse_env_args(env_args):
parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[])
parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.')
parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append", default=["baseline"])
parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=3)
parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600)
parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None)

args = parser.parse_args()
additional_env_vars = validate_and_parse_env_args(args.env)

options.rebuild = not args.no_rebuild
options.sycl = args.sycl
options.iterations = args.iterations
options.timeout = args.timeout

benchmark_filter = re.compile(args.filter) if args.filter else None

main(args.benchmark_directory, additional_env_vars, args.save, args.compare)
main(args.benchmark_directory, additional_env_vars, args.save, args.compare, benchmark_filter)
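
A minimal standalone sketch of the median-selection and name-filtering logic that main.py now applies, for readers skimming the diff. The Result dataclass, benchmark names, and timing values below are simplified stand-ins for illustration, not the repository's actual classes or data.

    import re
    from dataclasses import dataclass

    @dataclass
    class Result:
        label: str
        value: float  # e.g. execution time, lower is better

    def pick_median(runs: list[Result]) -> Result | None:
        # Sort completed iterations by value and take the middle one,
        # so a single slow outlier cannot skew the reported number.
        if not runs:
            return None
        runs = sorted(runs, key=lambda r: r.value)
        return runs[len(runs) // 2]

    # Hypothetical data: five iterations of one benchmark, one of them an outlier.
    runs = [Result("api_overhead in order", v) for v in (10.1, 9.8, 42.0, 10.3, 9.9)]
    print(pick_median(runs).value)  # 10.1

    # The --filter option keeps only benchmarks whose name matches a regex.
    names = ["Velocity-Bench Sobel Filter", "api_overhead_benchmark_sycl in order"]
    pattern = re.compile("api_overhead")
    print([n for n in names if pattern.search(n)])  # ['api_overhead_benchmark_sycl in order']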
35 changes: 18 additions & 17 deletions scripts/benchmarks/output.py
@@ -5,6 +5,7 @@

import collections
from benches.base import Result
import math

# Function to generate the mermaid bar chart script
def generate_mermaid_script(chart_data: dict[str, list[Result]]):
@@ -84,28 +85,28 @@ def generate_markdown_details(results: list[Result]):
return "\n".join(markdown_sections)

def generate_summary(chart_data: dict[str, list[Result]]) -> str:
# Calculate the mean value of "This PR" for each benchmark
this_pr_means = {}
# Calculate the geometric mean value of "This PR" for each benchmark
this_pr_geomeans = {}
for res in chart_data["This PR"]:
if res.name not in this_pr_means:
this_pr_means[res.name] = []
this_pr_means[res.name].append(res.value)
for bname in this_pr_means:
this_pr_means[bname] = sum(this_pr_means[bname]) / len(this_pr_means[bname])

# Calculate the percentage for each entry relative to "This PR"
if res.name not in this_pr_geomeans:
this_pr_geomeans[res.name] = []
this_pr_geomeans[res.name].append(res.value)
for bname in this_pr_geomeans:
product = math.prod(this_pr_geomeans[bname])
this_pr_geomeans[bname] = product ** (1 / len(this_pr_geomeans[bname]))

# Calculate the percentage for each entry relative to "This PR" using geometric mean
summary_data = {"This PR": 100}
for entry_name, results in chart_data.items():
if entry_name == "This PR":
continue
entry_sum = 0
for res in results:
if res.name in this_pr_means:
percentage = (res.value / this_pr_means[res.name]) * 100
entry_sum += percentage

entry_average = entry_sum / len(results) if results else 0
summary_data[entry_name] = entry_average
entry_product = math.prod([res.value for res in results if res.name in this_pr_geomeans])
entry_geomean = entry_product ** (1 / len(results)) if results else 0
if entry_geomean and this_pr_geomeans.get(results[0].name):
percentage = (entry_geomean / this_pr_geomeans[results[0].name]) * 100
else:
percentage = 0
summary_data[entry_name] = percentage

markdown_table = "| Name | Result % |\n| --- | --- |\n"
for entry_name, percentage in summary_data.items():
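
For intuition on the switch from the arithmetic mean to the geometric mean in the summary: when the summarized values are ratios relative to "This PR", the geometric mean lets a 2x difference in one direction cancel a 2x difference in the other, while the arithmetic mean is pulled toward the larger entry. A small illustrative sketch with made-up numbers:

    import math

    def geomean(values: list[float]) -> float:
        # n-th root of the product of n values
        return math.prod(values) ** (1 / len(values)) if values else 0.0

    # Hypothetical per-benchmark results, expressed as a percentage of "This PR".
    relative = [50.0, 200.0, 100.0]  # one 2x difference each way, plus one tie

    print(sum(relative) / len(relative))  # ~116.7 -- skewed toward the 200% entry
    print(geomean(relative))              # 100.0  -- the two 2x differences cancel out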
4 changes: 1 addition & 3 deletions scripts/benchmarks/utils/utils.py
@@ -28,9 +28,7 @@ def run(command, env_vars={}, cwd=None, add_sycl=False):
env['LD_LIBRARY_PATH'] = sycl_lib_path + os.pathsep + env.get('LD_LIBRARY_PATH', '')

env.update(env_vars)
result = subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) # nosec B603
print(result.stdout.decode())
print(result.stderr.decode())
result = subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, timeout=options.timeout) # nosec B603
return result
except subprocess.CalledProcessError as e:
print(e.stdout.decode())
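
One note on the timeout change: subprocess.run raises subprocess.TimeoutExpired when the limit is hit, so a hung benchmark aborts instead of stalling the whole CI job; how the callers handle that exception is outside this hunk. A small self-contained demonstration with a deliberately short limit:

    import subprocess

    try:
        # 'sleep 5' stands in for a benchmark binary; the 1-second limit is
        # deliberately short so the timeout fires (the real default is 600 s).
        subprocess.run(["sleep", "5"], check=True,
                       stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       timeout=1)
    except subprocess.TimeoutExpired as e:
        print(f"benchmark timed out after {e.timeout}s: {e.cmd}")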
