From 88106ca60ea7dfd540b0b3b5eb8f566e8e8b2437 Mon Sep 17 00:00:00 2001
From: Denys Shabalin
Date: Wed, 15 Dec 2021 15:29:58 +0000
Subject: [PATCH] Make harness return performance results

As discussed in #109 there is an opportunity to connect OpenTuner to
tune parameters for existing experts. This change makes test_harness
return performance results as a nested dictionary that contains the
same information as what's currently printed to stdout. As a result,
it can be invoked programmatically as part of a tuning loop.
---
 python/examples/core/harness.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/python/examples/core/harness.py b/python/examples/core/harness.py
index b7db8f53769a..00915f59f919 100644
--- a/python/examples/core/harness.py
+++ b/python/examples/core/harness.py
@@ -1,6 +1,7 @@
 import sys
 import os
 import time
+from collections import defaultdict
 
 from typing import AbstractSet, Any, Callable, List, Mapping, Optional, Sequence, Union
 
@@ -26,7 +27,7 @@ def log(*args):
 
 
 def timed_invoke(run_n_iters: Callable, gflop_count: float, gbyte_count: float,
-                 n_iters: int):
+                 n_iters: int) -> Mapping[str, Any]:
   elapsed_ns = run_n_iters(n_iters)
   elapsed_s = np.flip(np.sort(elapsed_ns / 1.e9))
   elapsed_s_per_iter = [ \
@@ -62,6 +63,12 @@ def timed_invoke(run_n_iters: Callable, gflop_count: float, gbyte_count: float,
     format_str = '{:>12.2f}' * (len(data[0]) - 1) + '{:>12s}'
     print(format_str.format(*data[i]))
 
+  return {
+      "elapsed_s_per_iter": elapsed_s_per_iter,
+      "gbyte_per_s_per_iter": gbyte_per_s_per_iter,
+      "gflop_per_s_per_iter": gflop_per_s_per_iter,
+  }
+
 
 # TODO: support more than just RankedTensorType.
 def get_mlir_abi_compatible_type(value):
@@ -195,7 +202,7 @@ def run_n_iters(n_iters: int):
     run_n_iters(1)
 
     # 5. Showtime.
-    timed_invoke(
+    return timed_invoke(
         run_n_iters=run_n_iters,
         gflop_count=self.problem_definition.gflop_count_builder(
            runtime_problem_sizes_dict),
@@ -227,7 +234,7 @@ def test_harness(
     n_iters: int = 1,
     function_name: str = 'tested_function',
     runtime_only_sizes: AbstractSet[str] = set(),
-    **kwargs):
+    **kwargs) -> Mapping[str, Any]:
   """Test runner facility.
 
   Compiles and runs a test or a benchmark for a cross-product of possible
@@ -258,8 +265,12 @@ def test_harness(
     PyTorch. If the `BENCHMARK_TORCH` environment variable is set and the
     argument is provided, it will be called `n_iters` times for the purpose
     of measuring baseline performance.
+
+  Returns: A dictionary of all collected benchmark results.
""" + results = defaultdict(lambda: defaultdict(lambda: {})) + for np_types in np_types_list: for problem_sizes_dict in problem_sizes_list: compile_time_problem_sizes_dict = { @@ -286,7 +297,7 @@ def test_harness( transform=expert, dump_ir_to_file=kwargs.get('dump_ir_to_file', '')) - problem.run( + results['expert'][expert] = problem.run( n_iters=n_iters, entry_point_name='main', runtime_problem_sizes_dict=runtime_problem_sizes_dict, @@ -301,7 +312,7 @@ def test_harness( print('\nNumPy reference\n') args = problem_definition.tensors_np_builder(problem_sizes_dict, np_types) - timed_invoke( + results['numpy'] = timed_invoke( lambda n: _run_benchmark_n_iters(kwargs['numpy_benchmark'], n, args, problem_sizes_dict, np_types), gflops, gbytes, n_iters) @@ -313,7 +324,9 @@ def test_harness( numpy_args = problem_definition.tensors_np_builder( problem_sizes_dict, np_types) args = list(map(torch.from_numpy, numpy_args)) - timed_invoke( + results['pytorch'] = timed_invoke( lambda n: _run_benchmark_n_iters(kwargs[ 'pytorch_benchmark'], n, args, problem_sizes_dict, np_types), gflops, gbytes, n_iters) + + return results