Skip to content

Commit

Permalink
response to comments
Browse files Browse the repository at this point in the history
Signed-off-by: Alexandre Eichenberger <[email protected]>
  • Loading branch information
AlexandreEichenberger committed Feb 20, 2025
1 parent ce1c178 commit 46f08ca
Showing 1 changed file with 53 additions and 44 deletions.
97 changes: 53 additions & 44 deletions utils/RunONNXModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,11 +654,14 @@ def data_without_top_bottom_quartile(data, percent):
################################################################################
# Inference Session implementing RunONNXModel.
#
# Constructor: fetch the model and compile if needed.
# Setup: initialize the inputs, which can come from various sources.
# run_one_inference: run one inference using the inputs set in Setup.
# run: perform one or more inferences depending on -w/-n and perform timing.
# teardown: verify values generated in run, save model/input,...
# Constructor: fetch the model and compile if needed, save model if requested.
# process_inputs: initialize the inputs, which can come from various sources.
# run_inference: run one inference using the inputs set in process_inputs.
# process_outputs: verify values generated in run, save outputs,...
# process_perf_results: compute and print performance data.
#
# run_performance_test: process inputs, perform several inferences (warmup and perf),
# process performance results, and validate outputs.


class InferenceSession:
Expand All @@ -677,7 +680,7 @@ class InferenceSession:
# default_model_name
# model_dir
# session
# inputs (definition of inputs delayed to setup).
# inputs (definition of inputs delayed to process_inputs).
# input_names, output_names
# temp_dir

Expand Down Expand Up @@ -855,11 +858,11 @@ def __init__(self, model_file=None, **kwargs):
os.environ["OM_CONSTANT_PATH"] = self.model_dir

"""
Setup: define the inputs for the model and store them in self.inputs.
process_inputs: define the inputs for the model and store them in self.inputs.
Print input if requested.
"""

def setup(self, input_feed=None):
def process_inputs(self, input_feed=None):
# Define inputs.
self.inputs = []
if input_feed:
Expand Down Expand Up @@ -917,37 +920,14 @@ def setup(self, input_feed=None):
Perform one inference without any timing.
"""

def run_one_inference(self):
def run_inference(self):
    """Run a single, untimed inference and return the session's outputs.

    The inputs are read from self.inputs and handed unchanged to
    self.session.run; whatever that call returns is returned as is.
    """
    model_inputs = self.inputs
    outputs = self.session.run(model_inputs)
    return outputs

"""
Perform a short analysis of time spent in the model.
When requested outputs are printed and/or verified.
When requested outputs are printed, verified, and/or saved.
"""

def teardown(self, outs, perf_results=None):
# Print statistics info, e.g., min/max/stddev inference time.
if args.n_iteration > 1:
print(
" Statistics 1 (excluding warmup),"
" min, {:.6e}, max, {:.6e}, mean, {:.6e}, stdev, {:.6e}".format(
np.min(perf_results),
np.max(perf_results),
np.mean(perf_results),
np.std(perf_results, dtype=np.float64),
)
)
t_perf_results = data_without_top_bottom_quartile(perf_results, 25)
print(
" Statistics 2 (no warmup/quart.),"
" min, {:.6e}, max, {:.6e}, mean, {:.6e}, stdev, {:.6e}".format(
np.min(t_perf_results),
np.max(t_perf_results),
np.mean(t_perf_results),
np.std(t_perf_results, dtype=np.float64),
)
)

def process_outputs(self, outs):
# Print the output if required.
if args.print_output:
for i, out in enumerate(outs):
Expand Down Expand Up @@ -1031,10 +1011,37 @@ def teardown(self, outs, perf_results=None):
)
verify_outs(outs[i], ref_outs[i], args.atol, args.rtol)

"""
Perform a short analysis of time spent in the model.
"""

def process_perf_results(self, perf_results):
    """Print summary statistics for the measured inference times.

    Nothing is printed unless args.n_iteration is greater than 1. Otherwise
    two lines are emitted, each reporting min, max, mean, and standard
    deviation: the first over all of perf_results, the second over the
    values returned by data_without_top_bottom_quartile(perf_results, 25).
    """
    # A single iteration gives no meaningful statistics.
    if args.n_iteration <= 1:
        return

    def summarize(samples):
        # The four statistics, in the order the format strings expect them.
        return (
            np.min(samples),
            np.max(samples),
            np.mean(samples),
            np.std(samples, dtype=np.float64),
        )

    # Statistics over every timed iteration.
    all_stats = summarize(perf_results)
    print(
        " Statistics 1 (excluding warmup),"
        " min, {:.6e}, max, {:.6e}, mean, {:.6e}, stdev, {:.6e}".format(*all_stats)
    )

    # Same statistics on the trimmed data set.
    trimmed = data_without_top_bottom_quartile(perf_results, 25)
    trimmed_stats = summarize(trimmed)
    print(
        " Statistics 2 (no warmup/quart.),"
        " min, {:.6e}, max, {:.6e}, mean, {:.6e}, stdev, {:.6e}".format(*trimmed_stats)
    )

"""
From onnxruntime API:
run(output_names, input_feed)
run_performance_test(output_names, input_feed)
Compute the predictions.
PARAMETERS:
Expand All @@ -1050,27 +1057,29 @@ def teardown(self, outs, perf_results=None):
allow different shape from run to run.
"""

def run(self, output_name=None, input_feed=None, **kwargs):
# Setup inputs.
self.setup(input_feed)
def run_performance_test(self, output_name=None, input_feed=None, **kwargs):
# Process inputs, saved in self.inputs.
self.process_inputs(input_feed)
# Running inference.
print("Running inference ...")
for i in range(args.warmup):
start = time.perf_counter()
outs = self.run_one_inference()
outs = self.run_inference() # Using inputs from self.inputs.
end = time.perf_counter()
print(" {} warmup: {} seconds".format(ordinal(i + 1), end - start))

perf_results = []
for i in range(args.n_iteration):
start = time.perf_counter()
outs = self.run_one_inference()
outs = self.run_inference() # Using inputs from self.inputs.
end = time.perf_counter()
elapsed = end - start
perf_results += [elapsed]
print(" {} iteration, {}, seconds".format(ordinal(i + 1), elapsed))

self.teardown(outs, perf_results)
# Print performance results and verify output.
self.process_perf_results(perf_results)
self.process_outputs(outs)
if output_name:
res = {output_name[i]: outs[i] for i in range(len(outs))}
return res
Expand All @@ -1091,10 +1100,10 @@ def main():
print(parser.format_usage())
exit(1)

# Create inference session and perform a run, which load, compute, and possibly
# verify data.
# Create an inference session and run a performance test, which loads,
# computes, and possibly verifies data.
session = InferenceSession()
return session.run()
return session.run_performance_test()


if __name__ == "__main__":
Expand Down

0 comments on commit 46f08ca

Please sign in to comment.