Working Performance test, Energy still in trial

TimothyNiven committed Dec 17, 2024
1 parent cf6c1ee commit e90d24c
Showing 3 changed files with 147 additions and 46 deletions.
30 changes: 10 additions & 20 deletions benchmark/runner/main.py
@@ -48,6 +48,8 @@ def run_test(devices_config, dut_config, test_script, dataset_path, mode):
manager = DeviceManager(devices_config)
manager.scan()
power = manager.get("power", {}).get("instance")
print(f"Power instance: {power}")

if power and dut_config and dut_config.get("voltage"):
power.configure_voltage(dut_config["voltage"])
identify_dut(manager) # hangs in identify_dut()=>init_dut()=>time.sleep()
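The chained .get() above is why the voltage setup is guarded: it degrades to None when the scan finds no power board. A minimal illustration, using a made-up devices dict in place of the manager's internal state:

    devices = {}  # no "power" entry discovered by the scan
    power = devices.get("power", {}).get("instance")
    print(power)  # None -> the configure_voltage() call above is skipped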
@@ -116,7 +118,14 @@ def normalize_probabilities(probabilities):

return probabilities

-def summarize_result(result):
+def summarize_result(result, mode="a"):
"""
Summarizes results based on mode:
- 'a' : Accuracy and AUC calculations
- 'p' : Performance metrics like runtime and throughput
- 'e' : Reserved for energy calculations (to be implemented)
"""
num_correct = 0 # Initialize the counter for correct predictions

# Store true labels and predicted probabilities for AUC calculation
@@ -155,26 +164,7 @@ def summarize_result(result):
print(f"AUC: {auc_score:.4f}")
except ValueError as e:
print(f"AUC calculation failed: {e}")

-else:
-# For multi-class classification
-# Dynamically handle the number of classes based on the unique values in true_labels
-unique_classes = np.unique(true_labels)
-num_classes = len(unique_classes)
-
-# Adjust the probabilities to match the number of classes in true_labels
-predicted_probabilities = np.array([prob[:num_classes] for prob in predicted_probabilities])
-
-# Calculate accuracy
-accuracy = num_correct / len(result)
-print(f"Accuracy = {num_correct}/{len(result)} = {100*accuracy:4.2f}%")
-
-# Compute AUC for multi-class classification using one-vs-rest (macro-average AUC)
-try:
-auc_score = roc_auc_score(true_labels, predicted_probabilities, multi_class="ovr", average="macro")
-print(f"Macro-average AUC: {auc_score:.4f}")
-except ValueError as e:
-print(f"AUC calculation failed: {e}")

if __name__ == '__main__':
parser = argparse.ArgumentParser(prog="TestRunner", description=__doc__)
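For reference, a minimal standalone sketch of the two roc_auc_score call shapes used in summarize_result (the binary call kept above and the multi-class branch removed in this commit); the arrays are made-up stand-ins, not data from this repo:

    import numpy as np
    from sklearn.metrics import roc_auc_score

    # Binary case: one score per sample (probability of the positive class).
    true_labels = np.array([0, 1, 1, 0, 1])
    positive_probs = np.array([0.2, 0.8, 0.6, 0.3, 0.9])
    print(f"AUC: {roc_auc_score(true_labels, positive_probs):.4f}")

    # Multi-class case: one row of per-class probabilities per sample,
    # macro-averaged one-vs-rest (rows must sum to 1).
    true_labels_mc = np.array([0, 1, 2, 1, 0])
    probs_mc = np.array([[0.7, 0.2, 0.1],
                         [0.1, 0.8, 0.1],
                         [0.2, 0.2, 0.6],
                         [0.3, 0.5, 0.2],
                         [0.6, 0.3, 0.1]])
    print(f"Macro-average AUC: {roc_auc_score(true_labels_mc, probs_mc, multi_class='ovr', average='macro'):.4f}")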
155 changes: 133 additions & 22 deletions benchmark/runner/script.py
@@ -1,5 +1,8 @@
import re

import numpy as np
from datetime import datetime
from device_under_test import DUT # Import DUT class
from power_manager import PowerManager

class _ScriptStep:
"""Base class for script steps"""
@@ -13,12 +16,25 @@ def __init__(self, index=None):
self._index = None if index is None else int(index)

def run(self, io, dut, dataset, mode):
# Fetch the file and data
file_truth, data = dataset.get_file_by_index(self._index)

# Define the current time for formatted output
current_time = datetime.now()
formatted_time = current_time.strftime("%m%d.%H%M%S")

# Conditional print statements based on 'mode'
if data:
-print(f"Loading file {file_truth.get('file'):30}, true class = {int(file_truth.get('class')):2}")
+if mode == "a":
+print(f"Loading file {file_truth.get('file'):30}, true class = {int(file_truth.get('class')):2}")
+elif mode == "p":
+print(f"{formatted_time} ulp-mlperf: Runtime requirements have been met.")
+elif mode == "e":
+pass # Do nothing for energy mode
dut.load(data)
else:
print(f"WARNING: No data returned from dataset read. Script index = {self._index}, Dataset index = {dataset._current_index}")

return file_truth
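The formatted_time prefix above comes from strftime("%m%d.%H%M%S"); a standalone check of what it renders:

    from datetime import datetime
    stamp = datetime(2024, 12, 17, 14, 32, 5).strftime("%m%d.%H%M%S")
    print(stamp)  # 1217.143205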


@@ -49,12 +65,14 @@ def run(self, io, dut, dataset, mode):

class _ScriptInferStep(_ScriptStep):
"""Step to execute infer on the DUT"""
-def __init__(self, iterations=1, warmups=0):
+def __init__(self, iterations=1, warmups=0, loop_count=None):
self._iterations = int(iterations)
self._warmups = int(warmups)
self._infer_results = None
self._power_samples = []
self._power_timestamps = []
self.throughput_values = []
self.energy_values = [] # per-window energy accumulator, used by _print_energy_results
self._loop_count = loop_count # Store loop_count passed to this step

def run(self, io, dut, dataset, mode): # mode passed to run
result = dut.infer(self._iterations, self._warmups)
@@ -66,32 +84,31 @@ def run(self, io, dut, dataset, mode): # mode passed to run
timestamps, samples = _ScriptInferStep._gather_power_results(dut.power_manager)
print(f"samples:{len(samples)} timestamps:{len(timestamps)}")
result.update(power=dict(samples=samples,
-timestamps=timestamps
-)
-)
-
-# Print accuracy results (old method inside print_accuracy_results)
+timestamps=timestamps))

if mode == "a":
self._print_accuracy_results(infer_results)
elif mode == "e":
-self._print_energy_results(infer_results) # Assuming you have this function
+self._print_energy_results(infer_results)
elif mode == "p":
-self._print_performance_results(infer_results) # Assuming you have this function
+self._print_performance_results(infer_results)

return result

@staticmethod
def _gather_infer_results(cmd_results):
result = {}
total_inferences = 0
for res in cmd_results:
match = re.match(r'^m-results-\[([^]]+)\]$', res)
if match:
try:
# Split by comma and filter out empty strings
-result["results"] = [float(x) for x in match.group(1).split(',') if x.strip()]
+results = [float(x) for x in match.group(1).split(',') if x.strip()]
+result["results"] = results
+total_inferences += len(results)
except ValueError as e:
print(f"ERROR: Failed to parse infer results: {e}. Data: {match.group(1)}")
-result["results"] = [] # Handle the error by returning an empty list
+result["results"] = []
continue
match = re.match(r'^m-lap-us-([0-9]+)$', res)
if match:
@@ -101,8 +118,8 @@ def _gather_infer_results(cmd_results):
result["elapsed_time"] = result["end_time"] - result["start_time"]
else:
print("ERROR: Incomplete time data, missing start_time or end_time.")
result["total_inferences"] = total_inferences
return result
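The two regexes above define a small wire protocol between the runner and the DUT. A self-contained sketch of that contract, fed a made-up transcript (the real command stream comes from dut.infer()):

    import re

    def parse_sketch(cmd_results):
        parsed = {"results": [], "laps": []}
        for res in cmd_results:
            m = re.match(r'^m-results-\[([^]]+)\]$', res)
            if m:
                parsed["results"] = [float(x) for x in m.group(1).split(',') if x.strip()]
            m = re.match(r'^m-lap-us-([0-9]+)$', res)
            if m:
                parsed["laps"].append(int(m.group(1)))
        if len(parsed["laps"]) >= 2:
            # elapsed time is the span between the first and last lap markers
            parsed["elapsed_time"] = parsed["laps"][-1] - parsed["laps"][0]
        return parsed

    print(parse_sketch(["m-results-[0.1,0.2,0.7]", "m-lap-us-1000", "m-lap-us-61000"]))
    # {'results': [0.1, 0.2, 0.7], 'laps': [1000, 61000], 'elapsed_time': 60000}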


@staticmethod
def _gather_power_results(power):
@@ -119,16 +136,106 @@ def _gather_power_results(power):
return timeStamps, samples

def _print_accuracy_results(self, infer_results):
# Print the accuracy results in the original format
print(f" Results = {infer_results['results']}, time={infer_results['elapsed_time']} us")

def _print_energy_results(self, infer_results):
-# Assuming energy-related data is available in infer_results, adapt as necessary
-print("TEST ENERGY SUCCESS")
"""
Accumulates energy values for all loop iterations and calculates the median
energy per inference at the end of all iterations.
This whole path is still in trial: I do not have the energy board.
"""
# Number of inferences in this window (per-step iteration count)
num_inferences = self._iterations

# Get the current time in the desired format
current_time = datetime.now()
formatted_time = current_time.strftime("%m%d.%H%M%S")

# Initialize the PowerManager with the correct port and baud rate
port_device = "/dev/ttyUSB0" # replace with your serial port device
power_manager = PowerManager(port_device)

# Use the power manager to gather energy and power results
with power_manager:
timestamps, power_samples = _ScriptInferStep._gather_power_results(power_manager)

# Calculate total energy (sum of power * time intervals)
total_energy = sum([power_samples[i] * (timestamps[i+1][0] - timestamps[i][0])
for i in range(len(power_samples)-1)])

# Calculate average power (mean of the recorded power samples)
average_power = np.mean(power_samples) if power_samples else 0

# Calculate energy per inference (total energy divided by number of inferences)
energy_per_inference = total_energy / num_inferences if num_inferences > 0 else 0

# Print energy results for each window (1-based index; energy_values is appended below)
print(f"{formatted_time} ulp-ml: Energy data for window {len(self.energy_values) + 1} at time {timestamps[-1][0]:.2f} for {timestamps[-1][0] - timestamps[0][0]:.2f} sec.:")
print(f"{formatted_time} ulp-ml: Energy : {total_energy:>13.3f} uJ")
print(f"{formatted_time} ulp-ml: Power : {average_power:>13.3f} uW")
print(f"{formatted_time} ulp-ml: Energy/Inf. : {energy_per_inference:>13.3f} uJ/inf.")

# Store energy values for calculating median
self.energy_values.append(energy_per_inference)

# Check if we've completed all loop iterations
if len(self.energy_values) == self._loop_count:
# Calculate the median energy per inference after all loop iterations
total_median_energy = np.median(self.energy_values)

# Store the result for later use
self.median_energy = total_median_energy

# Print the new formatted output with median energy per inference
print(f"{formatted_time} ulp-ml: ---------------------------------------------------------")
print(f"{formatted_time} ulp-ml: Median energy cost is {self.median_energy:>10.3f} uJ/inf.")
print(f"{formatted_time} ulp-ml: ---------------------------------------------------------")
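A worked example of the integration above, with made-up samples (rectangle rule: each power sample is held until the next timestamp, so the last sample gets no interval; uW samples over timestamps in seconds yield uJ):

    power_samples = [100.0, 120.0, 110.0]   # uW
    timestamps = [(0.0,), (0.5,), (1.0,)]   # (seconds,) tuples, indexed as above
    total_energy = sum(power_samples[i] * (timestamps[i + 1][0] - timestamps[i][0])
                       for i in range(len(power_samples) - 1))
    print(total_energy)      # 100*0.5 + 120*0.5 = 110.0 uJ
    print(total_energy / 6)  # ~18.333 uJ/inf. for a 6-inference window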

def _print_performance_results(self, infer_results):
-# Assuming performance-related data is available in infer_results, adapt as necessary
-print("TEST PERFORMANCE SUCCESS")
"""
Accumulates throughput values for all loop iterations and calculates the median
throughput at the end of all iterations.
"""
# Number of inferences in this window (per-step iteration count)
num_inferences = self._iterations

# Retrieve elapsed time (in microseconds)
elapsed_time_us = infer_results.get("elapsed_time", 0)

# Get the current time in the desired format
current_time = datetime.now()
formatted_time = current_time.strftime("%m%d.%H%M%S")

# Calculate throughput
if elapsed_time_us > 0:
elapsed_time_sec = elapsed_time_us / 1_000_000 # Convert to seconds
throughput = num_inferences / elapsed_time_sec # Calculate throughput
else:
elapsed_time_sec = 0
throughput = 0

# Add throughput to the list of throughput values
self.throughput_values.append(throughput)

# Print the old format performance results for each inference (every loop)
print(f"{formatted_time} ulp-mlperf: Performance results for window {len(self.throughput_values)}:")
print(f"{formatted_time} ulp-mlperf: # Inferences : {num_inferences:>13}")
print(f"{formatted_time} ulp-mlperf: Runtime : {elapsed_time_sec:>10.3f} sec.")
print(f"{formatted_time} ulp-mlperf: Throughput : {throughput:>10.3f} inf./sec.")

# Check if we've completed all loop iterations
if len(self.throughput_values) == self._loop_count:
# Calculate the median throughput after all loop iterations
total_median_throughput = np.median(self.throughput_values)

# Store the result for later use
self.median_throughput = total_median_throughput

# Print the new formatted output with median throughput
print(f"{formatted_time} ulp-mlperf: ---------------------------------------------------------")
print(f"{formatted_time} ulp-mlperf: Median throughput is {self.median_throughput:>10.3f} inf./sec.")
print(f"{formatted_time} ulp-mlperf: ---------------------------------------------------------")
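The throughput arithmetic above, worked through with made-up numbers:

    import numpy as np

    num_inferences = 6
    elapsed_time_us = 1_200_000                                  # 1.2 s in microseconds
    throughput = num_inferences / (elapsed_time_us / 1_000_000)
    print(throughput)                                            # 5.0 inf./sec.

    # Across windows, the final line reports the median of the accumulated values:
    print(np.median([5.0, 4.8, 5.2, 4.9, 5.1]))                  # 5.0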


class _ScriptStreamStep(_ScriptStep):
"""Step to stream audio from an enhanced interface board"""
Expand Down Expand Up @@ -162,9 +269,13 @@ def _create_step(self, step, contents):
if cmd == 'download':
return _ScriptDownloadStep(*args)
if cmd == 'loop':
-return _ScriptLoopStep(self._parse_steps(contents), *args)
+# Pass the loop_count to the loop step and its commands
+loop_count = int(args[0]) if args else None
+return _ScriptLoopStep(self._parse_steps(contents), loop_count)
if cmd == 'infer':
-return _ScriptInferStep(*args)
+# Pass the loop_count to the infer step
+loop_count = args[-1] if args else None # Assuming loop_count is passed as last argument
+return _ScriptInferStep(*args, loop_count=loop_count)

def run(self, io, dut, dataset, mode): # Pass mode to all steps
with io:
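Under the args[-1] convention above, a two-argument script line leaves the warmups field doing double duty as loop_count. A small trace of the assumed parsing for a line such as "infer 6 0":

    cmd, *args = "infer 6 0".split()         # cmd = 'infer', args = ['6', '0']
    loop_count = args[-1] if args else None  # '0' -> also consumed as warmups
    # _ScriptInferStep('6', '0', loop_count='0') then yields:
    # iterations=6, warmups=0, loop_count='0'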
8 changes: 4 additions & 4 deletions benchmark/runner/tests.yaml
@@ -4,28 +4,28 @@ ad01:
  script:
    - loop 10:
      - download
-      - infer 1 0
+      - infer 6 0
ic01:
  name: image_classification
  model: ic01
  truth_file: y_labels.csv
  script:
    - loop 24:
      - download
-      - infer 1 0
+      - infer 9 0
kws01:
  name: keyword_spotting
  model: kws01
  truth_file: y_labels.csv
  script:
    - loop 20:
      - download
-      - infer 1 0
+      - infer 8 0
vww01:
  name: person_detection
  model: vww01
  truth_file: y_labels.csv
  script:
    - loop 2:
      - download
-      - infer 1 0
+      - infer 5 1

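For reference, the script grammar as the runner reads it (field meanings inferred from _create_step and _ScriptInferStep above):

    script:
      - loop 10:          # repeat the nested steps 10 times
        - download        # load the next dataset file onto the DUT
        - infer 6 0       # infer <iterations> <warmups>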