Working Performance test, Energy still in trial

TimothyNiven committed Dec 17, 2024
1 parent cf6c1ee commit e90d24c
Showing 3 changed files with 147 additions and 46 deletions.
30 changes: 10 additions & 20 deletions benchmark/runner/main.py
@@ -48,6 +48,8 @@ def run_test(devices_config, dut_config, test_script, dataset_path, mode):
manager = DeviceManager(devices_config)
manager.scan()
power = manager.get("power", {}).get("instance")
print(f"Power instance: {power}")

if power and dut_config and dut_config.get("voltage"):
power.configure_voltage(dut_config["voltage"])
identify_dut(manager) # hangs in identify_dut()=>init_dut()=>time.sleep()
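The chained .get() above is why the voltage setup is guarded: it degrades to None when the scan finds no power board. A minimal illustration, using a made-up devices dict in place of the manager's internal state:

    devices = {}  # no "power" entry discovered by the scan
    power = devices.get("power", {}).get("instance")
    print(power)  # None -> the configure_voltage() call above is skipped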
@@ -116,7 +118,14 @@ def normalize_probabilities(probabilities):

return probabilities

-def summarize_result(result):
+def summarize_result(result, mode="a"):
"""
Summarizes results based on mode:
- 'a' : Accuracy and AUC calculations
- 'p' : Performance metrics like runtime and throughput
- 'e' : Reserved for energy calculations (to be implemented)
"""
num_correct = 0 # Initialize the counter for correct predictions

# Store true labels and predicted probabilities for AUC calculation
@@ -155,26 +164,7 @@ def summarize_result(result):
print(f"AUC: {auc_score:.4f}")
except ValueError as e:
print(f"AUC calculation failed: {e}")

-else:
-# For multi-class classification
-# Dynamically handle the number of classes based on the unique values in true_labels
-unique_classes = np.unique(true_labels)
-num_classes = len(unique_classes)
-
-# Adjust the probabilities to match the number of classes in true_labels
-predicted_probabilities = np.array([prob[:num_classes] for prob in predicted_probabilities])
-
-# Calculate accuracy
-accuracy = num_correct / len(result)
-print(f"Accuracy = {num_correct}/{len(result)} = {100*accuracy:4.2f}%")
-
-# Compute AUC for multi-class classification using one-vs-rest (macro-average AUC)
-try:
-auc_score = roc_auc_score(true_labels, predicted_probabilities, multi_class="ovr", average="macro")
-print(f"Macro-average AUC: {auc_score:.4f}")
-except ValueError as e:
-print(f"AUC calculation failed: {e}")

if __name__ == '__main__':
parser = argparse.ArgumentParser(prog="TestRunner", description=__doc__)
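For reference, a minimal standalone sketch of the two roc_auc_score call shapes used in summarize_result (the binary call kept above and the multi-class branch removed in this commit); the arrays are made-up stand-ins, not data from this repo:

    import numpy as np
    from sklearn.metrics import roc_auc_score

    # Binary case: one score per sample (probability of the positive class).
    true_labels = np.array([0, 1, 1, 0, 1])
    positive_probs = np.array([0.2, 0.8, 0.6, 0.3, 0.9])
    print(f"AUC: {roc_auc_score(true_labels, positive_probs):.4f}")

    # Multi-class case: one row of per-class probabilities per sample,
    # macro-averaged one-vs-rest (rows must sum to 1).
    true_labels_mc = np.array([0, 1, 2, 1, 0])
    probs_mc = np.array([[0.7, 0.2, 0.1],
                         [0.1, 0.8, 0.1],
                         [0.2, 0.2, 0.6],
                         [0.3, 0.5, 0.2],
                         [0.6, 0.3, 0.1]])
    print(f"Macro-average AUC: {roc_auc_score(true_labels_mc, probs_mc, multi_class='ovr', average='macro'):.4f}")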
155 changes: 133 additions & 22 deletions benchmark/runner/script.py
@@ -1,5 +1,8 @@
import re

import numpy as np
from datetime import datetime
from device_under_test import DUT # Import DUT class
from power_manager import PowerManager

class _ScriptStep:
"""Base class for script steps"""
@@ -13,12 +16,25 @@ def __init__(self, index=None):
self._index = None if index is None else int(index)

def run(self, io, dut, dataset, mode):
# Fetch the file and data
file_truth, data = dataset.get_file_by_index(self._index)

# Define the current time for formatted output
current_time = datetime.now()
formatted_time = current_time.strftime("%m%d.%H%M%S")

# Conditional print statements based on 'mode'
if data:
-print(f"Loading file {file_truth.get('file'):30}, true class = {int(file_truth.get('class')):2}")
+if mode == "a":
+print(f"Loading file {file_truth.get('file'):30}, true class = {int(file_truth.get('class')):2}")
+elif mode == "p":
+print(f"{formatted_time} ulp-mlperf: Runtime requirements have been met.")
+elif mode == "e":
+pass # Do nothing for energy mode
dut.load(data)
else:
print(f"WARNING: No data returned from dataset read. Script index = {self._index}, Dataset index = {dataset._current_index}")

return file_truth
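The formatted_time prefix above comes from strftime("%m%d.%H%M%S"); a standalone check of what it renders:

    from datetime import datetime
    stamp = datetime(2024, 12, 17, 14, 32, 5).strftime("%m%d.%H%M%S")
    print(stamp)  # 1217.143205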


@@ -49,12 +65,14 @@ def run(self, io, dut, dataset, mode):

class _ScriptInferStep(_ScriptStep):
"""Step to execute infer on the DUT"""
-def __init__(self, iterations=1, warmups=0):
+def __init__(self, iterations=1, warmups=0, loop_count=None):
self._iterations = int(iterations)
self._warmups = int(warmups)
self._infer_results = None
self._power_samples = []
self._power_timestamps = []
self.throughput_values = []
self.energy_values = [] # per-window energy accumulator, used by _print_energy_results
self._loop_count = loop_count # Store loop_count passed to this step

def run(self, io, dut, dataset, mode): # mode passed to run
result = dut.infer(self._iterations, self._warmups)
@@ -66,32 +84,31 @@ def run(self, io, dut, dataset, mode): # mode passed to run
timestamps, samples = _ScriptInferStep._gather_power_results(dut.power_manager)
print(f"samples:{len(samples)} timestamps:{len(timestamps)}")
result.update(power=dict(samples=samples,
-timestamps=timestamps
-)
-)
-
-# Print accuracy results (old method inside print_accuracy_results)
+timestamps=timestamps))

if mode == "a":
self._print_accuracy_results(infer_results)
elif mode == "e":
-self._print_energy_results(infer_results) # Assuming you have this function
+self._print_energy_results(infer_results)
elif mode == "p":
-self._print_performance_results(infer_results) # Assuming you have this function
+self._print_performance_results(infer_results)

return result

@staticmethod
def _gather_infer_results(cmd_results):
result = {}
total_inferences = 0
for res in cmd_results:
match = re.match(r'^m-results-\[([^]]+)\]$', res)
if match:
try:
# Split by comma and filter out empty strings
-result["results"] = [float(x) for x in match.group(1).split(',') if x.strip()]
+results = [float(x) for x in match.group(1).split(',') if x.strip()]
+result["results"] = results
+total_inferences += len(results)
except ValueError as e:
print(f"ERROR: Failed to parse infer results: {e}. Data: {match.group(1)}")
-result["results"] = [] # Handle the error by returning an empty list
+result["results"] = []
continue
match = re.match(r'^m-lap-us-([0-9]+)$', res)
if match:
@@ -101,8 +118,8 @@ def _gather_infer_results(cmd_results):
result["elapsed_time"] = result["end_time"] - result["start_time"]
else:
print("ERROR: Incomplete time data, missing start_time or end_time.")
result["total_inferences"] = total_inferences
return result
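The two regexes above define a small wire protocol between the runner and the DUT. A self-contained sketch of that contract, fed a made-up transcript (the real command stream comes from dut.infer()):

    import re

    def parse_sketch(cmd_results):
        parsed = {"results": [], "laps": []}
        for res in cmd_results:
            m = re.match(r'^m-results-\[([^]]+)\]$', res)
            if m:
                parsed["results"] = [float(x) for x in m.group(1).split(',') if x.strip()]
            m = re.match(r'^m-lap-us-([0-9]+)$', res)
            if m:
                parsed["laps"].append(int(m.group(1)))
        if len(parsed["laps"]) >= 2:
            # elapsed time is the span between the first and last lap markers
            parsed["elapsed_time"] = parsed["laps"][-1] - parsed["laps"][0]
        return parsed

    print(parse_sketch(["m-results-[0.1,0.2,0.7]", "m-lap-us-1000", "m-lap-us-61000"]))
    # {'results': [0.1, 0.2, 0.7], 'laps': [1000, 61000], 'elapsed_time': 60000}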


@staticmethod
def _gather_power_results(power):
@@ -119,16 +136,106 @@ def _gather_power_results(power):
return timeStamps, samples

def _print_accuracy_results(self, infer_results):
# Print the accuracy results in the original format
print(f" Results = {infer_results['results']}, time={infer_results['elapsed_time']} us")

def _print_energy_results(self, infer_results):
-# Assuming energy-related data is available in infer_results, adapt as necessary
-print("TEST ENERGY SUCCESS")
"""
Accumulates energy values for all loop iterations and calculates the median
energy per inference at the end of all iterations.
This whole path is still in trial: I do not have the energy board.
"""
# Number of inferences in this window (per-step iteration count)
num_inferences = self._iterations

# Get the current time in the desired format
current_time = datetime.now()
formatted_time = current_time.strftime("%m%d.%H%M%S")

# Initialize the PowerManager with the correct port and baud rate
port_device = "/dev/ttyUSB0" # replace with your serial port device
power_manager = PowerManager(port_device)

# Use the power manager to gather energy and power results
with power_manager:
timestamps, power_samples = _ScriptInferStep._gather_power_results(power_manager)

# Calculate total energy (sum of power * time intervals)
total_energy = sum([power_samples[i] * (timestamps[i+1][0] - timestamps[i][0])
for i in range(len(power_samples)-1)])

# Calculate average power (mean of the recorded power samples)
average_power = np.mean(power_samples) if power_samples else 0

# Calculate energy per inference (total energy divided by number of inferences)
energy_per_inference = total_energy / num_inferences if num_inferences > 0 else 0

# Print energy results for each window (1-based index; energy_values is appended below)
print(f"{formatted_time} ulp-ml: Energy data for window {len(self.energy_values) + 1} at time {timestamps[-1][0]:.2f} for {timestamps[-1][0] - timestamps[0][0]:.2f} sec.:")
print(f"{formatted_time} ulp-ml: Energy : {total_energy:>13.3f} uJ")
print(f"{formatted_time} ulp-ml: Power : {average_power:>13.3f} uW")
print(f"{formatted_time} ulp-ml: Energy/Inf. : {energy_per_inference:>13.3f} uJ/inf.")

# Store energy values for calculating median
self.energy_values.append(energy_per_inference)

# Check if we've completed all loop iterations
if len(self.energy_values) == self._loop_count:
# Calculate the median energy per inference after all loop iterations
total_median_energy = np.median(self.energy_values)

# Store the result for later use
self.median_energy = total_median_energy

# Print the new formatted output with median energy per inference
print(f"{formatted_time} ulp-ml: ---------------------------------------------------------")
print(f"{formatted_time} ulp-ml: Median energy cost is {self.median_energy:>10.3f} uJ/inf.")
print(f"{formatted_time} ulp-ml: ---------------------------------------------------------")
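A worked example of the integration above, with made-up samples (rectangle rule: each power sample is held until the next timestamp, so the last sample gets no interval; uW samples over timestamps in seconds yield uJ):

    power_samples = [100.0, 120.0, 110.0]   # uW
    timestamps = [(0.0,), (0.5,), (1.0,)]   # (seconds,) tuples, indexed as above
    total_energy = sum(power_samples[i] * (timestamps[i + 1][0] - timestamps[i][0])
                       for i in range(len(power_samples) - 1))
    print(total_energy)      # 100*0.5 + 120*0.5 = 110.0 uJ
    print(total_energy / 6)  # ~18.333 uJ/inf. for a 6-inference window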

def _print_performance_results(self, infer_results):
-# Assuming performance-related data is available in infer_results, adapt as necessary
-print("TEST PERFORMANCE SUCCESS")
"""
Accumulates throughput values for all loop iterations and calculates the median
throughput at the end of all iterations.
"""
# Number of inferences in this window (per-step iteration count)
num_inferences = self._iterations

# Retrieve elapsed time (in microseconds)
elapsed_time_us = infer_results.get("elapsed_time", 0)

# Get the current time in the desired format
current_time = datetime.now()
formatted_time = current_time.strftime("%m%d.%H%M%S")

# Calculate throughput
if elapsed_time_us > 0:
elapsed_time_sec = elapsed_time_us / 1_000_000 # Convert to seconds
throughput = num_inferences / elapsed_time_sec # Calculate throughput
else:
elapsed_time_sec = 0
throughput = 0

# Add throughput to the list of throughput values
self.throughput_values.append(throughput)

# Print the old format performance results for each inference (every loop)
print(f"{formatted_time} ulp-mlperf: Performance results for window {len(self.throughput_values)}:")
print(f"{formatted_time} ulp-mlperf: # Inferences : {num_inferences:>13}")
print(f"{formatted_time} ulp-mlperf: Runtime : {elapsed_time_sec:>10.3f} sec.")
print(f"{formatted_time} ulp-mlperf: Throughput : {throughput:>10.3f} inf./sec.")

# Check if we've completed all loop iterations
if len(self.throughput_values) == self._loop_count:
# Calculate the median throughput after all loop iterations
total_median_throughput = np.median(self.throughput_values)

# Store the result for later use
self.median_throughput = total_median_throughput

# Print the new formatted output with median throughput
print(f"{formatted_time} ulp-mlperf: ---------------------------------------------------------")
print(f"{formatted_time} ulp-mlperf: Median throughput is {self.median_throughput:>10.3f} inf./sec.")
print(f"{formatted_time} ulp-mlperf: ---------------------------------------------------------")
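The throughput arithmetic above, worked through with made-up numbers:

    import numpy as np

    num_inferences = 6
    elapsed_time_us = 1_200_000                                  # 1.2 s in microseconds
    throughput = num_inferences / (elapsed_time_us / 1_000_000)
    print(throughput)                                            # 5.0 inf./sec.

    # Across windows, the final line reports the median of the accumulated values:
    print(np.median([5.0, 4.8, 5.2, 4.9, 5.1]))                  # 5.0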


class _ScriptStreamStep(_ScriptStep):
"""Step to stream audio from an enhanced interface board"""
Expand Down Expand Up @@ -162,9 +269,13 @@ def _create_step(self, step, contents):
if cmd == 'download':
return _ScriptDownloadStep(*args)
if cmd == 'loop':
-return _ScriptLoopStep(self._parse_steps(contents), *args)
+# Pass the loop_count to the loop step and its commands
+loop_count = int(args[0]) if args else None
+return _ScriptLoopStep(self._parse_steps(contents), loop_count)
if cmd == 'infer':
-return _ScriptInferStep(*args)
+# Pass the loop_count to the infer step
+loop_count = args[-1] if args else None # Assuming loop_count is passed as last argument
+return _ScriptInferStep(*args, loop_count=loop_count)

def run(self, io, dut, dataset, mode): # Pass mode to all steps
with io:
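Under the args[-1] convention above, a two-argument script line leaves the warmups field doing double duty as loop_count. A small trace of the assumed parsing for a line such as "infer 6 0":

    cmd, *args = "infer 6 0".split()         # cmd = 'infer', args = ['6', '0']
    loop_count = args[-1] if args else None  # '0' -> also consumed as warmups
    # _ScriptInferStep('6', '0', loop_count='0') then yields:
    # iterations=6, warmups=0, loop_count='0'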
8 changes: 4 additions & 4 deletions benchmark/runner/tests.yaml
@@ -4,28 +4,28 @@ ad01:
  script:
    - loop 10:
      - download
-      - infer 1 0
+      - infer 6 0
ic01:
  name: image_classification
  model: ic01
  truth_file: y_labels.csv
  script:
    - loop 24:
      - download
-      - infer 1 0
+      - infer 9 0
kws01:
  name: keyword_spotting
  model: kws01
  truth_file: y_labels.csv
  script:
    - loop 20:
      - download
-      - infer 1 0
+      - infer 8 0
vww01:
  name: person_detection
  model: vww01
  truth_file: y_labels.csv
  script:
    - loop 2:
      - download
-      - infer 1 0
+      - infer 5 1

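For reference, the script grammar as the runner reads it (field meanings inferred from _create_step and _ScriptInferStep above):

    script:
      - loop 10:          # repeat the nested steps 10 times
        - download        # load the next dataset file onto the DUT
        - infer 6 0       # infer <iterations> <warmups>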