Skip to content

Commit

Permalink
[Benchmarking-Py] 2.0.3 Release - Adding Profiling for convert & build
Browse files Browse the repository at this point in the history
  • Loading branch information
DEKHTIARJonathan committed Oct 21, 2022
1 parent b7e663d commit bfed396
Show file tree
Hide file tree
Showing 8 changed files with 287 additions and 153 deletions.
6 changes: 6 additions & 0 deletions tftrt/benchmarking-python/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ Description of the change

<!-- YOU CAN EDIT FROM HERE -->

## [2.0.3] - 2022.10.20 - @DEKHTIARJonathan

- Profiling for TF-TRT build and convert added
- Argument `tf_profile_export_path` renamed: `tftrt_build_profile_export_path`
- NVIDIA PyCOCOTools Updated to version 0.7.1

## [2.0.2] - 2022.09.30 - @DEKHTIARJonathan

- Bug Fixed in real data GPU Prefetcher that could cause a crash when the dataset
Expand Down
30 changes: 29 additions & 1 deletion tftrt/benchmarking-python/benchmark_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,23 @@ def __init__(self):
# =========================== TF Profiling =========================== #

self._parser.add_argument(
"--tf_profile_export_path",
"--tftrt_build_profile_export_path",
type=str,
default=None,
help="If set, the script will export tf.profile files for further "
"performance analysis."
)

self._parser.add_argument(
"--tftrt_convert_profile_export_path",
type=str,
default=None,
help="If set, the script will export tf.profile files for further "
"performance analysis."
)

self._parser.add_argument(
"--inference_loop_profile_export_path",
type=str,
default=None,
help="If set, the script will export tf.profile files for further "
Expand Down Expand Up @@ -451,6 +467,18 @@ def _post_process_args(self, args):
if args.debug or args.debug_data_aggregation or args.debug_performance:
logging.set_verbosity(logging.DEBUG)

if (args.inference_loop_profile_export_path or
args.tftrt_build_profile_export_path or
args.tftrt_convert_profile_export_path):
"""Warm-up the profiler session.
The profiler session will set up profiling context, including loading CUPTI
library for GPU profiling. This is used for improving the accuracy of
the profiling results.
"""
from tensorflow.python.profiler.profiler_v2 import warmup
logging.info("[PROFILER] Warming Up ...")
warmup()

return args

def parse_args(self):
Expand Down
2 changes: 1 addition & 1 deletion tftrt/benchmarking-python/benchmark_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# The `__version__` number shall be updated every time core benchmarking files
# are updated.
# Please update CHANGELOG.md with a description of what this version changed.
__version__ = "2.0.2"
__version__ = "2.0.3"


def get_commit_id():
Expand Down
84 changes: 84 additions & 0 deletions tftrt/benchmarking-python/benchmark_profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# -*- coding: utf-8 -*-

import functools
import contextlib

import tensorflow as tf

from benchmark_logger import logging
from benchmark_utils import timed_section


class ProfilingCTX(object):
    """Idempotent context manager around the TensorFlow profiler.

    When `export_dir` is None, `start()`/`stop()` are no-ops, which lets
    callers wrap code unconditionally and enable profiling via CLI flags.
    The `_started` flag guards against double start/stop within one instance.
    """

    def __init__(self, export_dir=None, verbose=False, delay_ms=0):
        # export_dir: logdir where profile data is written; None disables
        #     profiling entirely.
        # verbose: if True, raises host tracer level to 3 and enables the
        #     python function tracer.
        # delay_ms: delay before the profiler starts collecting, forwarded
        #     to ProfilerOptions.
        self._started = False
        self._export_dir = export_dir
        self._verbose = verbose
        self._delay_ms = delay_ms

    def start(self):
        """Start a profiler session if enabled and not already running."""
        if not self._started and self._export_dir is not None:
            try:
                profiler_opts = tf.profiler.experimental.ProfilerOptions(
                    # Adjust TraceMe levels:
                    # - 1: critical
                    # - 2: info [default]
                    # - 3: verbose
                    host_tracer_level=3 if self._verbose else 2,
                    # Enables python function call tracing
                    # - 0: disabled [default]
                    # - 1: enabled
                    python_tracer_level=1 if self._verbose else 0,
                    # Adjust device (TPU/GPU) tracer level:
                    # - 0: disabled
                    # - 1: enabled [default]
                    device_tracer_level=1,
                    delay_ms=self._delay_ms
                )
                tf.profiler.experimental.start(
                    logdir=self._export_dir, options=profiler_opts
                )
                logging.info(
                    "[PROFILER] Starting Profiling - Data will be stored in: "
                    f"`{self._export_dir}`"
                )
                self._started = True

            # Only one profiler session can exist at a time process-wide.
            except tf.errors.AlreadyExistsError:
                logging.warning(
                    "[PROFILER] Could not start the profiler. It "
                    "appears to have been previously started."
                )

    def stop(self):
        """Stop the profiler session if this instance started one."""
        if self._started:
            try:
                tf.profiler.experimental.stop()
                logging.info(
                    "[PROFILER] Stopping Profiling - Data has been stored in: "
                    f"`{self._export_dir}`"
                )
            # profiler has already been stopped or was never started
            except tf.errors.UnavailableError:
                logging.warning(
                    "[PROFILER] Could not stop the profiler. It "
                    "appears to have been previously stopped."
                )
            self._started = False

    def __enter__(self):
        self.start()
        # BUG FIX: return self so `with ProfilingCTX(...) as ctx:` binds the
        # manager instead of None.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()


@contextlib.contextmanager
def time_and_trace_ctx(message, step_num=None, _r=None):
    """Wrap a code section in both a timed log section and a TF TraceMe.

    `message` names the section in the timing log and in the profiler trace;
    `step_num` and `_r` are forwarded to `tf.profiler.experimental.Trace`.
    """
    timer_ctx = timed_section(message)
    trace_ctx = tf.profiler.experimental.Trace(message, step_num=step_num, _r=_r)
    # Enter the timer first, then the trace — same ordering as entering
    # them as separately nested `with` statements.
    with timer_ctx, trace_ctx:
        yield
Loading

0 comments on commit bfed396

Please sign in to comment.