Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for skipping profiling if the Result is found in the checkpoint #191

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions genai-perf/genai_perf/config/generate/genai_perf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ def get_obj_args(self) -> Namespace:

return obj_args

###########################################################################
# Representation Methods
###########################################################################
def representation(self) -> str:
    """
    Return a string representation of the GenAI-Perf options, used to
    decide whether a previous (checkpointed) run covers this
    configuration.

    Only the input options and output-token options participate; two
    configs with equal representations are treated as interchangeable.
    """
    # str() is the idiomatic spelling of the explicit __str__() calls
    return " ".join([str(self.input), str(self.output_tokens)])

###########################################################################
# Checkpoint Methods
###########################################################################
Expand Down
38 changes: 37 additions & 1 deletion genai-perf/genai_perf/config/run/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from copy import deepcopy
from dataclasses import dataclass, field
from typing import List

Expand All @@ -18,11 +19,18 @@
from genai_perf.types import (
CheckpointObject,
GpuMetricObjectives,
ModelName,
ModelWeights,
PerfMetricObjectives,
RunConfigName,
)


@dataclass(frozen=True)
class ResultsDefaults:
    """Default constants used by Results."""

    # Sentinel used before any run-config IDs have been seen; real IDs
    # start at 0, so max() comparisons against this always lose.
    STARTING_ID = -1


@dataclass
class Results:
"""
Expand Down Expand Up @@ -80,11 +88,29 @@ def get_results_failing_constraints(self) -> "Results":

return failing_results

def get_run_config_name_based_on_representation(
    self, model_name: ModelName, representation: str
) -> RunConfigName:
    """
    Return the name of the stored RunConfig whose representation
    matches; otherwise mint a new name whose ID is one past the
    highest config ID seen so far.
    """
    highest_id = ResultsDefaults.STARTING_ID
    for config in self.run_configs:
        if config.representation() == representation:
            # Already profiled: reuse the checkpointed name.
            return config.name
        highest_id = max(highest_id, int(config.get_name_id()))

    # NOTE(review): assumes every existing name ends in an integer ID —
    # int() raises otherwise; confirm the naming convention upstream.
    return f"{model_name}_run_config_{highest_id + 1}"

###########################################################################
# Set Accessor Methods
###########################################################################
def add_run_config(self, run_config: RunConfig) -> None:
    """
    Store a defensive copy of the given RunConfig and keep the
    list sorted best-first.
    """
    # deepcopy so later caller-side mutation cannot corrupt
    # checkpointed results
    copied_config = deepcopy(run_config)
    self.run_configs.append(copied_config)
    self.run_configs.sort(reverse=True)

def set_gpu_metric_objectives(
Expand All @@ -110,3 +136,13 @@ def set_model_weighting(self, model_weights: ModelWeights) -> None:
def set_constraints(self, constraints: RunConstraints) -> None:
for run_config in self.run_configs:
run_config.set_constraints(constraints)

###########################################################################
# Misc Methods
###########################################################################
def found_representation(self, representation: str) -> bool:
    """
    Report whether any stored RunConfig shares the given
    representation (i.e. this configuration was already profiled).
    """
    return any(
        config.representation() == representation
        for config in self.run_configs
    )
36 changes: 33 additions & 3 deletions genai-perf/genai_perf/config/run/run_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ class RunConfig:
# triton_env: Dict[str, Any]
# model_run_configs: List[ModelRunConfig]

name: RunConfigName
genai_perf_config: GenAIPerfConfig
perf_analyzer_config: PerfAnalyzerConfig
measurement: RunConfigMeasurement
name: RunConfigName = ""
measurement: RunConfigMeasurement = RunConfigMeasurement()

###########################################################################
# Checkpoint Methods
Expand Down Expand Up @@ -88,7 +88,10 @@ def create_class_from_checkpoint(
)

run_config = RunConfig(
name, genai_perf_config, perf_analyzer_config, measurement
name=name,
genai_perf_config=genai_perf_config,
perf_analyzer_config=perf_analyzer_config,
measurement=measurement,
)

return run_config
Expand Down Expand Up @@ -138,6 +141,16 @@ def get_weighted_perf_metric_values(
perf_metric_name, return_value
)

def get_name_id(self) -> str:
    """
    Return the unique ID embedded in this RunConfig's name; by
    convention it is everything after the final underscore.
    """
    # rpartition finds the last "_" in one pass; when no underscore
    # exists the whole name comes back, matching split("_")[-1]
    return self.name.rpartition("_")[-1]

###########################################################################
# Set Accessor Methods
###########################################################################
Expand All @@ -164,6 +177,23 @@ def add_perf_metrics(
) -> None:
self.measurement.add_perf_metrics(model_name, perf_metrics)

###########################################################################
# Representation Methods
###########################################################################
def representation(self) -> str:
    """
    Build the string used to decide whether a previous
    (checkpointed) run matches this RunConfig's options.
    """
    parts = [
        self.perf_analyzer_config.representation(),
        self.genai_perf_config.representation(),
    ]
    return " ".join(parts)

###########################################################################
# Constraint Methods
###########################################################################
Expand Down
4 changes: 2 additions & 2 deletions genai-perf/genai_perf/measurements/run_config_measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class RunConfigMeasurement:

def __init__(
self,
gpu_metrics: GpuRecords,
gpu_metrics: Optional[GpuRecords] = None,
run_constraints: Optional[RunConstraints] = None,
):
"""
Expand All @@ -82,7 +82,7 @@ def __init__(
A set of constraints (set by the user) used to determine if
this is a valid measurement
"""
self._gpu_metrics = gpu_metrics
self._gpu_metrics = gpu_metrics if gpu_metrics else {}
self._gpu_metric_objectives: Optional[GpuMetricObjectives] = (
RunConfigMeasurementDefaults.METRIC_OBJECTIVE
)
Expand Down
122 changes: 69 additions & 53 deletions genai-perf/genai_perf/subcommand/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,63 +90,79 @@ def analyze_handler(args: Namespace) -> None:
obj_args = perf_analyzer_config.get_obj_args()

#
# Create Input/Artifacts
input_config_options = create_config_options(obj_args)
create_artifacts_dirs(obj_args)
tokenizer = get_tokenizer(
obj_args.tokenizer,
obj_args.tokenizer_trust_remote_code,
obj_args.tokenizer_revision,
)
generate_inputs(input_config_options)

#
# Run PA
run_perf_analyzer(
args=obj_args,
perf_analyzer_config=perf_analyzer_config,
telemetry_data_collector=telemetry_data_collector,
)

#
# Extract Perf Metrics
infer_mode, load_level = _determine_infer_mode_and_load_level(
obj_args, objectives, model_name
)
data_parser = calculate_metrics(obj_args, tokenizer)
perf_stats = data_parser.get_statistics(infer_mode, load_level)
perf_metrics = perf_stats.create_records()

#
# Extract Telemetry Metrics
# FIXME: Once I'm able to collect telemetry records will need
# to write a method to hook this up
# telemetry_stats = (
# telemetry_data_collector.get_statistics()
# if telemetry_data_collector
# else None
# )
gpu_metrics: GpuRecords = {}

#
# Create RunConfigMeasurement
run_config_measurement = RunConfigMeasurement(gpu_metrics)
run_config_measurement.add_perf_metrics(model_name, perf_metrics)

#
# Create RunConfig
run_config_name = model_name + "_run_config_" + str(count)
run_config = RunConfig(
name=run_config_name,
# Check if this configuration has already been profiled (is in the checkpoint)
representation = RunConfig(
genai_perf_config=genai_perf_config,
perf_analyzer_config=perf_analyzer_config,
measurement=run_config_measurement,
).representation()

run_config_found = results.found_representation(representation)
run_config_name = results.get_run_config_name_based_on_representation(
model_name, representation
)

#
# Add to results and write checkpoint
results.add_run_config(run_config)
checkpoint.create_checkpoint_object()
if not run_config_found:
#
# Create Input/Artifacts
input_config_options = create_config_options(obj_args)
create_artifacts_dirs(obj_args)
tokenizer = get_tokenizer(
obj_args.tokenizer,
obj_args.tokenizer_trust_remote_code,
obj_args.tokenizer_revision,
)
generate_inputs(input_config_options)

#
# Run PA
run_perf_analyzer(
args=obj_args,
perf_analyzer_config=perf_analyzer_config,
telemetry_data_collector=telemetry_data_collector,
)

#
# Extract Perf Metrics
infer_mode, load_level = _determine_infer_mode_and_load_level(
obj_args, objectives, model_name
)
data_parser = calculate_metrics(obj_args, tokenizer)
perf_stats = data_parser.get_statistics(infer_mode, load_level)
perf_metrics = perf_stats.create_records()

#
# Extract Telemetry Metrics
# FIXME: Once I'm able to collect telemetry records will need
# to write a method to hook this up
# telemetry_stats = (
# telemetry_data_collector.get_statistics()
# if telemetry_data_collector
# else None
# )
gpu_metrics: GpuRecords = {}

#
# Create RunConfigMeasurement
run_config_measurement = RunConfigMeasurement(gpu_metrics)
run_config_measurement.add_perf_metrics(model_name, perf_metrics)

#
# Create RunConfig
run_config = RunConfig(
name=run_config_name,
genai_perf_config=genai_perf_config,
perf_analyzer_config=perf_analyzer_config,
measurement=run_config_measurement,
)

#
# Add to results and write checkpoint
results.add_run_config(run_config)
checkpoint.create_checkpoint_object()
else:
logger.info(
f"{run_config_name} found in checkpoint - skipping profiling..."
)


def _setup_config(args: Namespace) -> ConfigCommand:
Expand Down
14 changes: 14 additions & 0 deletions genai-perf/tests/test_genai_perf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ def test_default_config_and_objective_capture(self):
expected_output_tokens_config, self._default_genai_perf_config.output_tokens
)

###########################################################################
# Test Representation
###########################################################################
def test_representation(self):
    """
    Check that representation() joins the input and output-token
    string forms with a single space.
    """
    expected = " ".join(
        [str(ConfigInput(num_prompts=50)), str(ConfigOutputTokens())]
    )
    actual = self._default_genai_perf_config.representation()

    self.assertEqual(expected, actual)

###########################################################################
# Checkpoint Tests
###########################################################################
Expand Down
Loading
Loading