Add support for the current CLI to PA Config Generator (#182)
* Added support for the CLI to PA config generator

* Fixing CodeQL issues

* Removing redundant extra_args check
nv-braf committed Nov 18, 2024
1 parent a850d67 commit 3f92904
Showing 3 changed files with 293 additions and 47 deletions.
262 changes: 248 additions & 14 deletions genai-perf/genai_perf/config/generate/perf_analyzer_config.py
@@ -12,14 +12,69 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from argparse import Namespace
from copy import deepcopy
from dataclasses import dataclass
-from typing import Any, Dict, List
from pathlib import Path
from typing import Any, Dict, List, Optional

from genai_perf.config.generate.search_parameter import SearchUsage
from genai_perf.config.input.config_command import ConfigCommand, ConfigPerfAnalyzer
from genai_perf.constants import DEFAULT_ARTIFACT_DIR
from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.input_constants import DEFAULT_INPUT_DATA_JSON
from genai_perf.logging import logging
from genai_perf.types import CheckpointObject, ModelName, ModelObjectiveParameters
from genai_perf.utils import convert_option_name
from genai_perf.wrapper import Profiler

# This is the list of GAP CLI args that are not used when creating
# the PA command line
perf_analyzer_ignore_args = [
"artifact_dir",
"backend",
"batch_size_image",
"batch_size_text",
"concurrency",
"endpoint_type",
"extra_inputs",
"formatted_model_name",
"func",
"generate_plots",
"goodput",
"image_format",
"image_height_mean",
"image_height_stddev",
"image_width_mean",
"image_width_stddev",
"input_dataset",
"input_file",
"input_format",
"model",
"model_selection_strategy",
"num_prompts",
"output_format",
"output_tokens_mean",
"output_tokens_mean_deterministic",
"output_tokens_stddev",
"profile_export_file",
"prompt_source",
"random_seed",
"request_rate",
"server_metrics_url",
# The 'streaming' flag passed to this script determines whether the
# LLM response should be streamed. It differs from the 'streaming'
# flag that PA takes, which means something else (and is required
# for decoupled models in Triton).
"streaming",
"subcommand",
"synthetic_input_files",
"synthetic_input_tokens_mean",
"synthetic_input_tokens_stddev",
"tokenizer",
"tokenizer_trust_remote_code",
"tokenizer_revision",
]
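The ignored attributes never reach the PA invocation; everything else on the parsed Namespace is forwarded as a flag (plus a value, for non-boolean options). As an illustrative sketch of that filtering pattern (not part of this diff; names are hypothetical):

```python
from argparse import Namespace

# Hypothetical ignore set standing in for perf_analyzer_ignore_args
IGNORE = {"model", "subcommand", "streaming"}

def forward_args(args: Namespace) -> list:
    """Translate every non-ignored, non-empty attribute into CLI tokens."""
    tokens = []
    for name, value in vars(args).items():
        if name in IGNORE or value is None or value is False:
            continue
        flag = "--" + name.replace("_", "-")
        # Boolean True becomes a bare flag; other values carry an argument
        tokens += [flag] if value is True else [flag, str(value)]
    return tokens

args = Namespace(model="gpt2", streaming=True, measurement_interval=5000, verbose=True)
print(forward_args(args))
# ['--measurement-interval', '5000', '--verbose']
```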


@dataclass
@@ -34,14 +89,34 @@ def __init__(
config: ConfigCommand,
model_objective_parameters: ModelObjectiveParameters,
model_name: ModelName,
args: Namespace = Namespace(),
extra_args: Optional[List[str]] = None,
):
self._model_name = model_name
self._args = deepcopy(args)
self._set_options_based_on_cli(args, extra_args)
self._set_options_based_on_config(config)
self._set_options_based_on_objective(model_objective_parameters)
self._set_artifact_paths()

###########################################################################
# Set Options Methods
###########################################################################
def _set_options_based_on_cli(
self, args: Namespace, extra_args: Optional[List[str]] = None
) -> None:
self._cli_args = []

# When restoring from a checkpoint there won't be any args
if not hasattr(self._args, "subcommand"):
return

self._cli_args += self._add_required_args(args)
self._cli_args += Profiler.add_protocol_args(args)
self._cli_args += Profiler.add_inference_load_args(args)
self._cli_args += self._add_misc_args(args)
self._cli_args += self._add_extra_args(extra_args)

def _set_options_based_on_config(self, config: ConfigCommand) -> None:
self._config: ConfigPerfAnalyzer = config.perf_analyzer

@@ -54,6 +129,139 @@ def _set_options_based_on_objective(
if parameter.usage == SearchUsage.RUNTIME_PA:
self._parameters[name] = parameter.get_value_based_on_category()

def _set_artifact_paths(self) -> None:
# When restoring from a checkpoint there won't be any args
if not hasattr(self._args, "subcommand"):
return

if self._args.artifact_dir == Path(DEFAULT_ARTIFACT_DIR):
artifact_name = self._get_artifact_model_name()
artifact_name += self._get_artifact_service_kind()
artifact_name += self._get_artifact_stimulus_type()

self._args.artifact_dir = self._args.artifact_dir / Path(
"-".join(artifact_name)
)

if self._args.profile_export_file.parent != Path(""):
raise ValueError(
"Please use --artifact-dir option to define intermediary paths to "
"the profile export file."
)

self._args.profile_export_file = (
self._args.artifact_dir / self._args.profile_export_file
)

self._cli_args += [
"--input-data",
f"{self._args.artifact_dir / DEFAULT_INPUT_DATA_JSON}",
"--profile-export-file",
f"{self._args.profile_export_file}",
]
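When --artifact-dir is left at its default, the run's artifacts land in a directory whose name joins the model, service kind, and stimulus with dashes. A standalone sketch of the naming scheme (values invented for illustration):

```python
from pathlib import Path

DEFAULT_ARTIFACT_DIR = Path("artifacts")  # same default as the constant above

def derive_artifact_dir(name_parts: list) -> Path:
    """Join the artifact name parts with '-' under the default root."""
    return DEFAULT_ARTIFACT_DIR / "-".join(name_parts)

parts = ["gpt2", "triton-vllm", "concurrency8"]  # model, service kind, stimulus
print(derive_artifact_dir(parts))  # artifacts/gpt2-triton-vllm-concurrency8
```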

def _get_artifact_model_name(self) -> List[str]:
# Preprocess Huggingface model names that include '/' in their model name.
if (self._args.formatted_model_name is not None) and (
"/" in self._args.formatted_model_name
):
filtered_name = "_".join(self._args.formatted_model_name.split("/"))
logger = logging.getLogger(__name__)
logger.info(
f"Model name '{self._args.formatted_model_name}' cannot be used to create artifact "
f"directory. Instead, '{filtered_name}' will be used."
)
model_name = [f"{filtered_name}"]
else:
model_name = [f"{self._args.formatted_model_name}"]

return model_name
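Hugging Face model IDs can contain '/', which would create an unintended subdirectory, so the separator is replaced before the name is used. For example (model ID hypothetical):

```python
model_id = "openai-community/gpt2"
print("_".join(model_id.split("/")))  # openai-community_gpt2
```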

def _get_artifact_service_kind(self) -> List[str]:
if self._args.service_kind == "openai":
service_kind = [f"{self._args.service_kind}-{self._args.endpoint_type}"]
elif self._args.service_kind == "triton":
service_kind = [
f"{self._args.service_kind}-{self._args.backend.to_lowercase()}"
]
elif self._args.service_kind == "tensorrtllm_engine":
service_kind = [f"{self._args.service_kind}"]
else:
raise ValueError(f"Unknown service kind '{self._args.service_kind}'.")

return service_kind

def _get_artifact_stimulus_type(self) -> List[str]:
if self._args.concurrency:
stimulus = [f"concurrency{self._args.concurrency}"]
elif self._args.request_rate:
stimulus = [f"request_rate{self._args.request_rate}"]
elif "concurrency" in self._parameters:
concurrency = str(self._parameters["concurrency"])
stimulus = [f"concurrency{concurrency}"]
elif "request_rate" in self._parameters:
request_rate = str(self._parameters["request_rate"])
stimulus = [f"request_rate{request_rate}"]

return stimulus
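Note the precedence: a concurrency or request rate pinned on the command line outranks one produced by the search parameters. The same fall-through, reduced to a standalone function (inputs hypothetical):

```python
def stimulus_tag(cli_concurrency=None, cli_request_rate=None, params=None):
    """Pick the stimulus label, preferring explicit CLI values."""
    params = params or {}
    if cli_concurrency:
        return f"concurrency{cli_concurrency}"
    if cli_request_rate:
        return f"request_rate{cli_request_rate}"
    if "concurrency" in params:
        return f"concurrency{params['concurrency']}"
    if "request_rate" in params:
        return f"request_rate{params['request_rate']}"
    raise ValueError("No stimulus found on the CLI or in the search parameters.")

print(stimulus_tag(cli_concurrency=4, params={"concurrency": 32}))  # concurrency4
```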

def _add_required_args(self, args: Namespace) -> List[str]:
required_args = [
"-m",
f"{args.formatted_model_name}",
"--async",
]

return required_args

def _add_misc_args(self, args: Namespace) -> List[str]:
misc_args = []

for arg, value in vars(args).items():
if arg in perf_analyzer_ignore_args:
pass
elif self._arg_is_tensorrtllm_engine(arg, value):
misc_args += self._add_tensorrtllm_engine_args()
elif value is None or value is False:
pass
elif value is True:
misc_args += self._add_boolean_arg(arg)
else:
misc_args += self._add_non_boolean_arg(arg, value)

return misc_args

def _add_boolean_arg(self, arg: str) -> List[str]:
if len(arg) == 1:
return [f"-{arg}"]
else:
return [f"--{arg}"]

def _add_non_boolean_arg(self, arg: str, value: Any) -> List[str]:
if len(arg) == 1:
return [f"-{arg}", f"{value}"]
else:
converted_arg = convert_option_name(arg)
return [f"--{converted_arg}", f"{value}"]

def _add_tensorrtllm_engine_args(self) -> List[str]:
# GAP needs to call PA using triton_c_api service kind when running
# against tensorrtllm engine.
return ["--service-kind", "triton_c_api", "--streaming"]

def _arg_is_tensorrtllm_engine(self, arg: str, value: str) -> bool:
return arg == "service_kind" and value == "tensorrtllm_engine"

def _add_extra_args(self, extra_args: Optional[List[str]]) -> List[str]:
if not extra_args:
return []

args = []
for extra_arg in extra_args:
args += [f"{extra_arg}"]

return args

###########################################################################
# Get Accessor Methods
###########################################################################
@@ -63,14 +271,33 @@ def get_parameters(self) -> Dict[str, Any]:
"""
return self._parameters

def get_obj_args(self) -> Namespace:
"""
Returns args that can be used by the existing CLI-based methods in GAP.
These will include any objectives that are set via parameters.
"""
obj_args = deepcopy(self._args)
if "concurrency" in self._parameters:
obj_args.concurrency = self._parameters["concurrency"]
if "request_rate" in self._parameters:
obj_args.request_rate = self._parameters["request_rate"]
if "runtime_batch_size" in self._parameters:
obj_args.batch_size = self._parameters["runtime_batch_size"]

return obj_args
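Since much of GAP still consumes an argparse Namespace, searched objective values are written back onto a copy of the original args. A reduced sketch of that override pattern (field names simplified):

```python
from argparse import Namespace
from copy import deepcopy

def apply_objectives(args: Namespace, parameters: dict) -> Namespace:
    """Return a copy of args with searched objectives overriding CLI values."""
    obj_args = deepcopy(args)
    if "concurrency" in parameters:
        obj_args.concurrency = parameters["concurrency"]
    if "request_rate" in parameters:
        obj_args.request_rate = parameters["request_rate"]
    return obj_args

base = Namespace(concurrency=1, request_rate=None)
print(apply_objectives(base, {"concurrency": 16}).concurrency)  # 16
```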

###########################################################################
# CLI String Creation Methods
###########################################################################
def create_command(self) -> List[str]:
"""
Returns the PA command as a list of strings
"""
cli_args = self._create_required_args()

cli_args = [self._config.path]
# FIXME: For now these will come from the CLI until support for a config file is added
# cli_args = self._create_required_args()
cli_args += self._cli_args
cli_args += self._create_parameter_args()

return cli_args
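The assembled command is simply the PA binary path followed by the stored CLI tokens and the per-run parameter options, e.g. (tokens invented for illustration):

```python
# Hypothetical pieces, mirroring the list concatenation in create_command()
path = "perf_analyzer"
cli_args = ["-m", "gpt2", "--async"]
parameter_args = ["--concurrency-range", "8"]

command = [path] + cli_args + parameter_args
print(" ".join(command))  # perf_analyzer -m gpt2 --async --concurrency-range 8
```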
@@ -108,7 +335,7 @@ def _convert_objective_to_cli_option(self, objective_name: str) -> str:
obj_to_cli_dict = {
"runtime_batch_size": "--batch-size",
"concurrency": "--concurrency-range",
"request-rate": "--request-rate-range",
"request_rate": "--request-rate-range",
}

try:
@@ -130,26 +357,29 @@ def representation(self) -> str:
"--metrics-url",
"--latency-report-file",
"--measurement-request-count",
"--input-data",
"--profile-export-file",
"-i",
"-u",
]
options_only_to_remove = ["--verbose", "--extra-verbose", "--verbose-csv"]

-representation = self.create_cli_string()
command = self.create_command()

-# Remove the PA call path which is always the first token
-representation_list = representation.split(" ")
-representation_list.pop(0)
-representation = " ".join(representation_list)
# Remove the PA call path which is always the first item
command.pop(0)

for option_with_arg in options_with_arg_to_remove:
-representation = self._remove_option_from_cli_string(
-option_with_arg, representation, with_arg=True
-)
if option_with_arg in command:
index = command.index(option_with_arg)
del command[index : index + 2]

for option_only in options_only_to_remove:
-representation = self._remove_option_from_cli_string(
-option_only, representation, with_arg=False
-)
if option_only in command:
index = command.index(option_only)
del command[index]

representation = " ".join(command)
return representation
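Working on the token list (rather than the old string-splitting helper) makes the removal of run-specific options exact: a flag and its value are deleted as a pair. A self-contained sketch of the same scrubbing (inputs invented):

```python
def scrub(command, options_with_arg, options_only):
    """Strip run-specific options from a PA token list for stable comparison."""
    tokens = command[1:]  # drop the PA binary path, always the first item
    for opt in options_with_arg:
        if opt in tokens:
            index = tokens.index(opt)
            del tokens[index : index + 2]  # remove the flag and its value
    for opt in options_only:
        if opt in tokens:
            tokens.remove(opt)
    return " ".join(tokens)

cmd = ["perf_analyzer", "-m", "gpt2", "-u", "localhost:8001", "--verbose"]
print(scrub(cmd, ["-u"], ["--verbose"]))  # -m gpt2
```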

def _remove_option_from_cli_string(
Expand Down Expand Up @@ -178,6 +408,9 @@ def create_checkpoint_object(self) -> CheckpointObject:
"""
pa_config_dict = deepcopy(self.__dict__)

# Values set on the CLI are not kept (they can vary from run to run)
del pa_config_dict["_args"]

return pa_config_dict

@classmethod
@@ -197,5 +430,6 @@ def create_class_from_checkpoint(
**perf_analyzer_config_dict["_config"]
)
perf_analyzer_config._parameters = perf_analyzer_config_dict["_parameters"]
perf_analyzer_config._cli_args = perf_analyzer_config_dict["_cli_args"]

return perf_analyzer_config
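Checkpointing persists the object's state minus the CLI Namespace (whose values can differ between runs), while the derived _cli_args list is stored and restored verbatim. A compact sketch of that round trip (class reduced to the relevant fields):

```python
from copy import deepcopy

class PAConfigSketch:
    def __init__(self, args=None, cli_args=None):
        self._args = args                # dropped at checkpoint time
        self._cli_args = cli_args or []  # persisted and restored verbatim

    def create_checkpoint_object(self) -> dict:
        state = deepcopy(self.__dict__)
        del state["_args"]  # CLI values are not kept
        return state

    @classmethod
    def create_class_from_checkpoint(cls, state: dict) -> "PAConfigSketch":
        restored = cls()
        restored._cli_args = state["_cli_args"]
        return restored

saved = PAConfigSketch(args=object(), cli_args=["-m", "gpt2"]).create_checkpoint_object()
print(PAConfigSketch.create_class_from_checkpoint(saved)._cli_args)  # ['-m', 'gpt2']
```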
1 change: 1 addition & 0 deletions genai-perf/genai_perf/constants.py
@@ -33,3 +33,4 @@
DEFAULT_ARTIFACT_DIR = "artifacts"
DEFAULT_COMPARE_DIR = "compare"
DEFAULT_PARQUET_FILE = "all_data"
DEFAULT_PROFILE_EXPORT_FILE = "profile_export.json"