diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index a6f74063..1d0db66d 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -52,7 +52,10 @@ def convert(
 
         for file_data in generic_dataset.files_data.values():
             for row in file_data.rows:
-                token_ids = config.tokenizer.encode(row.texts[0])
+                if not config.apply_chat_template:
+                    token_ids = config.tokenizer.encode(row.texts[0])
+                else:
+                    token_ids = config.tokenizer.apply_chat_template(row.texts[0])
                 payload = {
                     "input_ids": {
                         "content": token_ids,
@@ -82,4 +85,7 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["min_length"] = [num_tokens]
 
         for key, value in config.extra_inputs.items():
-            payload[key] = [value]
+            if key == "triton_converter_set_end_id" and value:
+                payload["end_id"] = [config.tokenizer._tokenizer.eos_token_id]
+            else:
+                payload[key] = [value]
diff --git a/genai-perf/genai_perf/inputs/inputs_config.py b/genai-perf/genai_perf/inputs/inputs_config.py
index 133c0121..0993668c 100644
--- a/genai-perf/genai_perf/inputs/inputs_config.py
+++ b/genai-perf/genai_perf/inputs/inputs_config.py
@@ -142,3 +142,6 @@ class InputsConfig:
 
     # Seed used to generate random values
     random_seed: int = DEFAULT_RANDOM_SEED
+
+    # whether to apply chat template in triton converter
+    apply_chat_template: bool = False
\ No newline at end of file
diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py
index 916df005..4379dd67 100755
--- a/genai-perf/genai_perf/main.py
+++ b/genai-perf/genai_perf/main.py
@@ -97,6 +97,7 @@ def create_config_options(args: Namespace) -> InputsConfig:
         batch_size_image=args.batch_size_image,
         batch_size_text=args.batch_size_text,
         output_dir=args.artifact_dir,
+        apply_chat_template=args.triton_converter_apply_chat_template,
     )
 
 
diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index e9df28a1..d3ac59c2 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -571,6 +571,13 @@ def _add_image_input_args(parser):
         "If format is not selected, format of generated image is selected at random",
     )
 
+    input_group.add_argument(
+        "--triton-converter-apply-chat-template",
+        action="store_true",
+        required=False,
+        help="If specified, the input to trtllm engines in triton server will "
+        "be wrapped with chat template."
+    )
 
 def _add_profile_args(parser):
     profile_group = parser.add_argument_group("Profiling")
diff --git a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
index e8d8503a..1395b02a 100755
--- a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
+++ b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
@@ -282,8 +282,11 @@ def _get_tensorrtllm_engine_token_counts(
         for r in res_outputs:
             if isinstance(r["output_ids"], list):
                 token_ids += r["output_ids"]
-            else:
+            elif isinstance(r["output_ids"], int):
                 token_ids.append(r["output_ids"])
+            else:
+                # for the empty first/last responses
+                token_ids.append(0)
         return token_ids, len(token_ids)
 
     def _get_triton_output_tokens(self, res_outputs: List[Dict]) -> List[str]:
diff --git a/genai-perf/genai_perf/tokenizer.py b/genai-perf/genai_perf/tokenizer.py
index 2467801e..42494e95 100644
--- a/genai-perf/genai_perf/tokenizer.py
+++ b/genai-perf/genai_perf/tokenizer.py
@@ -68,6 +68,9 @@ def __call__(self, text, **kwargs) -> "BatchEncoding":
     def encode(self, text, **kwargs) -> List[int]:
         self._encode_args.update(kwargs)
         return self._tokenizer.encode(text, **self._encode_args)
+
+    def apply_chat_template(self, text) -> List[int]:
+        return self._tokenizer.encode(self._tokenizer.apply_chat_template([{"role": "user", "content": text}], tokenize=False), add_special_tokens=False)
 
     def decode(self, token_ids, **kwargs) -> str:
         self._decode_args.update(kwargs)
diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index 3c23499f..6bbca4a2 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -108,6 +108,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
         "tokenizer",
         "tokenizer_trust_remote_code",
         "tokenizer_revision",
+        "triton_converter_apply_chat_template"
     ]
 
     utils.remove_file(args.profile_export_file)