From 6122f9d495446cd4d17343d54a09923eb772ccdf Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Thu, 14 Nov 2024 16:47:03 -0800
Subject: [PATCH 01/17] add feature to set end_id for trtllm engine input converter

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 2 ++
 genai-perf/genai_perf/inputs/inputs_config.py         | 3 +++
 genai-perf/genai_perf/main.py                         | 1 +
 genai-perf/genai_perf/parser.py                       | 7 +++++++
 4 files changed, 13 insertions(+)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index a6f74063..579a983e 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -80,6 +80,8 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["request_output_len"] = [num_tokens]
             if config.output_tokens_deterministic:
                 payload["min_length"] = [num_tokens]
+        if config.set_end_id:
+            payload["end_id"] = config.tokenizer._tokenizer.eos_token_id
 
         for key, value in config.extra_inputs.items():
             payload[key] = [value]

diff --git a/genai-perf/genai_perf/inputs/inputs_config.py b/genai-perf/genai_perf/inputs/inputs_config.py
index 133c0121..908561b6 100644
--- a/genai-perf/genai_perf/inputs/inputs_config.py
+++ b/genai-perf/genai_perf/inputs/inputs_config.py
@@ -142,3 +142,6 @@ class InputsConfig:
 
     # Seed used to generate random values
     random_seed: int = DEFAULT_RANDOM_SEED
+
+    # whether to set end_id in triton converter
+    set_end_id: bool = False

diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py
index 916df005..4c2bf992 100755
--- a/genai-perf/genai_perf/main.py
+++ b/genai-perf/genai_perf/main.py
@@ -97,6 +97,7 @@ def create_config_options(args: Namespace) -> InputsConfig:
         batch_size_image=args.batch_size_image,
         batch_size_text=args.batch_size_text,
         output_dir=args.artifact_dir,
+        set_end_id=args.triton_converter_set_end_id,
     )

diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index e9df28a1..2a01c39c 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -739,6 +739,13 @@ def _add_output_args(parser):
         "export file is profile_export.json, the genai-perf file will be "
         "exported to profile_export_genai_perf.csv.",
     )
+    output_group.add_argument(
+        "--triton-converter-set-end-id",
+        action="store_true",
+        required=False,
+        help="If specified, the input to trtllm engines in triton server will "
+        "contain end_id set to EOS token."
+    )
 
 
 def _add_other_args(parser):
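The `end_id` value that PATCH 01 injects is whatever EOS id the loaded tokenizer reports. A quick standalone illustration of where that integer comes from, using Hugging Face `transformers` directly (the model id is only an example; this snippet is not part of the series):

```python
# Standalone sketch (not part of the patch series): where the EOS id comes from.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")  # any model id works; gpt2 is only an example
print(tok.eos_token, tok.eos_token_id)       # <|endoftext|> 50256
```

With `--triton-converter-set-end-id`, that integer is what lands in `payload["end_id"]`.
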
From bd7949390e8ac8bde0e54fd6d08b68b28859dc84 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Thu, 14 Nov 2024 17:18:30 -0800
Subject: [PATCH 02/17] move to input group

---
 genai-perf/genai_perf/parser.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index 2a01c39c..9f45acb2 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -571,6 +571,14 @@ def _add_image_input_args(parser):
         "If format is not selected, format of generated image is selected at random",
     )
 
+    input_group.add_argument(
+        "--triton-converter-set-end-id",
+        action="store_true",
+        required=False,
+        help="If specified, the input to trtllm engines in triton server will "
+        "contain end_id set to EOS token."
+    )
+
 
 def _add_profile_args(parser):
     profile_group = parser.add_argument_group("Profiling")
@@ -739,13 +747,6 @@ def _add_output_args(parser):
         "export file is profile_export.json, the genai-perf file will be "
         "exported to profile_export_genai_perf.csv.",
     )
-    output_group.add_argument(
-        "--triton-converter-set-end-id",
-        action="store_true",
-        required=False,
-        help="If specified, the input to trtllm engines in triton server will "
-        "contain end_id set to EOS token."
-    )
 
 
 def _add_other_args(parser):

From df0be1c46bb3ac07f52c867aa2f2f7aedcecb888 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Thu, 14 Nov 2024 17:22:34 -0800
Subject: [PATCH 03/17] add to skip_arg

---
 genai-perf/genai_perf/wrapper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index 3c23499f..574eb7b3 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -108,6 +108,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
         "tokenizer",
         "tokenizer_trust_remote_code",
         "tokenizer_revision",
+        "triton-converter-set-end-id",
     ]
 
     utils.remove_file(args.profile_export_file)

From f6522711c9c0c10b4f7fdb9d4ab466de26827c7f Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Thu, 14 Nov 2024 17:23:28 -0800
Subject: [PATCH 04/17] typo

---
 genai-perf/genai_perf/wrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index 574eb7b3..03f06e62 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -108,7 +108,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
         "tokenizer",
         "tokenizer_trust_remote_code",
         "tokenizer_revision",
-        "triton-converter-set-end-id",
+        "triton_converter_set_end_id",
     ]
 
     utils.remove_file(args.profile_export_file)

From e738910945d6fc0f259500348c42329f9be4bea8 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Thu, 14 Nov 2024 17:26:22 -0800
Subject: [PATCH 05/17] pass end_id in list

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index 579a983e..2604f1bf 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -81,7 +81,7 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             if config.output_tokens_deterministic:
                 payload["min_length"] = [num_tokens]
         if config.set_end_id:
-            payload["end_id"] = config.tokenizer._tokenizer.eos_token_id
+            payload["end_id"] = [config.tokenizer._tokenizer.eos_token_id]
 
         for key, value in config.extra_inputs.items():
             payload[key] = [value]
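PATCH 05 wraps `end_id` in a single-element list, matching how every other scalar parameter in this converter is emitted, presumably because the TensorRT-LLM backend in Triton expects each input as a tensor. A rough sketch of the resulting request body, with made-up values (only the field names come from the patches):

```python
# Illustrative payload shape only; real values come from the tokenizer and the
# profiling configuration, and only the field names are taken from the patches.
payload = {
    "input_ids": {
        "content": [1, 15043, 3186],  # prompt token ids
        "shape": [3],
    },
    "request_output_len": [128],  # scalar parameters travel as one-element lists
    "min_length": [128],          # only present when output_tokens_deterministic is set
    "end_id": [2],                # the PATCH 05 fix: a list, not a bare int
}
print(payload)
```
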
From 144f16522949c90038a23272459e709af0028c49 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 10:49:29 -0800
Subject: [PATCH 06/17] add chat template

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 5 ++++-
 genai-perf/genai_perf/inputs/inputs_config.py         | 3 +++
 genai-perf/genai_perf/main.py                         | 1 +
 genai-perf/genai_perf/parser.py                       | 7 +++++++
 genai-perf/genai_perf/tokenizer.py                    | 3 +++
 genai-perf/genai_perf/wrapper.py                      | 1 +
 6 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index 2604f1bf..47679744 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -52,7 +52,10 @@ def convert(
 
         for file_data in generic_dataset.files_data.values():
             for row in file_data.rows:
-                token_ids = config.tokenizer.encode(row.texts[0])
+                if not config.apply_chat_template:
+                    token_ids = config.tokenizer.encode(row.texts[0])
+                else:
+                    token_ids = config.tokenizer.apply_chat_template([{"role": "user", "context": row.texts[0]}])
                 payload = {
                     "input_ids": {
                         "content": token_ids,

diff --git a/genai-perf/genai_perf/inputs/inputs_config.py b/genai-perf/genai_perf/inputs/inputs_config.py
index 908561b6..a1a4a7b0 100644
--- a/genai-perf/genai_perf/inputs/inputs_config.py
+++ b/genai-perf/genai_perf/inputs/inputs_config.py
@@ -145,3 +145,6 @@ class InputsConfig:
 
     # whether to set end_id in triton converter
     set_end_id: bool = False
+
+    # whether to apply chat template in triton converter
+    apply_chat_template: bool = False
\ No newline at end of file

diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py
index 4c2bf992..b7c28dd0 100755
--- a/genai-perf/genai_perf/main.py
+++ b/genai-perf/genai_perf/main.py
@@ -98,6 +98,7 @@ def create_config_options(args: Namespace) -> InputsConfig:
         batch_size_text=args.batch_size_text,
         output_dir=args.artifact_dir,
         set_end_id=args.triton_converter_set_end_id,
+        apply_chat_template=args.triton_converter_apply_chat_template,
     )

diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index 9f45acb2..e2b3ff7c 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -579,6 +579,13 @@ def _add_image_input_args(parser):
         "contain end_id set to EOS token."
     )
 
+    input_group.add_argument(
+        "--triton-converter-apply-chat-template",
+        action="store_true",
+        required=False,
+        help="If specified, the input to trtllm engines in triton server will "
+        "be wrapped with chat template."
+    )
 
 def _add_profile_args(parser):
     profile_group = parser.add_argument_group("Profiling")

diff --git a/genai-perf/genai_perf/tokenizer.py b/genai-perf/genai_perf/tokenizer.py
index 2467801e..a0a28d06 100644
--- a/genai-perf/genai_perf/tokenizer.py
+++ b/genai-perf/genai_perf/tokenizer.py
@@ -68,6 +68,9 @@ def __call__(self, text, **kwargs) -> "BatchEncoding":
     def encode(self, text, **kwargs) -> List[int]:
         self._encode_args.update(kwargs)
         return self._tokenizer.encode(text, **self._encode_args)
+
+    def apply_chat_template(self, text, **kwargs) -> List[int]:
+        return self._tokenizer.apply_chat_template(text, **kwargs)
 
     def decode(self, token_ids, **kwargs) -> str:
         self._decode_args.update(kwargs)

diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index 03f06e62..33986791 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -109,6 +109,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
         "tokenizer_trust_remote_code",
         "tokenizer_revision",
         "triton_converter_set_end_id",
+        "triton_converter_apply_chat_template"
     ]
 
     utils.remove_file(args.profile_export_file)
From 1777fa1768752153bac3c714e65705c33764483f Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 11:01:41 -0800
Subject: [PATCH 07/17] typo

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index 47679744..be5a66a7 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -55,7 +55,7 @@ def convert(
                 if not config.apply_chat_template:
                     token_ids = config.tokenizer.encode(row.texts[0])
                 else:
-                    token_ids = config.tokenizer.apply_chat_template([{"role": "user", "context": row.texts[0]}])
+                    token_ids = config.tokenizer.apply_chat_template([{"role": "user", "content": row.texts[0]}])
                 payload = {
                     "input_ids": {
                         "content": token_ids,

From d6be758a2d645176fff79df6d4cace6201fefd64 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 15:17:18 -0800
Subject: [PATCH 08/17] correct chat template

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 2 +-
 genai-perf/genai_perf/tokenizer.py                    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index be5a66a7..7b4bd0ec 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -55,7 +55,7 @@ def convert(
                 if not config.apply_chat_template:
                     token_ids = config.tokenizer.encode(row.texts[0])
                 else:
-                    token_ids = config.tokenizer.apply_chat_template([{"role": "user", "content": row.texts[0]}])
+                    token_ids = config.tokenizer.apply_chat_template(row.texts[0])
                 payload = {
                     "input_ids": {
                         "content": token_ids,

diff --git a/genai-perf/genai_perf/tokenizer.py b/genai-perf/genai_perf/tokenizer.py
index a0a28d06..f34666b8 100644
--- a/genai-perf/genai_perf/tokenizer.py
+++ b/genai-perf/genai_perf/tokenizer.py
@@ -69,8 +69,8 @@ def encode(self, text, **kwargs) -> List[int]:
         self._encode_args.update(kwargs)
         return self._tokenizer.encode(text, **self._encode_args)
 
-    def apply_chat_template(self, text, **kwargs) -> List[int]:
-        return self._tokenizer.apply_chat_template(text, **kwargs)
+    def apply_chat_template(self, text) -> List[int]:
+        return self._tokenizer.encode(self._tokenizer.apply_chat_template([{"role": "user", "content": text}]), add_special_tokens=False)
 
     def decode(self, token_ids, **kwargs) -> str:
         self._decode_args.update(kwargs)

From d067b04d0b4644f35e8e69a314e1d9fb3a035f27 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 15:31:09 -0800
Subject: [PATCH 09/17] typo

---
 genai-perf/genai_perf/tokenizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genai-perf/genai_perf/tokenizer.py b/genai-perf/genai_perf/tokenizer.py
index f34666b8..42494e95 100644
--- a/genai-perf/genai_perf/tokenizer.py
+++ b/genai-perf/genai_perf/tokenizer.py
@@ -70,7 +70,7 @@ def encode(self, text, **kwargs) -> List[int]:
         return self._tokenizer.encode(text, **self._encode_args)
 
     def apply_chat_template(self, text) -> List[int]:
-        return self._tokenizer.encode(self._tokenizer.apply_chat_template([{"role": "user", "content": text}]), add_special_tokens=False)
+        return self._tokenizer.encode(self._tokenizer.apply_chat_template([{"role": "user", "content": text}], tokenize=False), add_special_tokens=False)
 
     def decode(self, token_ids, **kwargs) -> str:
         self._decode_args.update(kwargs)
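The form PATCH 09 settles on renders the chat template to text first, then tokenizes that text without re-adding special tokens, since the rendered template already contains them. A minimal standalone sketch of the same two-step encoding with a Hugging Face tokenizer (the model id is only an example; this snippet is not part of the series):

```python
# Minimal sketch of the two-step encoding; the model id is only an example.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

prompt = "Summarize the Triton TensorRT-LLM backend in one sentence."
# Step 1: render the chat template to plain text.
text = tok.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False)
# Step 2: encode without adding special tokens again, because the rendered
# template already carries the role markers and BOS-style tokens.
token_ids = tok.encode(text, add_special_tokens=False)
print(token_ids[:10])
```
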
From c590a6e5fc74a846df770af82f32080de55bb2be Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 17:50:45 -0800
Subject: [PATCH 10/17] remove triton_converter_set_end_id from cli arg

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 2 +-
 genai-perf/genai_perf/main.py                         | 2 --
 genai-perf/genai_perf/parser.py                       | 8 --------
 genai-perf/genai_perf/wrapper.py                      | 1 -
 4 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index 7b4bd0ec..7d6877eb 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -83,7 +83,7 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["request_output_len"] = [num_tokens]
             if config.output_tokens_deterministic:
                 payload["min_length"] = [num_tokens]
-        if config.set_end_id:
+        if config.extra_inputs.getattr("triton_converter_set_end_id", False):
             payload["end_id"] = [config.tokenizer._tokenizer.eos_token_id]
 
         for key, value in config.extra_inputs.items():

diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py
index b7c28dd0..916df005 100755
--- a/genai-perf/genai_perf/main.py
+++ b/genai-perf/genai_perf/main.py
@@ -97,8 +97,6 @@ def create_config_options(args: Namespace) -> InputsConfig:
         batch_size_image=args.batch_size_image,
         batch_size_text=args.batch_size_text,
         output_dir=args.artifact_dir,
-        set_end_id=args.triton_converter_set_end_id,
-        apply_chat_template=args.triton_converter_apply_chat_template,
     )

diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index e2b3ff7c..d3ac59c2 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -571,14 +571,6 @@ def _add_image_input_args(parser):
         "If format is not selected, format of generated image is selected at random",
     )
 
-    input_group.add_argument(
-        "--triton-converter-set-end-id",
-        action="store_true",
-        required=False,
-        help="If specified, the input to trtllm engines in triton server will "
-        "contain end_id set to EOS token."
-    )
-
     input_group.add_argument(
         "--triton-converter-apply-chat-template",
         action="store_true",
         required=False,
         help="If specified, the input to trtllm engines in triton server will "
         "be wrapped with chat template."
     )

diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index 33986791..6bbca4a2 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -108,7 +108,6 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
         "tokenizer",
         "tokenizer_trust_remote_code",
         "tokenizer_revision",
-        "triton_converter_set_end_id",
         "triton_converter_apply_chat_template"
     ]

From 79f869d90798fdfcb7ca47ba39d3c63ca84d5011 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 17:57:41 -0800
Subject: [PATCH 11/17] typo

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index 7d6877eb..3cbb43aa 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -83,7 +83,7 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["request_output_len"] = [num_tokens]
             if config.output_tokens_deterministic:
                 payload["min_length"] = [num_tokens]
-        if config.extra_inputs.getattr("triton_converter_set_end_id", False):
+        if getattr(config.extra_inputs, "triton_converter_set_end_id", False):
             payload["end_id"] = [config.tokenizer._tokenizer.eos_token_id]
 
         for key, value in config.extra_inputs.items():

From f1d37084844a4236306ffbc079215bb124e1cf72 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 17:59:55 -0800
Subject: [PATCH 12/17] add back to ignore list

---
 genai-perf/genai_perf/wrapper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index 6bbca4a2..33986791 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -108,6 +108,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
         "tokenizer",
         "tokenizer_trust_remote_code",
         "tokenizer_revision",
+        "triton_converter_set_end_id",
         "triton_converter_apply_chat_template"
     ]

From ca20c30a922b59dfacbfb78c68570a3485880512 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 18:07:56 -0800
Subject: [PATCH 13/17] fix

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 4 ++--
 genai-perf/genai_perf/wrapper.py                      | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index 3cbb43aa..3c03b44f 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -83,8 +83,8 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             payload["request_output_len"] = [num_tokens]
             if config.output_tokens_deterministic:
                 payload["min_length"] = [num_tokens]
-        if getattr(config.extra_inputs, "triton_converter_set_end_id", False):
-            payload["end_id"] = [config.tokenizer._tokenizer.eos_token_id]
 
         for key, value in config.extra_inputs.items():
+            if key == "triton_converter_set_end_id" and value:
+                payload["end_id"] = [config.tokenizer._tokenizer.eos_token_id]
             payload[key] = [value]

diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index 33986791..6bbca4a2 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -108,7 +108,6 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
         "tokenizer",
         "tokenizer_trust_remote_code",
         "tokenizer_revision",
-        "triton_converter_set_end_id",
         "triton_converter_apply_chat_template"
     ]

From 36338f944527f9b20d3d67cebd1a2a39a15f05cc Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 18:09:17 -0800
Subject: [PATCH 14/17] typo

---
 .../inputs/converters/tensorrtllm_engine_converter.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index 3c03b44f..1d0db66d 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -87,4 +87,5 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
         for key, value in config.extra_inputs.items():
             if key == "triton_converter_set_end_id" and value:
                 payload["end_id"] = [config.tokenizer._tokenizer.eos_token_id]
-            payload[key] = [value]
+            else:
+                payload[key] = [value]
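After PATCH 10 through 14, `end_id` is no longer a dedicated CLI flag; it is expected to arrive as an extra input (for example via genai-perf's `--extra-inputs` option) and the converter intercepts that key instead of forwarding it verbatim. A stand-in sketch of the final loop, using plain dicts rather than the real `InputsConfig` and tokenizer objects:

```python
# Stand-in sketch of the final extra_inputs handling (plain dicts instead of the
# real InputsConfig / tokenizer objects).
extra_inputs = {"triton_converter_set_end_id": True, "temperature": 0.0}
eos_token_id = 2  # placeholder for config.tokenizer._tokenizer.eos_token_id

payload = {}
for key, value in extra_inputs.items():
    if key == "triton_converter_set_end_id" and value:
        # Sentinel key: translated into end_id rather than forwarded verbatim.
        payload["end_id"] = [eos_token_id]
    else:
        payload[key] = [value]

print(payload)  # {'end_id': [2], 'temperature': [0.0]}
```
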
From d2b2b639364fb519dc3f4c1527a29e04fc8a05a0 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Fri, 15 Nov 2024 18:18:35 -0800
Subject: [PATCH 15/17] typo

---
 genai-perf/genai_perf/main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py
index 916df005..4379dd67 100755
--- a/genai-perf/genai_perf/main.py
+++ b/genai-perf/genai_perf/main.py
@@ -97,6 +97,7 @@ def create_config_options(args: Namespace) -> InputsConfig:
         batch_size_image=args.batch_size_image,
         batch_size_text=args.batch_size_text,
         output_dir=args.artifact_dir,
+        apply_chat_template=args.triton_converter_apply_chat_template,
     )

From bf14733976115ff83e8c413b0ff171f93ffe70a9 Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Mon, 18 Nov 2024 09:57:16 -0800
Subject: [PATCH 16/17] fix measurement for empty first/last response

---
 .../profile_data_parser/llm_profile_data_parser.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
index e8d8503a..1395b02a 100755
--- a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
+++ b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
@@ -282,8 +282,11 @@ def _get_tensorrtllm_engine_token_counts(
         for r in res_outputs:
             if isinstance(r["output_ids"], list):
                 token_ids += r["output_ids"]
-            else:
+            elif isinstance(r["output_ids"], int):
                 token_ids.append(r["output_ids"])
+            else:
+                # for the empty first/last responses
+                token_ids.append(0)
         return token_ids, len(token_ids)
 
     def _get_triton_output_tokens(self, res_outputs: List[Dict]) -> List[str]:
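To see what PATCH 16 changes, here is the same counting logic run over a small made-up list of streamed responses (illustrative data only; real entries come from the profile export):

```python
# Illustrative data only; real entries come from the profile export.
res_outputs = [
    {"output_ids": [450, 7581, 338]},  # normal chunk: a list of token ids
    {"output_ids": 902},               # a single token delivered as a bare int
    {"output_ids": None},              # empty first/last response
]

token_ids = []
for r in res_outputs:
    if isinstance(r["output_ids"], list):
        token_ids += r["output_ids"]
    elif isinstance(r["output_ids"], int):
        token_ids.append(r["output_ids"])
    else:
        # count the empty response as a single placeholder token
        token_ids.append(0)

print(token_ids, len(token_ids))  # [450, 7581, 338, 902, 0] 5
```
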
From 22e2322c027877ccfdc66b850470a18709ffc52b Mon Sep 17 00:00:00 2001
From: hongkuan
Date: Mon, 18 Nov 2024 13:35:50 -0800
Subject: [PATCH 17/17] remove unused config in InputsConfig

---
 genai-perf/genai_perf/inputs/inputs_config.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/genai-perf/genai_perf/inputs/inputs_config.py b/genai-perf/genai_perf/inputs/inputs_config.py
index a1a4a7b0..0993668c 100644
--- a/genai-perf/genai_perf/inputs/inputs_config.py
+++ b/genai-perf/genai_perf/inputs/inputs_config.py
@@ -143,8 +143,5 @@ class InputsConfig:
     # Seed used to generate random values
     random_seed: int = DEFAULT_RANDOM_SEED
 
-    # whether to set end_id in triton converter
-    set_end_id: bool = False
-
     # whether to apply chat template in triton converter
     apply_chat_template: bool = False
\ No newline at end of file