From 576f4f5eb779cce1f2aa6d21e42e97e5bcceed84 Mon Sep 17 00:00:00 2001
From: Izzy Putterman
Date: Wed, 6 Nov 2024 22:11:10 -0800
Subject: [PATCH 1/3] First Pass

---
 .../openai_completions_converter.py           |  7 +++-
 genai-perf/genai_perf/inputs/inputs_config.py |  6 ++++
 .../inputs/retrievers/generic_dataset.py      |  5 ++-
 .../retrievers/synthetic_data_retriever.py    | 36 +++++++++++++++----
 .../retrievers/synthetic_prompt_generator.py  | 36 +++++++++++++++----
 genai-perf/genai_perf/main.py                 |  2 ++
 genai-perf/genai_perf/parser.py               | 16 +++++++++
 genai-perf/genai_perf/wrapper.py              | 11 ++++++
 8 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/genai-perf/genai_perf/inputs/converters/openai_completions_converter.py b/genai-perf/genai_perf/inputs/converters/openai_completions_converter.py
index 29813ae4..4d2afbc6 100644
--- a/genai-perf/genai_perf/inputs/converters/openai_completions_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/openai_completions_converter.py
@@ -29,7 +29,7 @@
 from genai_perf.inputs.converters.base_converter import BaseConverter
 from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN
 from genai_perf.inputs.inputs_config import InputsConfig
-from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
+from genai_perf.inputs.retrievers.generic_dataset import DataRow, GenericDataset
 from genai_perf.utils import sample_bounded_normal
 
 
@@ -50,6 +50,7 @@ def convert(
                 "prompt": prompt,
             }
             self._add_request_params(payload, config)
+            self._override_extra(payload, row)
             request_body["data"].append({"payload": [payload]})
 
         return request_body
@@ -67,3 +68,7 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
             )
         for key, value in config.extra_inputs.items():
             payload[key] = value
+
+    def _override_extra(self, payload: Dict, row: DataRow) -> None:
+        for key, value in row.extra_args.items():
+            payload[key] = value
diff --git a/genai-perf/genai_perf/inputs/inputs_config.py b/genai-perf/genai_perf/inputs/inputs_config.py
index 133c0121..56189514 100644
--- a/genai-perf/genai_perf/inputs/inputs_config.py
+++ b/genai-perf/genai_perf/inputs/inputs_config.py
@@ -77,6 +77,12 @@ class InputsConfig:
     # The filename where the input data is available
     input_filename: Optional[Path] = Path("")
 
+    # Path to the fixed-schedule file (JSONL): one request description per line
+    schedule_file: Optional[Path] = None
+
+    # Token block size for expanding a schedule entry's hash_ids into cached prompt chunks
+    block_size: int = 512
+
     # The filenames used for synthetic data generation
     synthetic_input_filenames: Optional[List[str]] = field(default_factory=list)
diff --git a/genai-perf/genai_perf/inputs/retrievers/generic_dataset.py b/genai-perf/genai_perf/inputs/retrievers/generic_dataset.py
index 126c2dd2..f51a5e4f 100644
--- a/genai-perf/genai_perf/inputs/retrievers/generic_dataset.py
+++ b/genai-perf/genai_perf/inputs/retrievers/generic_dataset.py
@@ -25,7 +25,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from dataclasses import dataclass, field
-from typing import Dict, List, TypeAlias
+from typing import Any, Dict, List
+
+from typing_extensions import TypeAlias
 
 Filename: TypeAlias = str
 TypeOfData: TypeAlias = str
@@ -38,6 +40,7 @@ class DataRow:
 
     texts: List[str] = field(default_factory=list)
     images: List[str] = field(default_factory=list)
+    extra_args: Dict[str, Any] = {}
 
     def to_dict(self) -> DataRowDict:
         """
diff --git a/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py b/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py
index 9813001b..274834e7 100644
--- a/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py
+++ b/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py
@@ -25,6 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import json
 from typing import List
 
 from genai_perf.inputs.input_constants import DEFAULT_SYNTHETIC_FILENAME
@@ -51,16 +52,39 @@ def retrieve_data(self) -> GenericDataset:
         files = self.config.synthetic_input_filenames or [DEFAULT_SYNTHETIC_FILENAME]
         synthetic_dataset = GenericDataset(files_data={})
 
+        prompt_desc = []
+        if self.config.schedule_file is not None:
+            with open(self.config.schedule_file, "r") as f:
+                for j, line in enumerate(f):
+                    if j == self.config.num_prompts:
+                        break
+                    prompt_desc.append(json.loads(line))
+
         for file in files:
             data_rows: List[DataRow] = []
 
-            for _ in range(self.config.num_prompts):
+            for i in range(self.config.num_prompts):
                 row = DataRow(texts=[], images=[])
-                prompt = SyntheticPromptGenerator.create_synthetic_prompt(
-                    self.config.tokenizer,
-                    self.config.prompt_tokens_mean,
-                    self.config.prompt_tokens_stddev,
-                )
+                if prompt_desc:
+                    prompt = SyntheticPromptGenerator.create_synthetic_prompt(
+                        self.config.tokenizer,
+                        prompt_desc[i]["input_length"],
+                        0,
+                        prompt_desc[i].get("hash_ids", None),
+                        self.config.block_size,
+                    )
+                    # TODO: apply schedule overrides generically instead of hard-coding each key
+                    row.extra_args["max_tokens"] = prompt_desc[i].get(
+                        "output_length", None
+                    )
+                    row.extra_args["model"] = prompt_desc[i].get("model", None)
+                else:
+                    prompt = SyntheticPromptGenerator.create_synthetic_prompt(
+                        self.config.tokenizer,
+                        self.config.prompt_tokens_mean,
+                        self.config.prompt_tokens_stddev,
+                    )
+
                 for _ in range(self.config.batch_size_text):
                     row.texts.append(prompt)
diff --git a/genai-perf/genai_perf/inputs/retrievers/synthetic_prompt_generator.py b/genai-perf/genai_perf/inputs/retrievers/synthetic_prompt_generator.py
index 68b77fdc..5d816d7c 100644
--- a/genai-perf/genai_perf/inputs/retrievers/synthetic_prompt_generator.py
+++ b/genai-perf/genai_perf/inputs/retrievers/synthetic_prompt_generator.py
@@ -17,18 +17,22 @@
 import pathlib
 import random
 import re
-from typing import List
+from typing import Dict, List, Optional
 
 from genai_perf.tokenizer import Tokenizer
 
 
 class SyntheticPromptGenerator:
+    cache: Dict[int, str] = {}
+
     @classmethod
     def create_synthetic_prompt(
         cls,
         tokenizer: Tokenizer,
         prompt_tokens_mean: int = 550,
         prompt_tokens_stddev: int = 250,
+        prompt_hash_list: Optional[List[int]] = None,
+        block_size: Optional[int] = None,
     ) -> str:
         """
         Generate a prompt that randomly samples lines from
@@ -49,9 +53,26 @@ def create_synthetic_prompt(
         )
 
         farewell_lines = SyntheticPromptGenerator._create_farewell_lines()
-        prompt = SyntheticPromptGenerator._create_prompt_from_lines(
-            num_prompt_tokens, farewell_lines, tokenizer
-        )
+        if block_size is not None:
+            assert prompt_hash_list, "prompt_hash_list is required when block_size is set"
+            final_prompt = []
+            size_to_use = block_size
+            for j, hash_index in enumerate(prompt_hash_list):
+                if j == len(prompt_hash_list) - 1:
+                    size_to_use = prompt_tokens_mean - (j * block_size)
+                if hash_index not in cls.cache:
+                    prompt = SyntheticPromptGenerator._create_prompt_from_lines(
+                        size_to_use, farewell_lines, tokenizer
+                    )
+                    cls.cache[hash_index] = prompt
+
+                final_prompt.append(cls.cache[hash_index])
+            prompt = " ".join(final_prompt)
+
+        else:
+            prompt = SyntheticPromptGenerator._create_prompt_from_lines(
+                num_prompt_tokens, farewell_lines, tokenizer
+            )
 
         return prompt
 
@@ -110,9 +131,10 @@ def word_generator():
         prompt += final_line
 
         # Final tweaks
-        diff = requested_prompt_tokens - get_token_length(prompt)
-        for _ in range(diff):
-            prompt = "hi " + prompt
+        for _ in range(2):
+            diff = requested_prompt_tokens - get_token_length(prompt)
+            for _ in range(diff):
+                prompt = "hi " + prompt
 
         return prompt
 
diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py
index 916df005..6467fc7c 100755
--- a/genai-perf/genai_perf/main.py
+++ b/genai-perf/genai_perf/main.py
@@ -74,6 +74,8 @@ def create_config_options(args: Namespace) -> InputsConfig:
         model_name=args.model,
         model_selection_strategy=args.model_selection_strategy,
         input_filename=args.input_file,
+        schedule_file=args.schedule_file,
+        block_size=args.block_size,
         synthetic_input_filenames=args.synthetic_input_files,
         starting_index=DEFAULT_STARTING_INDEX,
         length=args.num_prompts,
diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index e9df28a1..dd622859 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -448,6 +448,22 @@ def _add_input_args(parser):
         'in JSONL format. Example: {"text": "Your prompt here"}',
     )
 
+    prompt_source_group.add_argument(
+        "--schedule-file",
+        type=file_or_directory,
+        default=None,
+        required=False,
+        help="Path to a JSONL file containing a fixed schedule of requests to replay.",
+    )
+
+    prompt_source_group.add_argument(
+        "--block-size",
+        type=int,
+        default=512,
+        required=False,
+        help="Token block size used when expanding hash_ids entries from the fixed schedule.",
+    )
+
     input_group.add_argument(
         "--num-prompts",
         type=positive_integer,
diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index a1888b79..f21dda88 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -24,6 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import json
 import subprocess
 from argparse import Namespace
 from typing import List, Optional
@@ -61,6 +62,16 @@ def add_inference_load_args(args: Namespace) -> List[str]:
             cmd += ["--concurrency-range", f"{args.concurrency}"]
         elif args.request_rate:
             cmd += ["--request-rate-range", f"{args.request_rate}"]
+
+        if args.schedule_file is not None:
+            assert args.request_rate, "Must use request rate with fixed schedule"
+            timings = []
+            with open(args.schedule_file, "r") as f:
+                for j, line in enumerate(f):
+                    if j == args.num_prompts:
+                        break
+                    timings.append(float(json.loads(line)["timestamp"]) / 1000)
+            cmd += ["--schedule", ",".join(map(str, timings))]
         return cmd
 
     @staticmethod

From 19ebf0ef0e23acd629f2f3c88e774c572e91ee69 Mon Sep 17 00:00:00 2001
From: Izzy Putterman
Date: Tue, 26 Nov 2024 14:15:29 -0800
Subject: [PATCH 2/3] Small fixes

---
 .../genai_perf/inputs/retrievers/generic_dataset.py |  2 +-
 genai-perf/genai_perf/parser.py                     |  4 ++--
 genai-perf/genai_perf/wrapper.py                    | 10 ++++++++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/genai-perf/genai_perf/inputs/retrievers/generic_dataset.py b/genai-perf/genai_perf/inputs/retrievers/generic_dataset.py
index f51a5e4f..f32b3aef 100644
--- a/genai-perf/genai_perf/inputs/retrievers/generic_dataset.py
+++ b/genai-perf/genai_perf/inputs/retrievers/generic_dataset.py
@@ -40,7 +40,7 @@ class DataRow:
 
     texts: List[str] = field(default_factory=list)
     images: List[str] = field(default_factory=list)
-    extra_args: Dict[str, Any] = {}
+    extra_args: Dict[str, Any] = field(default_factory=dict)
 
     def to_dict(self) -> DataRowDict:
         """
diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index dd622859..e0562a8b 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -448,7 +448,7 @@ def _add_input_args(parser):
         'in JSONL format. Example: {"text": "Your prompt here"}',
     )
 
-    prompt_source_group.add_argument(
+    input_group.add_argument(
         "--schedule-file",
         type=file_or_directory,
         default=None,
@@ -456,7 +456,7 @@ def _add_input_args(parser):
         required=False,
         help="Path to a JSONL file containing a fixed schedule of requests to replay.",
     )
 
-    prompt_source_group.add_argument(
+    input_group.add_argument(
         "--block-size",
         type=int,
         default=512,
diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index f21dda88..86e548cf 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -25,6 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import json
+import os
 import subprocess
 from argparse import Namespace
 from typing import List, Optional
@@ -70,8 +71,13 @@ def add_inference_load_args(args: Namespace) -> List[str]:
                 for j, line in enumerate(f):
                     if j == args.num_prompts:
                         break
-                    timings.append(float(json.loads(line)["timestamp"]) / 1000)
-            cmd += ["--schedule", ",".join(map(str, timings))]
+                    timings.append(float(json.loads(line)["timestamp"]) * 1000)
+            with open(os.path.join(args.artifact_dir, "timing.txt"), "w") as f:
+                f.write("\n".join(map(str, timings)))
+            cmd += [
+                "--request-intervals",
+                os.path.join(args.artifact_dir, "timing.txt"),
+            ]
         return cmd
 
     @staticmethod

From 2643c3aa51979084234bf2ca304507876d0a6576 Mon Sep 17 00:00:00 2001
From: Izzy Putterman
Date: Tue, 26 Nov 2024 15:51:31 -0800
Subject: [PATCH 3/3] Small fixes

---
 genai-perf/genai_perf/inputs/inputs_config.py                | 2 +-
 .../genai_perf/inputs/retrievers/synthetic_data_retriever.py | 2 +-
 genai-perf/genai_perf/main.py                                | 3 +++
 genai-perf/genai_perf/parser.py                              | 4 ++--
 genai-perf/genai_perf/wrapper.py                             | 4 +++-
 5 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/genai-perf/genai_perf/inputs/inputs_config.py b/genai-perf/genai_perf/inputs/inputs_config.py
index 56189514..fdbf91cb 100644
--- a/genai-perf/genai_perf/inputs/inputs_config.py
+++ b/genai-perf/genai_perf/inputs/inputs_config.py
@@ -78,7 +78,7 @@ class InputsConfig:
     input_filename: Optional[Path] = Path("")
 
     # Path to the fixed-schedule file (JSONL): one request description per line
-    schedule_file: Optional[Path] = None
+    schedule_file: Optional[str] = None
 
     # Token block size for expanding a schedule entry's hash_ids into cached prompt chunks
     block_size: int = 512
diff --git a/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py b/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py
index 274834e7..f0d0dfbb 100644
--- a/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py
+++ b/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py
@@ -77,7 +77,7 @@ def retrieve_data(self) -> GenericDataset:
                     row.extra_args["max_tokens"] = prompt_desc[i].get(
                         "output_length", None
                     )
-                    row.extra_args["model"] = prompt_desc[i].get("model", None)
+                    # row.extra_args["model"] = prompt_desc[i].get("model", None)
                 else:
                     prompt = SyntheticPromptGenerator.create_synthetic_prompt(
                         self.config.tokenizer,
diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py
index 6467fc7c..8ee20e77 100755
--- a/genai-perf/genai_perf/main.py
+++ b/genai-perf/genai_perf/main.py
@@ -155,6 +155,9 @@ def report_output(
     elif args.request_rate:
         infer_mode = "request_rate"
         load_level = f"{args.request_rate}"
+    elif args.schedule_file:
+        infer_mode = "request_interval"
+        load_level = f"{args.schedule_file}"
     else:
         raise GenAIPerfException("No valid infer mode specified")
 
diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index e0562a8b..af21e9ba 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -213,7 +213,7 @@ def _check_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
     Check inference load args
     """
     # If no concurrency or request rate is set, default to 1
-    if not args.concurrency and not args.request_rate:
+    if not args.concurrency and not args.request_rate and not args.schedule_file:
         args.concurrency = 1
     return args
 
@@ -450,7 +450,7 @@ def _add_input_args(parser):
     input_group.add_argument(
         "--schedule-file",
-        type=file_or_directory,
+        type=str,
         default=None,
         required=False,
         help="Path to a JSONL file containing a fixed schedule of requests to replay.",
     )
diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py
index 86e548cf..84896460 100644
--- a/genai-perf/genai_perf/wrapper.py
+++ b/genai-perf/genai_perf/wrapper.py
@@ -65,7 +65,7 @@ def add_inference_load_args(args: Namespace) -> List[str]:
             cmd += ["--request-rate-range", f"{args.request_rate}"]
 
         if args.schedule_file is not None:
-            assert args.request_rate, "Must use request rate with fixed schedule"
+            # assert args.request_rate, "Must use request rate with fixed schedule"
             timings = []
             with open(args.schedule_file, "r") as f:
                 for j, line in enumerate(f):
@@ -87,6 +87,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
             "backend",
             "batch_size_image",
             "batch_size_text",
+            "block_size",
             "concurrency",
             "endpoint_type",
             "extra_inputs",
@@ -112,6 +113,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
             "prompt_source",
             "random_seed",
             "request_rate",
+            "schedule_file",
             "server_metrics_url",
             # The 'streaming' passed in to this script is to determine if the
             # LLM response should be streaming. That is different than the
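
A note on the schedule file consumed above: the retriever and wrapper read it as
JSONL, one JSON object per request, using the keys "timestamp", "input_length",
and optionally "output_length", "hash_ids", and "model". Below is a minimal
sketch of generating such a file; the example values, the file name
"schedule.jsonl", the timestamp unit (the patches multiply it by 1000 before
writing timing.txt for --request-intervals), and the trailing invocation are
assumptions inferred from how the code reads each key, not confirmed by the
patches:

    import json

    # One object per request. With --block-size 512, an entry with
    # input_length=1024 and hash_ids=[0, 1] is assembled from two 512-token
    # blocks; the generator caches each block by hash id, so the second
    # request's hash_ids=[0, 2] reuses the first request's leading block.
    schedule = [
        {"timestamp": 0, "input_length": 1024, "output_length": 128, "hash_ids": [0, 1]},
        {"timestamp": 250, "input_length": 1024, "output_length": 128, "hash_ids": [0, 2]},
    ]

    with open("schedule.jsonl", "w") as f:
        for entry in schedule:
            f.write(json.dumps(entry) + "\n")

    # Hypothetical invocation using the flags these patches add:
    #   genai-perf profile -m <model> --schedule-file schedule.jsonl --block-size 512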