From 44143e7c25fbf3faaf5b20a79180ed91c0fdd2f8 Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Thu, 15 Aug 2024 20:31:29 +0000 Subject: [PATCH] configurable pipeline starting point, request rates configurable --- .../container/latency_throughput_curve.sh | 8 +-- .../benchmark/tools/latency-profile/main.tf | 5 ++ .../latency-profile-generator.yaml.tpl | 2 + .../tools/latency-profile/variables.tf | 58 +++++++++++++++++++ .../benchmark/tools/profile-generator/main.tf | 0 .../tools/profile-generator/variables.tf | 0 6 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 benchmarks/benchmark/tools/profile-generator/main.tf create mode 100644 benchmarks/benchmark/tools/profile-generator/variables.tf diff --git a/benchmarks/benchmark/tools/latency-profile/container/latency_throughput_curve.sh b/benchmarks/benchmark/tools/latency-profile/container/latency_throughput_curve.sh index 707f8271c..e7ae88b8a 100755 --- a/benchmarks/benchmark/tools/latency-profile/container/latency_throughput_curve.sh +++ b/benchmarks/benchmark/tools/latency-profile/container/latency_throughput_curve.sh @@ -19,9 +19,9 @@ export IP=$IP huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential -timestamp=$(date +"%Y-%m-%d_%H-%M-%S") -output_file="latency-profile-${timestamp}.txt" -for ((i = 1 ; i <= 2 ; i*=2 )); do - python3 benchmark_serving.py --host="$IP" --port="$PORT" --dataset=ShareGPT_V3_unfiltered_cleaned_split.json --tokenizer="$TOKENIZER" --request-rate=$i --backend="$BACKEND" --num-prompts=2 --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH > $output_file +for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do + timestamp=$(date +"%Y-%m-%d_%H-%M-%S") + output_file="latency-profile-${timestamp}.txt" + python3 benchmark_serving.py --host="$IP" --port="$PORT" --dataset=ShareGPT_V3_unfiltered_cleaned_split.json --tokenizer="$TOKENIZER" --request-rate=$request_rate --backend="$BACKEND" --num-prompts=$((request_rate * 30)) 
--max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH > $output_file done diff --git a/benchmarks/benchmark/tools/latency-profile/main.tf b/benchmarks/benchmark/tools/latency-profile/main.tf index 42cf3146f..3906c6417 100644 --- a/benchmarks/benchmark/tools/latency-profile/main.tf +++ b/benchmarks/benchmark/tools/latency-profile/main.tf @@ -14,6 +14,10 @@ * limitations under the License. */ +## BEFORE APPLYING TEMPLATES + +# 1) Ensure that we need to upload the new data point: either there is none, or the existing one is unsatisfactory +# 2) Use the `catalog generate` tool to generate the manifests and pipe them to `kubectl apply -f`, ensure kubectl succeeds locals { templates = [ for f in fileset(local.templates_path, "*tpl") : @@ -38,6 +42,7 @@ locals { max_num_prompts = var.max_num_prompts max_output_len = var.max_output_len max_prompt_len = var.max_prompt_len + request_rates = join(",", [for number in var.request_rates : tostring(number)]) tokenizer = var.tokenizer hugging_face_token_b64 = var.hugging_face_token_b64 k8s_hf_secret_list = var.k8s_hf_secret == null ? 
[] : [var.k8s_hf_secret] diff --git a/benchmarks/benchmark/tools/latency-profile/manifest-templates/latency-profile-generator.yaml.tpl b/benchmarks/benchmark/tools/latency-profile/manifest-templates/latency-profile-generator.yaml.tpl index 53f189854..c3fb03b83 100644 --- a/benchmarks/benchmark/tools/latency-profile/manifest-templates/latency-profile-generator.yaml.tpl +++ b/benchmarks/benchmark/tools/latency-profile/manifest-templates/latency-profile-generator.yaml.tpl @@ -37,6 +37,8 @@ spec: value: ${max_prompt_len} - name: OUTPUT_LENGTH value: ${max_output_len} + - name: REQUEST_RATES + value: ${request_rates} - name: OUTPUT_BUCKET value: ${output_bucket} - name: HF_TOKEN diff --git a/benchmarks/benchmark/tools/latency-profile/variables.tf b/benchmarks/benchmark/tools/latency-profile/variables.tf index 586150440..896934cf0 100644 --- a/benchmarks/benchmark/tools/latency-profile/variables.tf +++ b/benchmarks/benchmark/tools/latency-profile/variables.tf @@ -118,6 +118,13 @@ variable "max_prompt_len" { } } +variable "request_rates" { + description = "List of request rates (requests per second) to benchmark at." + type = list(number) + default = [1, 2] + nullable = false +} + variable "tokenizer" { description = "Benchmark server configuration for tokenizer." type = string @@ -155,4 +162,55 @@ variable "hugging_face_token_b64" { description = "Base 64 encoded hugging face token; stored in Secret Manager. 
Security considerations: https://kubernetes.io/docs/concepts/security/secrets-good-practices/" type = string nullable = false +} + +variable "pipeline_config" { + description = "All combinations of model/model_server/accelerators to benchmark" + type = object({ + valid_models = list(string) + valid_accelerators = list(string) + request_rates = list(number) + + config = list(object({ + model_server = string # Model server name + model_server_configs = list(object({ + models = list(string) # model name + model_configs = list(object({ + accelerators = list(string) # Accelerator name + accelerator_configs = list(object({ + accelerator_count = number # Number of accelerators + })) + })) + })) + })) + }) + + validation { + condition = alltrue([ + for cfg in var.pipeline_config.config : alltrue([ + for model_server_config in cfg.model_server_configs : ( + alltrue([ + for model_config in model_server_config.model_configs : + alltrue([for accelerator in model_config.accelerators : + contains(var.pipeline_config.valid_accelerators, accelerator)]) + ]) + ) + ]) + ]) + error_message = "Each accelerator must be in the valid_accelerators list." + } + + validation { + condition = alltrue([ + for cfg in var.pipeline_config.config : alltrue([ + for model_server_config in cfg.model_server_configs : ( + alltrue([ + for model in model_server_config.models : + contains(var.pipeline_config.valid_models, model) + ]) + ) + ]) + ]) + error_message = "Each model must be in the valid_models list." + } } \ No newline at end of file diff --git a/benchmarks/benchmark/tools/profile-generator/main.tf b/benchmarks/benchmark/tools/profile-generator/main.tf new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/benchmark/tools/profile-generator/variables.tf b/benchmarks/benchmark/tools/profile-generator/variables.tf new file mode 100644 index 000000000..e69de29bb