Skip to content

Commit

Permalink
configurable pipeline starting point, request rates configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
Bslabe123 committed Aug 15, 2024
1 parent 3e62239 commit 44143e7
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ export IP=$IP

huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential

timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
output_file="latency-profile-${timestamp}.txt"
for ((i = 1 ; i <= 2 ; i*=2 )); do
python3 benchmark_serving.py --host="$IP" --port="$PORT" --dataset=ShareGPT_V3_unfiltered_cleaned_split.json --tokenizer="$TOKENIZER" --request-rate=$i --backend="$BACKEND" --num-prompts=2 --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH > $output_file
# Run one benchmark pass per request rate. REQUEST_RATES is a comma-separated
# list (e.g. "1,2") rendered from the Terraform `request_rates` variable.
IFS=',' read -r -a request_rate_list <<< "$REQUEST_RATES"
for request_rate in "${request_rate_list[@]}"; do
  # Timestamped output file per run. Second-granularity: two runs finishing
  # within the same second would collide — assumes each run takes longer.
  timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
  output_file="latency-profile-${timestamp}.txt"
  # Prompt count scales with the rate (presumably ~30s of offered load — TODO confirm).
  # NOTE(review): $(( )) requires integer rates; a fractional rate would make
  # the arithmetic expansion fail.
  python3 benchmark_serving.py --host="$IP" --port="$PORT" --dataset=ShareGPT_V3_unfiltered_cleaned_split.json --tokenizer="$TOKENIZER" --request-rate="$request_rate" --backend="$BACKEND" --num-prompts=$((request_rate * 30)) --max-input-length="$INPUT_LENGTH" --max-output-length="$OUTPUT_LENGTH" > "$output_file"
done

5 changes: 5 additions & 0 deletions benchmarks/benchmark/tools/latency-profile/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
* limitations under the License.
*/

## BEFORE APPLYING TEMPLATES

# 1) Ensure a new data point needs to be uploaded — either none exists yet, or the existing one is unsatisfactory
# 2) Use the `catalog generate` tool to generate the manifests and pipe them to `kubectl apply -f`, assure kubectl succeeds
locals {
templates = [
for f in fileset(local.templates_path, "*tpl") :
Expand All @@ -38,6 +42,7 @@ locals {
max_num_prompts = var.max_num_prompts
max_output_len = var.max_output_len
max_prompt_len = var.max_prompt_len
request_rates = join(",", [for number in var.request_rates : tostring(number)])
tokenizer = var.tokenizer
hugging_face_token_b64 = var.hugging_face_token_b64
k8s_hf_secret_list = var.k8s_hf_secret == null ? [] : [var.k8s_hf_secret]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ spec:
value: ${max_prompt_len}
- name: OUTPUT_LENGTH
value: ${max_output_len}
- name: REQUEST_RATES
value: ${request_rates}
- name: OUTPUT_BUCKET
value: ${output_bucket}
- name: HF_TOKEN
Expand Down
58 changes: 58 additions & 0 deletions benchmarks/benchmark/tools/latency-profile/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ variable "max_prompt_len" {
}
}

# Request rates (requests/sec) to benchmark at. Joined into a comma-separated
# string in main.tf (`request_rates` local) and iterated over by the
# latency-profile benchmark script, one benchmark run per rate.
variable "request_rates" {
  description = "List of request rates (requests per second) to benchmark the model server at; one benchmark run is executed per rate."
  type        = list(number)
  default     = [1, 2]
  nullable    = false
}

variable "tokenizer" {
description = "Benchmark server configuration for tokenizer."
type = string
Expand Down Expand Up @@ -155,4 +162,55 @@ variable "hugging_face_token_b64" {
description = "Base 64 encoded hugging face token; stored in Secret Manager. Security considerations: https://kubernetes.io/docs/concepts/security/secrets-good-practices/"
type = string
nullable = false
}

# Describes every combination of model / model server / accelerator that the
# benchmarking pipeline should run, plus the request rates to test them at.
variable "pipeline_config" {
  description = "All combinations of model/model_server/accelerators to benchmark"
  type = object({
    # Allow-list of model names; the second validation below requires every
    # model referenced in `config` to appear here.
    valid_models = list(string)
    # Allow-list of accelerator names; the first validation below requires
    # every accelerator referenced in `config` to appear here.
    valid_accelerators = list(string)
    # Request rates to benchmark at.
    # NOTE(review): declared but not referenced by the validations shown —
    # confirm it is consumed elsewhere.
    request_rates = list(number)

    config = list(object({
      model_server = string # Model server name
      model_server_configs = list(object({
        models = list(string) # model name
        model_configs = list(object({
          accelerators = list(string) # Accelerator name
          accelerator_configs = list(object({
            accelerator_count = number # Number of accelerators
          }))
        }))
      }))
    }))
  })

  # Reject any accelerator name in `config` that is not listed in
  # `valid_accelerators`.
  validation {
    condition = alltrue([
      for cfg in var.pipeline_config.config : alltrue([
        for model_server_config in cfg.model_server_configs : (
          alltrue([
            for model_config in model_server_config.model_configs :
            alltrue([for accelerator in model_config.accelerators :
              contains(var.pipeline_config.valid_accelerators, accelerator)])
          ])
        )
      ])
    ])
    error_message = "Each accelerator must be in the valid_accelerators list."
  }

  # Reject any model name in `config` that is not listed in `valid_models`.
  validation {
    condition = alltrue([
      for cfg in var.pipeline_config.config : alltrue([
        for model_server_config in cfg.model_server_configs : (
          alltrue([
            for model in model_server_config.models :
            contains(var.pipeline_config.valid_models, model)
          ])
        )
      ])
    ])
    error_message = "Each model must be in the valid_models list."
  }
}
Empty file.
Empty file.

0 comments on commit 44143e7

Please sign in to comment.