diff --git a/benchmarks/inference-server/vllm/hpa-templates/hpa.vllm.custom_metric.yaml.tftpl b/benchmarks/inference-server/vllm/hpa-templates/hpa.vllm.custom_metric.yaml.tftpl
index 2c318d848..956241afd 100644
--- a/benchmarks/inference-server/vllm/hpa-templates/hpa.vllm.custom_metric.yaml.tftpl
+++ b/benchmarks/inference-server/vllm/hpa-templates/hpa.vllm.custom_metric.yaml.tftpl
@@ -11,15 +11,6 @@ spec:
   minReplicas: ${hpa_min_replicas}
   maxReplicas: ${hpa_max_replicas}
   metrics:
-%{ if length(regexall("DCGM_.*", custom_metric_name)) > 0 }
-  - type: External
-    external:
-      metric:
-        name: prometheus.googleapis.com|${lower(custom_metric_name)}|unknown
-      target:
-        type: AverageValue
-        averageValue: ${hpa_averagevalue_target}
-%{ else }
   - type: Pods
     pods:
       metric:
@@ -27,4 +18,3 @@ spec:
       target:
         type: AverageValue
         averageValue: ${hpa_averagevalue_target}
-%{ endif }
diff --git a/benchmarks/inference-server/vllm/variables.tf b/benchmarks/inference-server/vllm/variables.tf
index 71ff800ee..a59e4726f 100644
--- a/benchmarks/inference-server/vllm/variables.tf
+++ b/benchmarks/inference-server/vllm/variables.tf
@@ -107,15 +107,13 @@ variable "project_id" {
   type        = string
 }
 
-
-
 variable "hpa_type" {
-  description = "How the TGI workload should be scaled."
+  description = "How the vllm workload should be scaled."
  type        = string
  default     = null
  nullable    = true
  validation {
-    condition     = var.hpa_type == null ? true : length(regexall("cpu|vllm.*|DCGM_.*", var.hpa_type)) > 0
+    condition     = var.hpa_type == null ? true : length(regexall("vllm.*", var.hpa_type)) > 0
-    error_message = "Allows values for hpa_type are {null, or vLLM metrics (e.g., \"vllm:num_requests_waiting\", \"vllm:gpu_cache_usage_perc\")}"
+    error_message = "Allowed values for hpa_type are {null, or vLLM metrics (e.g., \"vllm:num_requests_waiting\", \"vllm:gpu_cache_usage_perc\")}"
   }
 }