
Commit

small fixes
annapendleton committed Sep 18, 2024
1 parent 8530207 · commit 76cdcc1
Showing 2 changed files with 2 additions and 14 deletions.
@@ -11,20 +11,10 @@ spec:
   minReplicas: ${hpa_min_replicas}
   maxReplicas: ${hpa_max_replicas}
   metrics:
-%{ if length(regexall("DCGM_.*", custom_metric_name)) > 0 }
-  - type: External
-    external:
-      metric:
-        name: prometheus.googleapis.com|${lower(custom_metric_name)}|unknown
-      target:
-        type: AverageValue
-        averageValue: ${hpa_averagevalue_target}
-%{ else }
   - type: Pods
     pods:
       metric:
         name: prometheus.googleapis.com|${custom_metric_name}|gauge
       target:
         type: AverageValue
         averageValue: ${hpa_averagevalue_target}
-%{ endif }
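
With the DCGM_ branch removed, the template unconditionally emits a Pods-type custom metric. As a minimal sketch of what the template might render to — assuming the illustrative inputs custom_metric_name = "vllm:num_requests_waiting" (one of the example metrics named in the validation message below) and hpa_averagevalue_target = 1, neither of which comes from this commit:

  # Hypothetical rendered fragment; input values are assumptions, not from the commit
  metrics:
  - type: Pods
    pods:
      metric:
        name: prometheus.googleapis.com|vllm:num_requests_waiting|gauge
      target:
        type: AverageValue
        averageValue: 1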
benchmarks/inference-server/vllm/variables.tf — 6 changes: 2 additions & 4 deletions
@@ -107,15 +107,13 @@ variable "project_id" {
   type = string
 }
 
-
-
 variable "hpa_type" {
-  description = "How the TGI workload should be scaled."
+  description = "How the vllm workload should be scaled."
   type        = string
   default     = null
   nullable    = true
   validation {
-    condition     = var.hpa_type == null ? true : length(regexall("cpu|vllm.*|DCGM_.*", var.hpa_type)) > 0
+    condition     = var.hpa_type == null ? true : length(regexall("vllm.*", var.hpa_type)) > 0
     error_message = "Allowed values for hpa_type are {null, or vLLM metrics (e.g., \"vllm:num_requests_waiting\", \"vllm:gpu_cache_usage_perc\")}"
   }
 }
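
With the narrowed validation, hpa_type now accepts only null or a metric name matching vllm.*; CPU and DCGM_ scaling targets are rejected. A hedged sketch of how a caller might set these variables in a terraform.tfvars file — only the variable names are taken from the diff above; every value is illustrative:

  # terraform.tfvars — illustrative values, not from the commit
  hpa_type                = "vllm:num_requests_waiting" # must match regexall("vllm.*", ...)
  hpa_min_replicas        = 1
  hpa_max_replicas        = 5
  hpa_averagevalue_target = 1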
