
Commit

small fixes
annapendleton committed Sep 18, 2024
1 parent 8530207 · commit 76cdcc1
Showing 2 changed files with 2 additions and 14 deletions.
@@ -11,20 +11,10 @@ spec:
   minReplicas: ${hpa_min_replicas}
   maxReplicas: ${hpa_max_replicas}
   metrics:
-%{ if length(regexall("DCGM_.*", custom_metric_name)) > 0 }
-  - type: External
-    external:
-      metric:
-        name: prometheus.googleapis.com|${lower(custom_metric_name)}|unknown
-      target:
-        type: AverageValue
-        averageValue: ${hpa_averagevalue_target}
-%{ else }
   - type: Pods
     pods:
       metric:
         name: prometheus.googleapis.com|${custom_metric_name}|gauge
       target:
         type: AverageValue
         averageValue: ${hpa_averagevalue_target}
-%{ endif }
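
With the DCGM_ branch removed, the template unconditionally emits a Pods-type custom metric. As a minimal sketch of what the template might render to — assuming the illustrative inputs custom_metric_name = "vllm:num_requests_waiting" (one of the example metrics named in the validation message below) and hpa_averagevalue_target = 1, neither of which comes from this commit:

  # Hypothetical rendered fragment; input values are assumptions, not from the commit
  metrics:
  - type: Pods
    pods:
      metric:
        name: prometheus.googleapis.com|vllm:num_requests_waiting|gauge
      target:
        type: AverageValue
        averageValue: 1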
benchmarks/inference-server/vllm/variables.tf — 6 changes: 2 additions & 4 deletions
@@ -107,15 +107,13 @@ variable "project_id" {
   type = string
 }
 
-
-
 variable "hpa_type" {
-  description = "How the TGI workload should be scaled."
+  description = "How the vllm workload should be scaled."
   type        = string
   default     = null
   nullable    = true
   validation {
-    condition     = var.hpa_type == null ? true : length(regexall("cpu|vllm.*|DCGM_.*", var.hpa_type)) > 0
+    condition     = var.hpa_type == null ? true : length(regexall("vllm.*", var.hpa_type)) > 0
     error_message = "Allowed values for hpa_type are {null, or vLLM metrics (e.g., \"vllm:num_requests_waiting\", \"vllm:gpu_cache_usage_perc\")}"
   }
 }
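
With the narrowed validation, hpa_type now accepts only null or a metric name matching vllm.*; CPU and DCGM_ scaling targets are rejected. A hedged sketch of how a caller might set these variables in a terraform.tfvars file — only the variable names are taken from the diff above; every value is illustrative:

  # terraform.tfvars — illustrative values, not from the commit
  hpa_type                = "vllm:num_requests_waiting" # must match regexall("vllm.*", ...)
  hpa_min_replicas        = 1
  hpa_max_replicas        = 5
  hpa_averagevalue_target = 1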
