From f0e245eb496b7abf99558ea8b09987cfe58765cb Mon Sep 17 00:00:00 2001
From: Anna Pendleton
Date: Wed, 4 Sep 2024 07:22:59 +0000
Subject: [PATCH] add podmonitoring to vllm, small fix

---
 benchmarks/inference-server/vllm/main.tf |  7 +++++++
 .../vllm/manifest-templates/vllm.tftpl   |  2 +-
 .../vllm-podmonitoring.yaml.tftpl        | 12 ++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/inference-server/vllm/monitoring-templates/vllm-podmonitoring.yaml.tftpl

diff --git a/benchmarks/inference-server/vllm/main.tf b/benchmarks/inference-server/vllm/main.tf
index 4161e0347..cc2b8f1fc 100644
--- a/benchmarks/inference-server/vllm/main.tf
+++ b/benchmarks/inference-server/vllm/main.tf
@@ -43,6 +43,7 @@ locals {
     ? null
     : "${var.hugging_face_secret}/versions/${var.hugging_face_secret_version}"
   )
+  vllm_podmonitoring = "${path.module}/monitoring-templates/vllm-podmonitoring.yaml.tftpl"
 }
 
 resource "kubernetes_manifest" "default" {
@@ -59,3 +60,9 @@ resource "kubernetes_manifest" "default" {
     create = "60m"
   }
 }
+
+resource "kubernetes_manifest" "vllm-pod-monitoring" {
+  manifest = yamldecode(templatefile(local.vllm_podmonitoring, {
+    namespace = var.namespace
+  }))
+}
diff --git a/benchmarks/inference-server/vllm/manifest-templates/vllm.tftpl b/benchmarks/inference-server/vllm/manifest-templates/vllm.tftpl
index 169ec4356..336c45167 100644
--- a/benchmarks/inference-server/vllm/manifest-templates/vllm.tftpl
+++ b/benchmarks/inference-server/vllm/manifest-templates/vllm.tftpl
@@ -55,7 +55,7 @@ spec:
         command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
         args: ["--model", "${model_id}", "--tensor-parallel-size", "${gpu_count}", "--port", "80", "--swap-space", "${swap_space}", "--disable-log-requests"]
         env:
-        - name: PORT
+        - name: VLLM_PORT
           value: 80
 %{ for hugging_face_token_secret in hugging_face_token_secret_list ~}
         - name: HUGGING_FACE_HUB_TOKEN # Related token consumption
diff --git a/benchmarks/inference-server/vllm/monitoring-templates/vllm-podmonitoring.yaml.tftpl b/benchmarks/inference-server/vllm/monitoring-templates/vllm-podmonitoring.yaml.tftpl
new file mode 100644
index 000000000..f582fc76e
--- /dev/null
+++ b/benchmarks/inference-server/vllm/monitoring-templates/vllm-podmonitoring.yaml.tftpl
@@ -0,0 +1,12 @@
+apiVersion: monitoring.googleapis.com/v1
+kind: PodMonitoring
+metadata:
+  name: "vllm-podmonitoring"
+  namespace: ${namespace}
+spec:
+  selector:
+    matchLabels:
+      app: vllm
+  endpoints:
+  - port: 80
+    interval: 15s
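
A minimal sketch of the render step the new kubernetes_manifest resource performs,
assuming a namespace value of "benchmark"; the output name below is illustrative
and not part of the patch. Evaluated from the module directory (for example via
`terraform console` or a throwaway output), it shows the decoded PodMonitoring
object that gets applied to the cluster:

  # Hypothetical output for inspecting the rendered manifest; "benchmark"
  # is an assumed namespace, standing in for var.namespace.
  output "vllm_podmonitoring_rendered" {
    value = yamldecode(templatefile(
      "${path.module}/monitoring-templates/vllm-podmonitoring.yaml.tftpl",
      { namespace = "benchmark" }
    ))
  }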