Skip to content

Commit

Permalink
add podmonitoring to vllm, small fix (#796)
Browse files Browse the repository at this point in the history
* add podmonitoring to vllm, small fix

* fix numpy error in logs, run terraform fmt
  • Loading branch information
annapendleton committed Sep 4, 2024
1 parent c872599 commit 8027565
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ ninja # For faster builds.
psutil
ray >= 2.9
sentencepiece # Required for LLaMA tokenizer.
numpy
numpy < 2.0
torch == 2.1.1
transformers >= 4.37.0 # Required for Qwen2
xformers == 0.0.23
Expand Down
7 changes: 7 additions & 0 deletions benchmarks/inference-server/vllm/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ locals {
? null
: "${var.hugging_face_secret}/versions/${var.hugging_face_secret_version}"
)
vllm_podmonitoring = "${path.module}/monitoring-templates/vllm-podmonitoring.yaml.tftpl"
}

resource "kubernetes_manifest" "default" {
Expand All @@ -59,3 +60,9 @@ resource "kubernetes_manifest" "default" {
create = "60m"
}
}

# Deploys a Google Managed Prometheus PodMonitoring object so vLLM server
# metrics are scraped. The manifest is rendered from a .tftpl template with
# the target namespace injected, then parsed into an HCL object for the
# kubernetes_manifest resource.
resource "kubernetes_manifest" "vllm-pod-monitoring" {
  manifest = yamldecode(
    templatefile(
      local.vllm_podmonitoring,
      {
        namespace = var.namespace
      }
    )
  )
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ spec:
command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
args: ["--model", "${model_id}", "--tensor-parallel-size", "${gpu_count}", "--port", "80", "--swap-space", "${swap_space}", "--disable-log-requests"]
env:
- name: PORT
- name: VLLM_PORT
  # Kubernetes EnvVar.value must be a string; an unquoted 80 is parsed as a
  # YAML integer and rejected by the API server at admission time.
  value: "80"
%{ for hugging_face_token_secret in hugging_face_token_secret_list ~}
- name: HUGGING_FACE_HUB_TOKEN # Related token consumption
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Google Cloud Managed Service for Prometheus PodMonitoring resource.
# Rendered via Terraform templatefile(); ${namespace} is substituted at
# render time (see the kubernetes_manifest resource in main.tf).
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: "vllm-podmonitoring"
  namespace: ${namespace}  # injected by templatefile()
spec:
  # Scrape every pod carrying the app=vllm label in this namespace.
  selector:
    matchLabels:
      app: vllm
  endpoints:
    # Scrape the vLLM server's container port 80 every 15 seconds.
    # NOTE(review): assumes the vLLM server exposes /metrics on port 80 —
    # confirm against the Deployment's containerPort.
    - port: 80
      interval: 15s

0 comments on commit 8027565

Please sign in to comment.