diff --git a/benchmarks/benchmark/tools/profile-generator/container/requirements.txt b/benchmarks/benchmark/tools/profile-generator/container/requirements.txt index 739d46f7d..b477c334b 100644 --- a/benchmarks/benchmark/tools/profile-generator/container/requirements.txt +++ b/benchmarks/benchmark/tools/profile-generator/container/requirements.txt @@ -24,7 +24,7 @@ ninja # For faster builds. psutil ray >= 2.9 sentencepiece # Required for LLaMA tokenizer. -numpy +numpy < 2.0 torch == 2.1.1 transformers >= 4.37.0 # Required for Qwen2 xformers == 0.0.23 diff --git a/benchmarks/inference-server/vllm/main.tf b/benchmarks/inference-server/vllm/main.tf index cc2b8f1fc..7627efa5e 100644 --- a/benchmarks/inference-server/vllm/main.tf +++ b/benchmarks/inference-server/vllm/main.tf @@ -43,7 +43,7 @@ locals { ? null : "${var.hugging_face_secret}/versions/${var.hugging_face_secret_version}" ) - vllm_podmonitoring = "${path.module}/monitoring-templates/vllm-podmonitoring.yaml.tftpl" + vllm_podmonitoring = "${path.module}/monitoring-templates/vllm-podmonitoring.yaml.tftpl" } resource "kubernetes_manifest" "default" {