Add pod resources option for inference model deployment

stackhpc · Nov 15, 2024 · bdf8cf5 · bdf8cf5
1 parent dbb65cc
commit bdf8cf5
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 0 deletions.
diff --git a/deployment/helm/charts/danswer/templates/inference-model-deployment.yaml b/deployment/helm/charts/danswer/templates/inference-model-deployment.yaml
@@ -25,6 +25,10 @@ spec:
         image: "{{ .Values.inferenceCapability.deployment.image.repository }}:{{ .Values.inferenceCapability.deployment.image.tag | default .Values.appVersionOverride | default .Chart.AppVersion }}"
         imagePullPolicy: {{ .Values.inferenceCapability.deployment.image.pullPolicy }}
         command: {{ toYaml .Values.inferenceCapability.deployment.command | nindent 14 }}
+        {{- with .Values.inferenceCapability.deployment.resources }}
+        resources:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
         ports:
         - containerPort: {{ .Values.inferenceCapability.service.port }}
         envFrom:

diff --git a/deployment/helm/charts/danswer/values.yaml b/deployment/helm/charts/danswer/values.yaml
@@ -39,6 +39,7 @@ inferenceCapability:
       tag:
       pullPolicy: IfNotPresent
     command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
+    resources: {}  # rendered into the container spec; omitted when empty
     port: 9000
     volumeMounts:
       - name: inference-model-storage