Improve autoscaling configuration to support VPA & HPA and separates …

…deployments (#48)
langfuse · Dec 11, 2024 · e5b5bf1 · e5b5bf1
1 parent cb06625
commit e5b5bf1
Show file tree

Hide file tree

Showing 10 changed files with 193 additions and 84 deletions.
diff --git a/README.md b/README.md
diff --git a/charts/langfuse/Chart.yaml b/charts/langfuse/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v2
 name: langfuse
-version: 0.9.0
+version: 0.10.0
 description: Open source LLM engineering platform - LLM observability, metrics, evaluations, prompt management.
 type: application
 keywords:

diff --git a/charts/langfuse/templates/deployment-web.yaml b/charts/langfuse/templates/deployment-web.yaml
@@ -5,7 +5,7 @@ metadata:
   labels:
     {{- include "langfuse.labels" . | nindent 4 }}
 spec:
-  {{- if not .Values.autoscaling.enabled }}
+  {{- if not .Values.langfuse.web.hpa.enabled }}
   replicas: {{ coalesce (.Values.langfuse.web).replicas .Values.replicaCount 1 }}
   {{- end }}
   selector:

diff --git a/charts/langfuse/templates/deployment-worker.yaml b/charts/langfuse/templates/deployment-worker.yaml
@@ -5,7 +5,7 @@ metadata:
   labels:
     {{- include "langfuse.labels" . | nindent 4 }}
 spec:
-  {{- if not .Values.autoscaling.enabled }}
+  {{- if not .Values.langfuse.worker.hpa.enabled }}
   replicas: {{ coalesce (.Values.langfuse.worker).replicas .Values.replicaCount 1 }}
   {{- end }}
   selector:

diff --git a/charts/langfuse/templates/hpa-web.yaml b/charts/langfuse/templates/hpa-web.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.autoscaling.enabled }}
+{{- if .Values.langfuse.web.hpa.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
@@ -10,23 +10,23 @@ spec:
     apiVersion: apps/v1
     kind: Deployment
     name: {{ include "langfuse.fullname" . }}-web
-  minReplicas: {{ .Values.autoscaling.minReplicas }}
-  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  minReplicas: {{ .Values.langfuse.web.hpa.minReplicas }}
+  maxReplicas: {{ .Values.langfuse.web.hpa.maxReplicas }}
   metrics:
-    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
+    {{- with .Values.langfuse.web.hpa.targetCPUUtilizationPercentage }}
     - type: Resource
       resource:
         name: cpu
         target:
           type: Utilization
-          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+          averageUtilization: {{ . }}
     {{- end }}
-    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- with .Values.langfuse.web.hpa.targetMemoryUtilizationPercentage }}
     - type: Resource
       resource:
         name: memory
         target:
           type: Utilization
-          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+          averageUtilization: {{ . }}
     {{- end }}
 {{- end }}
diff --git a/charts/langfuse/templates/hpa-worker.yaml b/charts/langfuse/templates/hpa-worker.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.autoscaling.enabled }}
+{{- if .Values.langfuse.worker.hpa.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
@@ -10,23 +10,23 @@ spec:
     apiVersion: apps/v1
     kind: Deployment
     name: {{ include "langfuse.fullname" . }}-worker
-  minReplicas: {{ .Values.autoscaling.minReplicas }}
-  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  minReplicas: {{ .Values.langfuse.worker.hpa.minReplicas }}
+  maxReplicas: {{ .Values.langfuse.worker.hpa.maxReplicas }}
   metrics:
-    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
+    {{- with .Values.langfuse.worker.hpa.targetCPUUtilizationPercentage }}
     - type: Resource
       resource:
         name: cpu
         target:
           type: Utilization
-          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+          averageUtilization: {{ . }}
     {{- end }}
-    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- with .Values.langfuse.worker.hpa.targetMemoryUtilizationPercentage }}
     - type: Resource
       resource:
         name: memory
         target:
           type: Utilization
-          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+          averageUtilization: {{ . }}
     {{- end }}
 {{- end }}
diff --git a/charts/langfuse/templates/vpa-web.yaml b/charts/langfuse/templates/vpa-web.yaml
@@ -0,0 +1,41 @@
+{{- /*
+VerticalPodAutoscaler for the web Deployment.
+Rendered only when langfuse.web.vpa.enabled is set and the cluster advertises
+the autoscaling.k8s.io/v1 API (checked through .Capabilities.APIVersions).
+*/ -}}
+{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") .Values.langfuse.web.vpa.enabled }}
+apiVersion: autoscaling.k8s.io/v1
+kind: VerticalPodAutoscaler
+metadata:
+  name: {{ include "langfuse.fullname" . }}-web
+  labels:
+    {{- include "langfuse.labels" . | nindent 4 }}
+spec:
+  resourcePolicy:
+    containerPolicies:
+      - containerName: {{ .Chart.Name }}-web
+        {{- /* Values are indented to column 10 so they nest under their key at column 8. */}}
+        {{- with .Values.langfuse.web.vpa.controlledResources }}
+        controlledResources:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        {{- with .Values.langfuse.web.vpa.maxAllowed }}
+        maxAllowed:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        {{- with .Values.langfuse.web.vpa.minAllowed }}
+        minAllowed:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+  targetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "langfuse.fullname" . }}-web
+  {{- if .Values.langfuse.web.vpa.updatePolicy }}
+  updatePolicy:
+    {{- with .Values.langfuse.web.vpa.updatePolicy.updateMode }}
+    {{- /* Quoted so that the mode "Off" is not re-read as a YAML boolean. */}}
+    updateMode: {{ . | quote }}
+    {{- end }}
+  {{- end }}
+{{- end }}
diff --git a/charts/langfuse/templates/vpa-worker.yaml b/charts/langfuse/templates/vpa-worker.yaml
@@ -0,0 +1,41 @@
+{{- /*
+VerticalPodAutoscaler for the worker Deployment.
+Rendered only when langfuse.worker.vpa.enabled is set and the cluster advertises
+the autoscaling.k8s.io/v1 API (checked through .Capabilities.APIVersions).
+*/ -}}
+{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") .Values.langfuse.worker.vpa.enabled }}
+apiVersion: autoscaling.k8s.io/v1
+kind: VerticalPodAutoscaler
+metadata:
+  name: {{ include "langfuse.fullname" . }}-worker
+  labels:
+    {{- include "langfuse.labels" . | nindent 4 }}
+spec:
+  resourcePolicy:
+    containerPolicies:
+      - containerName: {{ .Chart.Name }}-worker
+        {{- /* Values are indented to column 10 so they nest under their key at column 8. */}}
+        {{- with .Values.langfuse.worker.vpa.controlledResources }}
+        controlledResources:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        {{- with .Values.langfuse.worker.vpa.maxAllowed }}
+        maxAllowed:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        {{- with .Values.langfuse.worker.vpa.minAllowed }}
+        minAllowed:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+  targetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "langfuse.fullname" . }}-worker
+  {{- if .Values.langfuse.worker.vpa.updatePolicy }}
+  updatePolicy:
+    {{- with .Values.langfuse.worker.vpa.updatePolicy.updateMode }}
+    {{- /* Quoted so that the mode "Off" is not re-read as a YAML boolean. */}}
+    updateMode: {{ . | quote }}
+    {{- end }}
+  {{- end }}
+{{- end }}
diff --git a/charts/langfuse/values.yaml b/charts/langfuse/values.yaml
@@ -27,6 +27,33 @@ langfuse:
   extraInitContainers: []
   extraVolumeMounts: []
 
+  web:
+    hpa:
+      enabled: false
+      minReplicas: 1
+      maxReplicas: 2
+      targetCPUUtilizationPercentage: 50
+    vpa:
+      enabled: false
+      controlledResources: []
+      maxAllowed: {}
+      minAllowed: {}
+      updatePolicy:
+        updateMode: Auto
+  worker:
+    hpa:
+      enabled: false
+      minReplicas: 1
+      maxReplicas: 2
+      targetCPUUtilizationPercentage: 50
+    vpa:
+      enabled: false
+      controlledResources: []
+      maxAllowed: {}
+      minAllowed: {}
+      updatePolicy:
+        updateMode: Auto
+
   additionalEnv:
     # REDIS
     - name: "REDIS_CONNECTION_STRING"
@@ -76,9 +103,6 @@ ingress:
 
 resources: {}
 
-autoscaling:
-  enabled: false
-
 nodeSelector: {}
 
 tolerations: []

diff --git a/examples/values-example.yaml b/examples/values-example.yaml
@@ -73,9 +73,6 @@ ingress:
 
 resources: {}
 
-autoscaling:
-  enabled: false
-
 nodeSelector: {}
 
 tolerations: []