Switch the JetStream HPA / prometheus-adapter memory rules to Prometheus-scraped
memory_used / memory_total series, filtered to the jetstream-http container.
NOTE(review): YAML leading whitespace below was reconstructed from key nesting
(2-space indent assumed) - confirm against the target files before applying.

diff --git a/modules/jetstream-maxtext-deployment/templates/custom-metrics-stackdriver-adapter/hpa.jetstream.yaml.tftpl b/modules/jetstream-maxtext-deployment/templates/custom-metrics-stackdriver-adapter/hpa.jetstream.yaml.tftpl
index b70218558..414cc2432 100644
--- a/modules/jetstream-maxtext-deployment/templates/custom-metrics-stackdriver-adapter/hpa.jetstream.yaml.tftpl
+++ b/modules/jetstream-maxtext-deployment/templates/custom-metrics-stackdriver-adapter/hpa.jetstream.yaml.tftpl
@@ -24,7 +24,11 @@ spec:
   - type: External
     external:
       metric:
-        name: kubernetes.io|node|accelerator|${rule.target_query}
+        name: prometheus.googleapis.com|${rule.target_query}|gauge
+        selector:
+          matchLabels:
+            metric.labels.container: jetstream-http
+            metric.labels.exported_namespace: default
       target:
         type: AverageValue
         averageValue: ${rule.average_value_target}
diff --git a/modules/jetstream-maxtext-deployment/templates/prometheus-adapter/values.yaml.tftpl b/modules/jetstream-maxtext-deployment/templates/prometheus-adapter/values.yaml.tftpl
index a07058dee..b1091fe9f 100644
--- a/modules/jetstream-maxtext-deployment/templates/prometheus-adapter/values.yaml.tftpl
+++ b/modules/jetstream-maxtext-deployment/templates/prometheus-adapter/values.yaml.tftpl
@@ -29,10 +29,10 @@ rules:
       matches: ""
       as: "jetstream_slots_used_percentage"
     metricsQuery: avg(<<.Series>>{<<.LabelMatchers>>,cluster="${cluster_name}"})
-  - seriesQuery: 'kubernetes_io:node_accelerator_memory_used'
+  - seriesQuery: 'memory_used'
     resources:
       template: <<.Resource>>
     name:
       matches: ""
       as: "memory_used_percentage"
-    metricsQuery: avg(kubernetes_io:node_accelerator_memory_used{cluster_name="${cluster_name}"}) / avg(kubernetes_io:node_accelerator_memory_total{cluster_name="${cluster_name}"})
\ No newline at end of file
+    metricsQuery: avg(memory_used{cluster="${cluster_name}",exported_namespace="default",container="jetstream-http"}) / avg(memory_total{cluster="${cluster_name}",exported_namespace="default",container="jetstream-http"})
\ No newline at end of file