diff --git a/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/README.md b/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/README.md index 5ac0c90..555dd69 100644 --- a/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/README.md +++ b/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/README.md @@ -33,3 +33,4 @@ Both yaml files contain the default settings for the following containerized app |HAPROXY_INGRESS |Exposed by Helm Chart: [incubator/haproxy-ingress](https://github.com/helm/charts/tree/master/incubator/haproxy-ingress) | |AWS APP MESH |Exposed by Helm Chart: [EKS Charts](https://github.com/aws/eks-charts/blob/master/README.md) | |JAVA/JMX |Exposed by JMX_Exporter: [JMX_Exporter](https://github.com/prometheus/jmx_exporter) | +|KARPENTER |Exposed by Karpenter controller: [Karpenter metrics](https://karpenter.sh/v0.27.3/concepts/metrics/) | diff --git a/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-eks-fargate.yaml b/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-eks-fargate.yaml index 90b42ff..11915cb 100644 --- a/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-eks-fargate.yaml +++ b/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-eks-fargate.yaml @@ -170,6 +170,68 @@ data: "metric_selectors": [ "^jvm_memory_pool_bytes_used$" ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName"] + ], + "metric_selectors": [ + "^karpenter_consistency_errors$", + "^karpenter_deprovisioning_evaluation_duration_seconds$", + "^karpenter_deprovisioning_replacement_node_initialized_seconds$", + "^karpenter_interruption_deleted_messages$", + "^karpenter_interruption_message_latency_time_seconds$", + "^karpenter_interruption_received_messages$", + "^karpenter_nodes_allocatable$", + "^karpenter_nodes_system_overhead$", + "^karpenter_nodes_termination_time_seconds$", + "^karpenter_nodes_total_daemon_limits$", + "^karpenter_nodes_total_daemon_requests$", + "^karpenter_nodes_total_pod_limits$", + "^karpenter_nodes_total_pod_requests$", + "^karpenter_pods_startup_time_seconds$", + "^karpenter_cloudprovider_batcher_batch_size$", + "^karpenter_cloudprovider_batcher_batch_time_seconds$", + "^karpenter_cloudprovider_duration_seconds$", + "^karpenter_pods_state$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","action"] + ], + "metric_selectors": [ + "^karpenter_deprovisioning_actions_performed$", + "^karpenter_interruption_actions_performed$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","provisioner"] + ], + "metric_selectors": [ + "^karpenter_provisioner_scheduling_duration_seconds$", + "^karpenter_nodes_created$", + "^karpenter_nodes_terminated$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","provisioner","resource_type"] + ], + "metric_selectors": [ + "^karpenter_provisioner_limit$", + "^karpenter_provisioner_usage$", + "^karpenter_provisioner_usage_pct$" + ] } ] } @@ -338,6 +400,17 @@ data: regex: 'jvm_gc_collection_seconds.*' action: drop + - job_name: karpenter + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - karpenter + relabel_configs: + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: http-metrics + action: keep + kind: ConfigMap metadata: name: prometheus-config diff --git a/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-eks.yaml b/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-eks.yaml index 81e5fb2..05595d5 100644 --- a/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-eks.yaml +++ b/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-eks.yaml @@ -168,6 +168,68 @@ data: "metric_selectors": [ "^jvm_memory_pool_bytes_used$" ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName"] + ], + "metric_selectors": [ + "^karpenter_consistency_errors$", + "^karpenter_deprovisioning_evaluation_duration_seconds$", + "^karpenter_deprovisioning_replacement_node_initialized_seconds$", + "^karpenter_interruption_deleted_messages$", + "^karpenter_interruption_message_latency_time_seconds$", + "^karpenter_interruption_received_messages$", + "^karpenter_nodes_allocatable$", + "^karpenter_nodes_system_overhead$", + "^karpenter_nodes_termination_time_seconds$", + "^karpenter_nodes_total_daemon_limits$", + "^karpenter_nodes_total_daemon_requests$", + "^karpenter_nodes_total_pod_limits$", + "^karpenter_nodes_total_pod_requests$", + "^karpenter_pods_startup_time_seconds$", + "^karpenter_cloudprovider_batcher_batch_size$", + "^karpenter_cloudprovider_batcher_batch_time_seconds$", + "^karpenter_cloudprovider_duration_seconds$", + "^karpenter_pods_state$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","action"] + ], + "metric_selectors": [ + "^karpenter_deprovisioning_actions_performed$", + "^karpenter_interruption_actions_performed$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","provisioner"] + ], + "metric_selectors": [ + "^karpenter_provisioner_scheduling_duration_seconds$", + "^karpenter_nodes_created$", + "^karpenter_nodes_terminated$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","provisioner","resource_type"] + ], + "metric_selectors": [ + "^karpenter_provisioner_limit$", + "^karpenter_provisioner_usage$", + "^karpenter_provisioner_usage_pct$" + ] } ] } @@ -381,6 +443,17 @@ data: regex: 'jvm_gc_collection_seconds.*' action: drop + - job_name: karpenter + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - karpenter + relabel_configs: + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: http-metrics + action: keep + kind: ConfigMap metadata: name: prometheus-config diff --git a/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-k8s.yaml b/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-k8s.yaml index b238881..2d9aa3a 100644 --- a/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-k8s.yaml +++ b/k8s-deployment-manifest-templates/deployment-mode/service/cwagent-prometheus/prometheus-k8s.yaml @@ -160,6 +160,68 @@ data: "metric_selectors": [ "^jvm_memory_pool_bytes_used$" ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName"] + ], + "metric_selectors": [ + "^karpenter_consistency_errors$", + "^karpenter_deprovisioning_evaluation_duration_seconds$", + "^karpenter_deprovisioning_replacement_node_initialized_seconds$", + "^karpenter_interruption_deleted_messages$", + "^karpenter_interruption_message_latency_time_seconds$", + "^karpenter_interruption_received_messages$", + "^karpenter_nodes_allocatable$", + "^karpenter_nodes_system_overhead$", + "^karpenter_nodes_termination_time_seconds$", + "^karpenter_nodes_total_daemon_limits$", + "^karpenter_nodes_total_daemon_requests$", + "^karpenter_nodes_total_pod_limits$", + "^karpenter_nodes_total_pod_requests$", + "^karpenter_pods_startup_time_seconds$", + "^karpenter_cloudprovider_batcher_batch_size$", + "^karpenter_cloudprovider_batcher_batch_time_seconds$", + "^karpenter_cloudprovider_duration_seconds$", + "^karpenter_pods_state$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","action"] + ], + "metric_selectors": [ + "^karpenter_deprovisioning_actions_performed$", + "^karpenter_interruption_actions_performed$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","provisioner"] + ], + "metric_selectors": [ + "^karpenter_provisioner_scheduling_duration_seconds$", + "^karpenter_nodes_created$", + "^karpenter_nodes_terminated$" + ] + }, + { + "source_labels": ["job"], + "label_matcher": "^karpenter$", + "dimensions": [ + ["ClusterName","provisioner","resource_type"] + ], + "metric_selectors": [ + "^karpenter_provisioner_limit$", + "^karpenter_provisioner_usage$", + "^karpenter_provisioner_usage_pct$" + ] } ] } @@ -328,6 +390,17 @@ data: regex: 'jvm_gc_collection_seconds.*' action: drop + - job_name: karpenter + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - karpenter + relabel_configs: + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: http-metrics + action: keep + kind: ConfigMap metadata: name: prometheus-config