From 51d2166b6af3a63adc9adc7eab8310e7f39d10c0 Mon Sep 17 00:00:00 2001 From: Neeraj Sharma <neeraj@last9.io> Date: Thu, 14 Apr 2022 00:32:38 +0530 Subject: [PATCH 1/3] prometheus source k8s cluster module --- prometheus/k8s/k8s_resources.hcl | 49 ++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 prometheus/k8s/k8s_resources.hcl diff --git a/prometheus/k8s/k8s_resources.hcl b/prometheus/k8s/k8s_resources.hcl new file mode 100644 index 0000000..a783ba8 --- /dev/null +++ b/prometheus/k8s/k8s_resources.hcl @@ -0,0 +1,49 @@ +scraper prometheus_kube_cluster module { + frequency = 120 + lookback = 600 + timeout = 90 + resolution = 60 + lag = 60 + + gauge "available_nodes" { + source prometheus "available_nodes" { + query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='Ready', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='Ready'}), 'cluster', '$input{cluster}', '', '')" + } + } + + gauge "disk_pressure_nodes" { + source prometheus "disk_pressure_nodes" { + query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='DiskPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='DiskPressure'}), 'cluster', '$input{cluster}', '', '')" + } + } + + gauge "pid_pressure_nodes" { + source prometheus "pid_pressure_nodes" { + query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='PIDPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='PIDPressure'}), 'cluster', '$input{cluster}', '', '')" + } + } + + gauge "memory_pressure_nodes" { + source prometheus "memory_pressure_nodes" { + query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='MemoryPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='MemoryPressure'}), 'cluster', '$input{cluster}', '', '')" + } + } + + gauge "requested_memory" { + source prometheus "requested_memory" { + query = 
"label_replace(sum(kube_pod_container_resource_requests{resource='memory', unit='byte'})/sum by (cluster)(kube_node_status_allocatable{resource='memory', unit='byte'}), 'cluster', '$input{cluster}', '', '')" + } + } + + gauge "requested_cpu" { + source prometheus "requested_cpu" { + query = "label_replace(sum(kube_pod_container_resource_requests{resource='cpu', unit='core'})/sum by (cluster)(kube_node_status_allocatable{resource='cpu', unit='core'}), 'cluster', '$input{cluster}', '', '')" + } + } + + gauge "saturated_nodes" { + source prometheus "saturated_nodes" { + query = "label_replace(sum by (cluster) (kube_node_spec_unschedulable{}), 'cluster', '$input{cluster}', '', '')" + } + } +} From b6ef666998739fb4d256b8ab21d70958359df38c Mon Sep 17 00:00:00 2001 From: Neeraj Sharma <neeraj@last9.io> Date: Thu, 14 Apr 2022 00:37:56 +0530 Subject: [PATCH 2/3] prometheus source name fixed to promql --- prometheus/k8s/k8s_resources.hcl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/prometheus/k8s/k8s_resources.hcl b/prometheus/k8s/k8s_resources.hcl index a783ba8..7f2926b 100644 --- a/prometheus/k8s/k8s_resources.hcl +++ b/prometheus/k8s/k8s_resources.hcl @@ -1,4 +1,4 @@ -scraper prometheus_kube_cluster module { +scraper promql_kube_cluster module { frequency = 120 lookback = 600 timeout = 90 @@ -6,43 +6,43 @@ scraper prometheus_kube_cluster module { lag = 60 gauge "available_nodes" { - source prometheus "available_nodes" { + source promql "available_nodes" { query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='Ready', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='Ready'}), 'cluster', '$input{cluster}', '', '')" } } gauge "disk_pressure_nodes" { - source prometheus "disk_pressure_nodes" { + source promql "disk_pressure_nodes" { query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='DiskPressure', status='true'})/ sum by 
(cluster)(kube_node_status_condition{condition='DiskPressure'}), 'cluster', '$input{cluster}', '', '')" } } gauge "pid_pressure_nodes" { - source prometheus "pid_pressure_nodes" { + source promql "pid_pressure_nodes" { query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='PIDPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='PIDPressure'}), 'cluster', '$input{cluster}', '', '')" } } gauge "memory_pressure_nodes" { - source prometheus "memory_pressure_nodes" { + source promql "memory_pressure_nodes" { query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='MemoryPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='MemoryPressure'}), 'cluster', '$input{cluster}', '', '')" } } gauge "requested_memory" { - source prometheus "requested_memory" { + source promql "requested_memory" { query = "label_replace(sum(kube_pod_container_resource_requests{resource='memory', unit='byte'})/sum by (cluster)(kube_node_status_allocatable{resource='memory', unit='byte'}), 'cluster', '$input{cluster}', '', '')" } } gauge "requested_cpu" { - source prometheus "requested_cpu" { + source promql "requested_cpu" { query = "label_replace(sum(kube_pod_container_resource_requests{resource='cpu', unit='core'})/sum by (cluster)(kube_node_status_allocatable{resource='cpu', unit='core'}), 'cluster', '$input{cluster}', '', '')" } } gauge "saturated_nodes" { - source prometheus "saturated_nodes" { + source promql "saturated_nodes" { query = "label_replace(sum by (cluster) (kube_node_spec_unschedulable{}), 'cluster', '$input{cluster}', '', '')" } } From 3cdd667be5d47e70bc4da446e589330281fadc77 Mon Sep 17 00:00:00 2001 From: Neeraj Sharma <neeraj@last9.io> Date: Thu, 14 Apr 2022 09:42:29 +0530 Subject: [PATCH 3/3] k8s node and cluster with namespace module added --- prometheus/k8s/k8s_resources.hcl | 91 +++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 7 deletions(-) diff --git 
a/prometheus/k8s/k8s_resources.hcl b/prometheus/k8s/k8s_resources.hcl index 7f2926b..eed0d2d 100644 --- a/prometheus/k8s/k8s_resources.hcl +++ b/prometheus/k8s/k8s_resources.hcl @@ -7,43 +7,120 @@ scraper promql_kube_cluster module { gauge "available_nodes" { source promql "available_nodes" { - query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='Ready', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='Ready'}), 'cluster', '$input{cluster}', '', '')" + query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='Ready', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='Ready'}), 'cluster', '${resources.each.cluster}', '', '')" } } gauge "disk_pressure_nodes" { source promql "disk_pressure_nodes" { - query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='DiskPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='DiskPressure'}), 'cluster', '$input{cluster}', '', '')" + query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='DiskPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='DiskPressure'}), 'cluster', '${resources.each.cluster}', '', '')" } } gauge "pid_pressure_nodes" { source promql "pid_pressure_nodes" { - query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='PIDPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='PIDPressure'}), 'cluster', '$input{cluster}', '', '')" + query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='PIDPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='PIDPressure'}), 'cluster', '${resources.each.cluster}', '', '')" } } gauge "memory_pressure_nodes" { source promql "memory_pressure_nodes" { - query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='MemoryPressure', status='true'})/ sum by 
(cluster)(kube_node_status_condition{condition='MemoryPressure'}), 'cluster', '$input{cluster}', '', '')" + query = "label_replace(sum by (cluster)(kube_node_status_condition{condition='MemoryPressure', status='true'})/ sum by (cluster)(kube_node_status_condition{condition='MemoryPressure'}), 'cluster', '${resources.each.cluster}', '', '')" } } gauge "requested_memory" { source promql "requested_memory" { - query = "label_replace(sum(kube_pod_container_resource_requests{resource='memory', unit='byte'})/sum by (cluster)(kube_node_status_allocatable{resource='memory', unit='byte'}), 'cluster', '$input{cluster}', '', '')" + query = "label_replace(sum by (cluster)(kube_pod_container_resource_requests{resource='memory', unit='byte'})/sum by (cluster)(kube_node_status_allocatable{resource='memory', unit='byte'}), 'cluster', '${resources.each.cluster}', '', '')" } } gauge "requested_cpu" { source promql "requested_cpu" { - query = "label_replace(sum(kube_pod_container_resource_requests{resource='cpu', unit='core'})/sum by (cluster)(kube_node_status_allocatable{resource='cpu', unit='core'}), 'cluster', '$input{cluster}', '', '')" + query = "label_replace(sum by (cluster)(kube_pod_container_resource_requests{resource='cpu', unit='core'})/sum by (cluster)(kube_node_status_allocatable{resource='cpu', unit='core'}), 'cluster', '${resources.each.cluster}', '', '')" } } gauge "saturated_nodes" { source promql "saturated_nodes" { - query = "label_replace(sum by (cluster) (kube_node_spec_unschedulable{}), 'cluster', '$input{cluster}', '', '')" + query = "label_replace(sum by (cluster) (kube_node_spec_unschedulable{}), 'cluster', '${resources.each.cluster}', '', '')" + } + } +} + +scraper promql_kube_cluster_with_namespace module { + frequency = 120 + lookback = 600 + timeout = 90 + resolution = 60 + lag = 60 + + gauge "unscheduled_pods" { + source promql "unscheduled_pods" { + query = "label_replace(sum by (cluster, namespace) (increase(kube_pod_status_unschedulable{}[1m])), 'cluster', 
'${resources.each.cluster}', '', '')" + } + } + + gauge "desired_pods" { + source promql "desired_pods" { + query = "label_replace(sum by (cluster, namespace) (increase(kube_pod_status_phase{}[1m])), 'cluster', '${resources.each.cluster}', '', '')" + } + } + + gauge "running_pods" { + source promql "running_pods" { + query = "label_replace(sum by (cluster, namespace) (kube_pod_status_phase{phase=~'Running'}), 'cluster', '${resources.each.cluster}', '', '')" + } + } + + gauge "pending_pods" { + source promql "pending_pods" { + query = "label_replace(sum by (cluster, namespace) (kube_pod_status_phase{phase=~'Pending'}), 'cluster', '${resources.each.cluster}', '', '')" + } + } + + gauge "failed_and_unknown_pods" { + source promql "failed_and_unknown_pods" { + query = "label_replace(sum by (cluster, namespace) (kube_pod_status_phase{phase=~'Failed|Unknown'}), 'cluster', '${resources.each.cluster}', '', '')" + } + } + + gauge "container_restarts" { + source promql "container_restarts" { + query = "label_replace(sum by (cluster, namespace) (rate(kube_pod_container_status_restarts_total{}[1m])*60), 'cluster', '${resources.each.cluster}', '', '')" + } + } + +} + +scraper promql_kube_node module { + frequency = 120 + lookback = 600 + timeout = 90 + resolution = 60 + lag = 60 + + gauge "disk_pressure" { + source promql "disk_pressure" { + query = "label_replace(sum by (cluster, node)(kube_node_status_condition{condition='DiskPressure', status='true'})/ sum by (cluster, node)(kube_node_status_condition{condition='DiskPressure'}), 'cluster', '${resources.each.cluster}', '', '')" + } + } + + gauge "pid_pressure" { + source promql "pid_pressure" { + query = "label_replace(sum by (cluster, node)(kube_node_status_condition{condition='PIDPressure', status='true'})/ sum by (cluster, node)(kube_node_status_condition{condition='PIDPressure'}), 'cluster', '${resources.each.cluster}', '', '')" + } + } + + gauge "memory_pressure" { + source promql "memory_pressure" { + query = 
"label_replace(sum by (cluster, node)(kube_node_status_condition{condition='MemoryPressure', status='true'})/ sum by (cluster, node)(kube_node_status_condition{condition='MemoryPressure'}), 'cluster', '${resources.each.cluster}', '', '')" + } + } + + gauge "saturated" { + source promql "saturated" { + query = "label_replace(sum by (cluster, node) (kube_node_spec_unschedulable{}), 'cluster', '${resources.each.cluster}', '', '')" } } }