Skip to content

Commit

Permalink
Update components versions. Improve restarting pods monitoring.
Browse files Browse the repository at this point in the history
  • Loading branch information
camilb committed Sep 28, 2017
1 parent 5e6378b commit f413d01
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 17 deletions.
2 changes: 1 addition & 1 deletion operator/assets/alertmanager/alertmanager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ receivers:
{{ range .Alerts }}
*Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
*Description:* {{ .Annotations.description }}
*Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:>
*Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:> *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:>
*Details:*
{{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
{{ end }}
Expand Down
24 changes: 17 additions & 7 deletions operator/assets/prometheus/rules/custom.rules
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,31 @@ groups:
labels:
severity: critical
annotations:
summary: 'Deployment of {{$labels.exported_namespace}}/{{$labels.name}} failed'
description: 'Deployment of {{$labels.exported_namespace}}/{{$labels.name}} failed - observed generation != intended generation'
summary: Deployment of {{$labels.namespace}}/{{$labels.name}} failed
description: Deployment of {{$labels.namespace}}/{{$labels.name}} failed - observed generation != intended generation
- alert: DeploymentReplicasMismatch
expr: kube_deployment_spec_replicas{job="kube-state-metrics"} != kube_deployment_status_availableReplicas{job="kube-state-metrics"}
for: 5m
labels:
severity: critical
annotations:
summary: 'Deployment of {{$labels.exported_namespace}}/{{$labels.name}} failed.'
description: 'Deployment of {{$labels.exported_namespace}}/{{$labels.name}} failed - observed replicas != intended replicas.'
summary: Deployment of {{$labels.name}} failed.
description: Deployment of {{$labels.namespace}}/{{$labels.name}} failed - observed replicas != intended replicas.
- alert: DeploymentReplicasUnavailable
expr: kube_deployment_status_replicas_unavailable > 0
for: 5m
labels:
severity: critical
annotations:
summary: Deployment `{{ $labels.deployment }}` has `{{ $value }}` replica(s) unavailable.
description: Deployment `{{ $labels.deployment }}` has `{{ $value }}` replica(s) unavailable.
runbook: https://kubernetes.io/docs/tasks/debug-application-cluster/debug-pod-replication-controller/
- alert: PodRestartingTooMuch
expr: rate(kube_pod_container_status_restarts[10m])*1200 > 2
expr: rate(kube_pod_container_status_restarts[5m])*600 > 2
for: 5m
labels:
severity: warning
annotations:
summary: 'Pod {{$labels.namespace}}/{{$label.name}} restarting too much.'
description: 'Pod {{$labels.namespace}}/{{$label.name}} restarting too much.'
summary: Pod `{{ $labels.pod }}` is restarting too much.
description: Pod `{{ $labels.namespace }}/{{ $labels.pod }}` is restarting too much.
runbook: https://kubernetes.io/docs/tasks/debug-application-cluster/debug-pod-replication-controller/
4 changes: 2 additions & 2 deletions operator/deploy
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#########################################################################################
#components default version
#########################################################################################
GRAFANA_DEFAULT_VERSION=4.5.1
GRAFANA_DEFAULT_VERSION=4.5.2
PROMETHEUS_DEFAULT_VERSION=v2.0.0-beta.5
PROMETHEUS_OPERATOR_DEFAULT_VERSION=v0.13.0
ALERT_MANAGER_DEFAULT_VERSION=v0.8.0
ALERT_MANAGER_DEFAULT_VERSION=v0.9.0
NODE_EXPORTER_DEFAULT_VERSION=v0.14.0
KUBE_STATE_METRICS_DEFAULT_VERSION=v1.0.1

Expand Down
24 changes: 17 additions & 7 deletions operator/manifests/prometheus/prometheus-k8s-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,24 +50,34 @@ data:
labels:
severity: critical
annotations:
summary: 'Deployment of {{$labels.exported_namespace}}/{{$labels.name}} failed'
description: 'Deployment of {{$labels.exported_namespace}}/{{$labels.name}} failed - observed generation != intended generation'
summary: Deployment of {{$labels.namespace}}/{{$labels.name}} failed
description: Deployment of {{$labels.namespace}}/{{$labels.name}} failed - observed generation != intended generation
- alert: DeploymentReplicasMismatch
expr: kube_deployment_spec_replicas{job="kube-state-metrics"} != kube_deployment_status_availableReplicas{job="kube-state-metrics"}
for: 5m
labels:
severity: critical
annotations:
summary: 'Deployment of {{$labels.exported_namespace}}/{{$labels.name}} failed.'
description: 'Deployment of {{$labels.exported_namespace}}/{{$labels.name}} failed - observed replicas != intended replicas.'
summary: Deployment of {{$labels.name}} failed.
description: Deployment of {{$labels.namespace}}/{{$labels.name}} failed - observed replicas != intended replicas.
- alert: DeploymentReplicasUnavailable
expr: kube_deployment_status_replicas_unavailable > 0
for: 5m
labels:
severity: critical
annotations:
summary: Deployment `{{ $labels.deployment }}` has `{{ $value }}` replica(s) unavailable.
description: Deployment `{{ $labels.deployment }}` has `{{ $value }}` replica(s) unavailable.
runbook: https://kubernetes.io/docs/tasks/debug-application-cluster/debug-pod-replication-controller/
- alert: PodRestartingTooMuch
expr: rate(kube_pod_container_status_restarts[10m])*1200 > 2
expr: rate(kube_pod_container_status_restarts[5m])*600 > 2
for: 5m
labels:
severity: warning
annotations:
summary: 'Pod {{$labels.namespace}}/{{$label.name}} restarting too much.'
description: 'Pod {{$labels.namespace}}/{{$label.name}} restarting too much.'
summary: Pod `{{ $labels.pod }}` is restarting too much.
description: Pod `{{ $labels.namespace }}/{{ $labels.pod }}` is restarting too much.
runbook: https://kubernetes.io/docs/tasks/debug-application-cluster/debug-pod-replication-controller/
etcd3.rules: |+
groups:
- name: etcd.rules
Expand Down

0 comments on commit f413d01

Please sign in to comment.