Skip to content

Commit

Permalink
add requested alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
MiaAltieri committed Dec 9, 2024
1 parent 6c91b8b commit b1386be
Showing 1 changed file with 55 additions and 1 deletion.
56 changes: 55 additions & 1 deletion src/alert_rules/prometheus/percona-mongodb-exporter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,64 @@ groups:
description: "MongoDB replica member is not healthy\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

# Fires immediately (for: 0m) when a SECONDARY trails the PRIMARY of its
# replica set by more than 10 seconds.
- alert: MongodbReplicationLag
  # `on (set) group_right` pairs every SECONDARY series with the PRIMARY series
  # carrying the same `set` label. The optimeDate difference is divided by 1000
  # so it can be compared with the 10 s threshold named in the description
  # (presumably the metric is reported in milliseconds; confirm against the
  # exporter).
  # Only one `expr` key may exist per rule: the earlier, unscaled expression
  # was a duplicate key and has been removed.
  expr: >-
    (mongodb_rs_members_optimeDate{member_state="PRIMARY"}
    - on (set) group_right
    mongodb_rs_members_optimeDate{member_state="SECONDARY"})
    / 1000 > 10
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: MongoDB replication lag (instance {{ $labels.instance }})
    description: "Mongodb replication lag is more than 10s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

# Fires immediately (for: 0m) when the oplog window (head timestamp minus tail
# timestamp) is no larger than the PRIMARY/SECONDARY replication gap.
- alert: MongodbReplicationHeadroom
  # The optimeDate gap is divided by 1000, the same conversion the
  # MongodbReplicationLag rule applies, so both terms of the subtraction are in
  # the same unit.
  # NOTE(review): assumes the oplog head/tail timestamps are in seconds and
  # optimeDate is in milliseconds; confirm against the exporter in use.
  expr: >-
    sum(avg(mongodb_mongod_replset_oplog_head_timestamp
    - mongodb_mongod_replset_oplog_tail_timestamp))
    - sum(avg((mongodb_rs_members_optimeDate{member_state="PRIMARY"}
    - on (set) group_right
    mongodb_rs_members_optimeDate{member_state="SECONDARY"}) / 1000))
    <= 0
  for: 0m
  labels:
    severity: critical
  annotations:
    # sum(avg(...)) has no `by` clause, so the result carries no `instance`
    # label; the summary therefore no longer interpolates one.
    summary: MongoDB replication headroom
    description: "MongoDB replication headroom is <= 0\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

# Warns when the open-cursor gauge with csr_type="total" stays above
# 10 * 1000 for two minutes.
- alert: MongodbNumberCursorsOpen
  expr: >-
    mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "MongoDB number cursors open (instance {{ $labels.instance }})"
    # Literal block scalar: the continuation lines keep their single leading
    # space, so the rendered text matches the escaped "\n VALUE ..." form.
    description: |-
      Too many cursors opened by MongoDB for clients (> 10k)
       VALUE = {{ $value }}
       LABELS = {{ $labels }}

# Warns when the timed-out cursor counter grows by more than 100 within a
# one-minute window, and that condition holds for two minutes.
- alert: MongodbCursorsTimeouts
  expr: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100'
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "MongoDB cursors timeouts (instance {{ $labels.instance }})"
    # Literal block scalar: the continuation lines keep their single leading
    # space, so the rendered text matches the escaped "\n VALUE ..." form.
    description: |-
      Too many cursors are timing out
       VALUE = {{ $value }}
       LABELS = {{ $labels }}

# Warns when an instance has used more than 80% of its connection capacity
# for two minutes.
- alert: MongodbTooManyConnections
  # Utilisation = current / (current + available), per instance.
  # The previous expression applied rate() to the current-connections gauge and
  # divided by the sum of every conn_type series, so its value was not a
  # utilisation percentage and did not match the "> 80%" description.
  # NOTE(review): relies on the exporter exposing conn_type="available";
  # confirm against the exporter in use.
  expr: >-
    sum by (instance) (mongodb_ss_connections{conn_type="current"})
    / sum by (instance) (mongodb_ss_connections{conn_type=~"current|available"})
    * 100 > 80
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: MongoDB too many connections (instance {{ $labels.instance }})
    description: "Too many connections (> 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

# Critical: no scraped node has reported replica-set state 1 (primary) for two
# minutes, so the deployment cannot accept writes.
- alert: MongoDBNotWritable
  # `== bool 1` turns each series into 1 (state is 1) or 0 (any other state)
  # instead of filtering it out. Without `bool`, a set with no primary yields
  # an empty vector, sum() of it is empty rather than 0, and the alert could
  # never fire.
  expr: "sum(mongodb_mongod_replset_my_state == bool 1) == 0"
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: MongoDB is not writable, no node is primary
    description: "MongoDB is not writable, no node is primary"

# Warns when the sum of all reported replica-set state values has equalled 1
# for two minutes.
# NOTE(review): this adds up raw state codes rather than counting healthy
# members, so it appears to match only when a single node reporting state 1 is
# being scraped; confirm this is the intended "one node away from losing
# write availability" condition.
# NOTE(review): the alert name spells "Witable"; presumably "Writable" was
# meant. Left unchanged because existing routing or silences may reference it.
- alert: MongoDBOneNodeLossAwayFromNotWitable
  expr: 'sum(mongodb_mongod_replset_my_state) == 1'
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: 'If MongoDB loses one more node it will not be writable'
    description: 'If MongoDB loses one more node it will not be writable'

0 comments on commit b1386be

Please sign in to comment.