-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathcheck_galera_cluster
executable file
·284 lines (238 loc) · 7.07 KB
/
check_galera_cluster
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#!/bin/bash
#
# check_galera_cluster - monitoring plugin that reports the state of a
# Galera cluster node using the standard plugin exit codes defined below.
#

# Force the C locale for the shell and every child process. LANG alone is
# overridden by an inherited LC_ALL, which could change number formatting
# (printf/bc) and collation (sort/comm) used later in the script.
export LANG=C LC_ALL=C

# ${0##*/} strips the directory part without forking and without
# word-splitting a path that contains spaces.
PROGNAME=${0##*/}
VERSION="Version 1.1.5"
AUTHOR="Guillaume Coré <[email protected]>, Ales Nosek <[email protected]>, Staf Wagemakers <[email protected]>, Claudio Kuenzler <claudiokuenzler.com>"

# Plugin exit codes: OK, WARNING, CRITICAL, UNKNOWN.
readonly ST_OK=0
readonly ST_WR=1
readonly ST_CR=2
readonly ST_UK=3

# Number of findings per severity; evaluated at the end of the script to
# pick the exit code.
warnAlerts=0
critAlerts=0
unknAlerts=0
#######################################
# Print the plugin version followed by the author list on one line.
# Globals:   VERSION, AUTHOR (read)
# Arguments: none used (any passed arguments are ignored)
# Outputs:   "<VERSION> <AUTHOR>" on stdout
#######################################
print_version() {
  printf '%s %s\n' "$VERSION" "$AUTHOR"
}
#######################################
# Print the version line and the usage text, then terminate the script.
# Globals:   PROGNAME, VERSION, ST_UK (read)
# Outputs:   usage text on stdout
# Returns:   never; exits with $ST_UK
#######################################
print_help() {
  print_version "$PROGNAME" "$VERSION"
  # One printf emits the whole text; every argument becomes one output line.
  printf '%s\n' \
    "" \
    "$PROGNAME is a monitoring plugin to monitor Galera cluster status." \
    "" \
    "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-m file] [-w SIZE] [-c SIZE] [-s statefile] [-f FLOAT] [-0]" \
    "" \
    "Options:" \
    " u)" \
    " MySQL user." \
    " p)" \
    " MySQL password." \
    " H)" \
    " MySQL host." \
    " P)" \
    " MySQL port." \
    " m)" \
    " MySQL extra my.cnf configuration file." \
    " w)" \
    " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)." \
    " c)" \
    " Sets minimum number of nodes in the cluster when CRITICAL is raised. (default is 2)." \
    " f)" \
    " Sets critical value of wsrep_flow_control_paused (default is 0.1)." \
    " 0)" \
    " Rise CRITICAL if the node is not primary" \
    " s)" \
    " Create state file, detect disconnected nodes"
  exit $ST_UK
}
# Built-in thresholds, used unless overridden on the command line.
fcp=0.1   # -f: wsrep_flow_control_paused above this value is CRITICAL
crit=2    # -c: cluster size at or below this value is CRITICAL
#######################################
# Abort the plugin unless the named command can be found.
# Globals:   ST_UK (read)
# Arguments: $1 - name of the command to look up
# Outputs:   an error message on stdout when the check fails
# Returns:   0 when the command exists; otherwise exits with $ST_UK
#######################################
check_executable() {
  local tool=$1

  if [[ -z $tool ]]; then
    echo "check_executable: no parameter given!"
    exit $ST_UK
  fi

  # Found: nothing to report.
  command -v "$tool" >/dev/null 2>&1 && return 0

  echo "UNKNOWN: Cannot find $tool"
  exit $ST_UK
}
# External commands the plugin cannot work without: mysql runs the status
# queries, bc performs the floating-point threshold comparison.
for required_tool in mysql bc; do
  check_executable "$required_tool"
done
#######################################
# Parse the command-line options into the plugin's global settings.
# Globals:   mysqluser, password, mysqlhost, port, myconfig, warn, crit, fcp,
#            primary, stateFile (written); PROGNAME, VERSION, ST_UK (read)
# Arguments: the script's positional parameters ("$@")
# Outputs:   help/version text or a usage error on stdout
# Returns:   0 on success; exits with $ST_UK for -h, -v and invalid usage
#######################################
parse_args() {
  # Keep the getopts state local so the function always starts at the first
  # argument it was given.
  local OPTION OPTARG OPTIND=1

  # The optstring uses plain ASCII quotes (typographic quotes would become
  # part of the optstring). The leading ':' selects silent error reporting:
  # getopts then stores the offending option letter in OPTARG so it can be
  # named in the message.
  while getopts ':hvu:p:H:P:w:c:f:m:s:0' OPTION; do
    case "$OPTION" in
      h)
        print_help
        exit "$ST_UK"
        ;;
      v)
        print_version "$PROGNAME" "$VERSION"
        exit "$ST_UK"
        ;;
      u)
        mysqluser=$OPTARG
        ;;
      p)
        password=$OPTARG
        ;;
      H)
        mysqlhost=$OPTARG
        ;;
      P)
        port=$OPTARG
        ;;
      m)
        myconfig=$OPTARG
        ;;
      w)
        warn=$OPTARG
        ;;
      c)
        crit=$OPTARG
        ;;
      f)
        fcp=$OPTARG
        ;;
      0)
        primary='TRUE'
        ;;
      s)
        stateFile=$OPTARG
        ;;
      :)
        echo "UNKNOWN: option -$OPTARG requires an argument"
        print_help
        exit "$ST_UK"
        ;;
      *)
        # Report the option that was rejected, not the first script argument.
        echo "Unknown argument: -$OPTARG"
        print_help
        exit "$ST_UK"
        ;;
    esac
  done

  # Without -w the warning threshold equals the critical threshold.
  if [ -z "$warn" ]; then
    warn=$crit
  fi
}
parse_args "$@"
#######################################
# Build a single command-line option by gluing a prefix to a value.
# Arguments: $1 - option prefix (e.g. -h or --defaults-extra-file=)
#            $2 - option value; when empty nothing is printed
# Outputs:   "<prefix><value>" on stdout, or nothing for an empty value
# Returns:   0
#######################################
create_param() {
  if [ -n "$2" ]; then
    # printf with quoted arguments keeps the value intact; an unquoted echo
    # would collapse whitespace and expand glob characters in the value.
    printf '%s%s\n' "$1" "$2"
  fi
}
#######################################
# Assemble the option string handed to every mysql invocation.
# Globals:   mysqlhost, port, mysqluser, password, myconfig (read)
#            param_mysqlhost, param_port, param_mysqluser, param_password,
#            param_configfile, param_mysql, MYSQL_PWD (written)
# Note:      param_mysql is a plain string that callers expand unquoted, so
#            individual values must not contain whitespace.
#######################################
build_mysql_params() {
  param_mysqlhost=$(create_param -h "$mysqlhost")
  param_port=$(create_param -P "$port")
  param_mysqluser=$(create_param -u "$mysqluser")
  # Computed as before, but not added to param_mysql: the password reaches
  # the client through the MYSQL_PWD environment variable instead.
  param_password=$(create_param -p "$password")
  param_configfile=$(create_param --defaults-extra-file= "$myconfig")
  export MYSQL_PWD="$password"

  # --defaults-extra-file is only honoured as the FIRST option on the mysql
  # command line; placed after -h/-P/-u the client rejects it as an unknown
  # variable. Keep it at the front.
  param_mysql="$param_configfile $param_mysqlhost $param_port $param_mysqluser"
}
build_mysql_params
#
# Connectivity probe: run the client's status command and give up with
# CRITICAL when it fails for any reason.
#
# $param_mysql is expanded unquoted on purpose: it is one string holding
# several options that must be split into separate words.
if ! mysql $param_mysql -B -N -e '\s;' >/dev/null 2>&1; then
  echo "CRITICAL: mysql connection check failed"
  exit $ST_CR
fi
#
# retrieve the mysql status
#
# $param_mysql is expanded unquoted on purpose: it is one string holding
# several options that must be split into separate words.
rMysqlStatus=$(mysql $param_mysql -B -N -e "show status like 'wsrep_%';")

#######################################
# Print the value of one status variable found in $rMysqlStatus.
# The name is compared for equality with the first column, so a variable
# whose name merely contains the requested one is never picked up.
# Globals:   rMysqlStatus (read)
# Arguments: $1 - exact status variable name
# Outputs:   second whitespace-separated field of the matching row, or
#            nothing when the variable is absent
#######################################
wsrep_status_value() {
  printf '%s\n' "$rMysqlStatus" | awk -v name="$1" '$1 == name { print $2 }'
}

#
# verify that the node is part of a cluster
#
rClusterStateUuid=$(wsrep_status_value wsrep_cluster_state_uuid)
if [ -z "$rClusterStateUuid" ]; then
  echo "CRITICAL: node is not part of a cluster"
  exit "$ST_CR"
fi

rClusterSize=$(wsrep_status_value wsrep_cluster_size)
rClusterStatus=$(wsrep_status_value wsrep_cluster_status)           # Primary
rFlowControl=$(wsrep_status_value wsrep_flow_control_paused)        # < 0.1
rReady=$(wsrep_status_value wsrep_ready)                            # ON
rConnected=$(wsrep_status_value wsrep_connected)                    # ON
rLocalStateComment=$(wsrep_status_value wsrep_local_state_comment)  # Synced
rIncommingAddresses=$(wsrep_status_value wsrep_incoming_addresses)

# Test for a missing value BEFORE reformatting it: printf '%.14f' turns an
# empty string into 0.00000000000000, which would hide the problem.
if [ -z "$rFlowControl" ]; then
  echo "UNKNOWN: wsrep_flow_control_paused is empty"
  unknAlerts=$((unknAlerts + 1))
else
  # issue #4: rewrite the value in fixed-point notation before passing it to
  # bc (presumably because bc cannot read exponent notation - confirm
  # against the upstream issue).
  rFlowControl=$(printf '%.14f' "$rFlowControl")
  if [ "$(echo "$rFlowControl > $fcp" | bc)" = 1 ]; then
    echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
    critAlerts=$((critAlerts + 1))
  fi
fi

if [ "$primary" = 'TRUE' ]; then
  if [ "$rClusterStatus" != 'Primary' ]; then
    echo "CRITICAL: node is not primary (wsrep_cluster_status)"
    critAlerts=$((critAlerts + 1))
  fi
fi

if [ "$rReady" != 'ON' ]; then
  echo "CRITICAL: node is not ready (wsrep_ready)"
  critAlerts=$((critAlerts + 1))
fi

if [ "$rConnected" != 'ON' ]; then
  echo "CRITICAL: node is not connected (wsrep_connected)"
  critAlerts=$((critAlerts + 1))
fi

if [ "$rLocalStateComment" != 'Synced' ]; then
  echo "CRITICAL: node is not synced - actual state is: $rLocalStateComment (wsrep_local_state_comment)"
  critAlerts=$((critAlerts + 1))
fi

# A missing or non-numeric cluster size cannot be compared; say so instead
# of leaving with UNKNOWN and no explanation.
case "$rClusterSize" in
  '' | *[!0-9]*)
    echo "UNKNOWN: wsrep_cluster_size is empty or not a number"
    exit "$ST_UK"
    ;;
esac

if [ "$rClusterSize" -gt "$warn" ]; then
  # only display the ok message if the state check is not enabled
  if [ -z "$stateFile" ]; then
    echo "OK: number of NODES = $rClusterSize (wsrep_cluster_size)"
  fi
elif [ "$rClusterSize" -le "$crit" ]; then
  echo "CRITICAL: number of NODES = $rClusterSize (wsrep_cluster_size)"
  critAlerts=$((critAlerts + 1))
elif [ "$rClusterSize" -le "$warn" ]; then
  echo "WARNING: number of NODES = $rClusterSize (wsrep_cluster_size)"
  warnAlerts=$((warnAlerts + 1))
else
  # Only reached when a threshold is not an integer and the tests above
  # could not be evaluated.
  echo "UNKNOWN: cannot compare the cluster size with the thresholds (warning: $warn, critical: $crit)"
  exit "$ST_UK"
fi
#
# detect automatically whether peers were lost, using the state file
#
#######################################
# Remember every peer address ever seen in $stateFile and compare the
# remembered set with the peers that are currently connected.
# Globals:   stateFile, rConnected, rIncommingAddresses, rClusterSize,
#            crit, warn (read); warnAlerts, unknAlerts (written)
# Outputs:   one OK/WARNING/UNKNOWN line on stdout (none for a single
#            node unless both thresholds are 0)
# Returns:   0
#######################################
check_state_file() {
  local current_nodes known_nodes all_nodes missing_nodes max_cluster_size

  if ! touch "$stateFile"; then
    echo "UNKNOWN: stateFile \"$stateFile\" is not writeable"
    unknAlerts=$((unknAlerts + 1))
    return 0
  fi

  # The peer list is only evaluated while the node reports it is connected.
  [ "$rConnected" = "ON" ] || return 0

  # Peers connected right now, one per line, sorted, blank entries dropped.
  current_nodes=$(printf '%s\n' "$rIncommingAddresses" | tr ',' '\n' | awk 'NF' | sort -u)
  # Peers remembered from earlier runs.
  known_nodes=$(awk 'NF' "$stateFile" | sort -u)
  # Union of both sets: every peer that was ever part of the cluster.
  all_nodes=$(printf '%s\n%s\n' "$current_nodes" "$known_nodes" | awk 'NF' | sort -u)

  # comm needs sorted input, so the state file is rewritten as a sorted set
  # rather than appended to; appending leaves it unsorted as soon as a new
  # peer sorts before an existing entry.
  if [ -n "$all_nodes" ] && [ "$all_nodes" != "$(cat "$stateFile")" ]; then
    printf '%s\n' "$all_nodes" > "$stateFile"
  fi

  # Lines only present in the state file are peers that went away.
  missing_nodes=$(printf '%s\n' "$current_nodes" | comm -1 -3 - "$stateFile")

  # Number of peers that were part of the cluster at some point.
  max_cluster_size=$(printf '%s\n' "$all_nodes" | awk 'NF { n++ } END { print n + 0 }')

  if [ "$max_cluster_size" -eq "$rClusterSize" ]; then
    if [ "$max_cluster_size" -eq 1 ]; then
      if [ "$crit" -eq 0 ] && [ "$warn" -eq 0 ]; then
        echo "OK: running single-node database cluster"
      fi
    else
      echo "OK: running redundant $rClusterSize online / $max_cluster_size total"
    fi
  else
    # Join several missing peers with spaces so the message stays on one line.
    echo "WARNING: redundant $rClusterSize online / $max_cluster_size total, missing peers: ${missing_nodes//$'\n'/ }"
    warnAlerts=$((warnAlerts + 1))
  fi
}

if [ -n "$stateFile" ]; then
  check_state_file
fi
#
# Final exit code: the most severe finding wins, checked in the order
# critical, unknown, warning.
#
if [ "$critAlerts" -gt "0" ]; then
  exit $ST_CR
elif [ "$unknAlerts" -gt "0" ]; then
  exit $ST_UK
elif [ "$warnAlerts" -gt "0" ]; then
  exit $ST_WR
fi
exit 0