From 43f53c44fc9baa255f32693069fc72ebaba1da2f Mon Sep 17 00:00:00 2001
From: Ryan Frantz
Date: Sun, 8 Jun 2014 02:18:49 +0000
Subject: [PATCH] Adding NRPE examples.

- These examples correspond to the built-in formatters.
- Resolves #15.
---
 contrib/nrpe-plugins/check_cpu_stats.sh      | 186 +++++++++++++++++++
 contrib/nrpe-plugins/check_disk.sh           |  34 ++++
 contrib/nrpe-plugins/check_mem.pl            | 181 ++++++++++++++++++
 contrib/nrpe-plugins/nrpe-plugin-examples.md |  11 ++
 4 files changed, 412 insertions(+)
 create mode 100755 contrib/nrpe-plugins/check_cpu_stats.sh
 create mode 100755 contrib/nrpe-plugins/check_disk.sh
 create mode 100755 contrib/nrpe-plugins/check_mem.pl
 create mode 100644 contrib/nrpe-plugins/nrpe-plugin-examples.md

diff --git a/contrib/nrpe-plugins/check_cpu_stats.sh b/contrib/nrpe-plugins/check_cpu_stats.sh
new file mode 100755
index 0000000..858912f
--- /dev/null
+++ b/contrib/nrpe-plugins/check_cpu_stats.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+# ========================================================================================
+# CPU Utilization Statistics plugin for Nagios
+#
+# Written by     : Steve Bosek
+# Release        : 2.1
+# Creation date  : 8 September 2007
+# Revision date  : 28 February 2008
+# Package        : DTB Nagios Plugin
+# Description    : Nagios plugin (script) to check cpu utilization statistics.
+#                  This script has been designed and written on Unix platforms (Linux, AIX, Solaris),
+#                  requiring iostat as external program. The location of iostat can easily
+#                  be changed by editing the variable $IOSTAT at the top of the script.
+#                  The script is used to query 4 of the key cpu statistics (user,system,iowait,idle)
+#                  at the same time. Thresholds exist for iowait (-w/-c) and, optionally,
+#                  for idle (-wi/-ci); user and system are reported but never alerted on.
+#
+# Usage          : ./check_cpu_stats.sh [-w WARN] [-c CRIT] [-wi WARN_IDLE] [-ci CRIT_IDLE] [-i INTERVAL] [-n REPORTS]
+# ----------------------------------------------------------------------------------------
+#
+# TODO: Support for HP-UX
+#
+#
+# ========================================================================================
+#
+# HISTORY :
+#     Release   |     Date      |    Authors    |       Description
+# --------------+---------------+---------------+------------------------------------------
+#     2.0       |   16.02.08    |  Steve Bosek  | Solaris support and new parameters
+#               |               |               | New Parameters : - iostat seconds intervals
+#               |               |               |                  - iostat report number
+#     2.1       |   08.06.08    |  Steve Bosek  | Bug perfdata and convert comma in point for Linux result
+# -----------------------------------------------------------------------------------------
+#
+# =========================================================================================
+
+# Paths to commands used in this script. These may have to be modified to match your system setup.
+
+IOSTAT=/usr/bin/iostat
+
+# Nagios return codes
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+# Default parameter values; each can be preset through the environment or overridden by an option.
+WARNING_THRESHOLD=${WARNING_THRESHOLD:="30"}
+CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:="100"}
+INTERVAL_SEC=${INTERVAL_SEC:="3"}
+NUM_REPORT=${NUM_REPORT:="2"}
+
+# Plugin identification strings used by the usage and version output
+PROGNAME=$(basename "$0")
+RELEASE="Revision 2.1"
+AUTHOR="(c) 2008 Steve Bosek (steve.bosek@gmail.com)"
+
+if [ ! -x "$IOSTAT" ]; then
+    echo "UNKNOWN: iostat not found or is not executable by the nagios user."
+    exit $STATE_UNKNOWN
+fi
+
+# Helper functions: version banner, usage text and extended help
+print_release() {
+    echo "$RELEASE $AUTHOR"
+}
+
+print_usage() {
+    echo ""
+    echo "$PROGNAME $RELEASE - CPU Utilization check script for Nagios"
+    echo ""
+    echo "Usage: check_cpu_stats.sh -w -c -wi -ci (-i -n)"
+    echo ""
+    echo " -w Warning level in % for cpu iowait"
+    echo " -c Crical level in % for cpu iowait"
+    echo " -wi Warn if cpu idle is less than x"
+    echo " -ci Critical is CPU idle is less than x"
+    echo " -i Interval in seconds for iostat (default : 3)"
+    echo " -n Number report for iostat (default : 2)"
+    echo " -h Show this page"
+    echo ""
+    echo "Usage: $PROGNAME"
+    echo "Usage: $PROGNAME --help"
+    echo ""
+}
+
+print_help() {
+    print_usage
+    echo ""
+    echo "This plugin will check cpu utilization (user,system,iowait,idle in %)"
+    echo ""
+    exit 0
+}
+
+# Parse parameters; every value-taking option consumes the following argument.
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -h | --help)
+            print_help
+            exit $STATE_OK
+            ;;
+        -v | --version)
+            print_release
+            exit $STATE_OK
+            ;;
+        -w | --warning)
+            shift
+            WARNING_THRESHOLD=$1
+            ;;
+        -c | --critical)
+            shift
+            CRITICAL_THRESHOLD=$1
+            ;;
+        -wi | --warn-idle)
+            shift
+            WARN_IDLE=$1
+            ;;
+        -ci | --critical-idle)
+            shift
+            CRIT_IDLE=$1
+            ;;
+        -i | --interval)
+            shift
+            INTERVAL_SEC=$1
+            ;;
+        -n | --number)
+            shift
+            NUM_REPORT=$1
+            ;;
+        *)  echo "Unknown argument: $1"
+            print_usage
+            exit $STATE_UNKNOWN
+            ;;
+    esac
+    shift
+done
+
+# CPU utilization statistics: only Linux is handled here; any other platform exits UNKNOWN.
+case $(uname) in
+    Linux ) CPU_REPORT=$(LC_ALL=C "$IOSTAT" -c "$INTERVAL_SEC" "$NUM_REPORT" | tail -n 2 | head -n 1 | tr -s " " " ")  # run the verified $IOSTAT; C locale keeps '.' as decimal separator
+            CPU_USER=$(echo $CPU_REPORT | cut -d " " -f 1)    # unquoted on purpose: echo drops the leading blank so field 1 is %user
+            CPU_SYSTEM=$(echo $CPU_REPORT | cut -d " " -f 3)
+            CPU_IOWAIT=$(echo $CPU_REPORT | cut -d " " -f 4)
+            CPU_IO=${CPU_IOWAIT/./}                            # drop the decimal point: 12.34 becomes 1234 (percent x 100)
+            CPU_IDLE=$(echo $CPU_REPORT | cut -d " " -f 6)
+            CPU_IDL=${CPU_IDLE/./}                             # same scaling for the idle value
+            ;;
+    *)      echo "UNKNOWN: $(uname) not yet supported by this plugin. Coming soon !"
+            exit $STATE_UNKNOWN
+            ;;
+esac
+
+WARNING_THRESH=$(( $WARNING_THRESHOLD * 100 ))   # thresholds are scaled by 100 to match CPU_IO / CPU_IDL
+CRIT_THRESH=$(( $CRITICAL_THRESHOLD * 100 ))
+WARN_IDLE_THRESH=$(( ${WARN_IDLE:--1} * 100 ))   # -1 disables the idle warning when -wi is not given
+CRIT_IDLE_THRESH=$(( ${CRIT_IDLE:--1} * 100 ))   # -1 disables the idle critical when -ci is not given
+
+OUTPUT="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=${CPU_IOWAIT}% idle=${CPU_IDLE}% | user = ${CPU_USER}, system = ${CPU_SYSTEM}, iowait = ${CPU_IOWAIT}, idle = ${CPU_IDLE} "
+
+find_top_five_procs_by_cpu() {
+    echo "TOP 5 PROCESSES BY CPU:"
+    ps -eo %cpu,cputime,user,pid,args --sort -%cpu | head -n 6 | awk '{command = substr($0, index($0,$5)); printf "%5s %12s %12s %6s %s\n", $1, $2, $3, $4, command}'
+}
+
+# Report: iowait thresholds are evaluated first, then the optional idle thresholds.
+if [ "$CPU_IO" -ge "$CRIT_THRESH" ]; then
+    echo "CRITICAL CPU iowait is > ${CRITICAL_THRESHOLD}%: ${OUTPUT}"
+    find_top_five_procs_by_cpu
+    exit $STATE_CRITICAL
+elif [ "$CPU_IO" -ge "$WARNING_THRESH" ]; then
+    echo "WARNING CPU iowait is > ${WARNING_THRESHOLD}%: ${OUTPUT}"
+    find_top_five_procs_by_cpu
+    exit $STATE_WARNING
+elif [ "$CPU_IDL" -le "$CRIT_IDLE_THRESH" ]; then
+    echo "CRITICAL CPU idle is < ${CRIT_IDLE}%: ${OUTPUT}"
+    find_top_five_procs_by_cpu
+    exit $STATE_CRITICAL
+elif [ "$CPU_IDL" -le "$WARN_IDLE_THRESH" ]; then
+    echo "WARNING CPU idle is < ${WARN_IDLE}%: ${OUTPUT}"
+    find_top_five_procs_by_cpu
+    exit $STATE_WARNING
+else
+    echo "OK: ${OUTPUT}"
+    exit $STATE_OK
+fi
+
diff --git a/contrib/nrpe-plugins/check_disk.sh b/contrib/nrpe-plugins/check_disk.sh
new file mode 100755
index 0000000..34af98e
--- /dev/null
+++ b/contrib/nrpe-plugins/check_disk.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# This script wraps around the `check_disk` command available in the nagios-plugins package.
+
+WARN_THRES="10%"
+CRIT_THRES="5%"
+EXCLUDE_PATH=""
+EXCLUDE_FS=""
+
+# We can exclude directories...
+EXCLUDE_OPTS=""
+EXCLUDE_FS_OPTS=""
+
+if [ -n "$EXCLUDE_PATH" ]
+then
+    EXCLUDE_OPTS="-x ${EXCLUDE_PATH}"
+fi
+
+if [ -n "$EXCLUDE_FS" ]
+then
+    EXCLUDE_FS_OPTS="-X ${EXCLUDE_FS}"  # -X excludes by filesystem type (-x is for a path); assumes EXCLUDE_FS names a type such as tmpfs
+fi
+
+# Call check_disk; the two *_OPTS variables stay unquoted so an empty value adds no argument.
+/usr/local/nagios-plugins/check_disk -l -e -w "$WARN_THRES" -c "$CRIT_THRES" $EXCLUDE_OPTS $EXCLUDE_FS_OPTS
+
+# Store the return code so we can exit with the right code even after doing other things.
+RETURN=$?
+
+# Print the check's thresholds.
+printf "\nTHRESHOLDS - WARNING:%s;CRITICAL:%s;\n\n" "$WARN_THRES" "$CRIT_THRES"
+# Print the output of `df` for the 'additional details' section.
+df -h
+
+exit $RETURN
diff --git a/contrib/nrpe-plugins/check_mem.pl b/contrib/nrpe-plugins/check_mem.pl
new file mode 100755
index 0000000..8617a8a
--- /dev/null
+++ b/contrib/nrpe-plugins/check_mem.pl
@@ -0,0 +1,181 @@
+#!/usr/bin/perl -w
+# $Id: check_mem.pl 2 2002-02-28 06:42:51Z egalstad $
+
+# check_mem.pl Copyright (C) 2000 Dan Larsson
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# you should have received a copy of the GNU General Public License
+# along with this program (or with Nagios); if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA
+
+# Tell Perl what we need to use
+use strict;
+use Getopt::Std;
+
+our ($opt_c, $opt_f, $opt_u, $opt_w,
+     $free_memory, $used_memory, $total_memory, $cached_memory,
+     $crit_level, $warn_level,
+     %exit_codes, @memlist,
+     $percent, $fmt_pct, $fmt_used, $fmt_free, $fmt_cached, $fmt_slabs,
+     $verb_err, $command_line, $slabs_reclaimable);
+
+# Predefined exit codes for Nagios
+%exit_codes = ('UNKNOWN' , 3,
+               'OK'      , 0,
+               'WARNING' , 1,
+               'CRITICAL', 2,);
+
+# Turn this to 1 to see reason for parameter errors (if any)
+$verb_err = 1;
+
+# Columns 2, 3, 4 and 7 of the "Mem" row of free: total, used, free, and what this script treats as cached. NOTE(review): confirm column 7 is "cached" for the installed free.
+$command_line = `free |grep Mem|awk '{print \$2,\$3,\$4,\$7}'`;
+
+@memlist = split ' ', $command_line;    # splitting on whitespace also discards the trailing newline
+unless (@memlist == 4 and $memlist[0] > 0) { print "Memory UNKNOWN - unable to parse the output of free\n"; exit $exit_codes{'UNKNOWN'}; }
+
+# Get the amount used by dentry_cache etc, as this counts as "free" too.
+$slabs_reclaimable = `grep SReclaimable /proc/meminfo | awk '{print \$2}'`;
+$slabs_reclaimable = ($slabs_reclaimable =~ /(\d+)/) ? $1 : 0;    # keep only the number; 0 when grep found nothing
+
+# Time for calculations. Cached and the slabs reclaimable shouldn't count as "used"
+# because they can and will be used by the kernel if needs be (before swapping)
+$cached_memory = $memlist[3];
+$used_memory = $memlist[1] - $cached_memory - $slabs_reclaimable;
+$free_memory = $memlist[2] + $cached_memory + $slabs_reclaimable;
+$total_memory = $memlist[0];
+
+# Scale every figure down by 1024 twice; the messages below label the results as GB.
+$used_memory = $used_memory / 1024 / 1024;
+$free_memory = $free_memory / 1024 / 1024;
+$total_memory = $total_memory / 1024 / 1024;
+$cached_memory = $cached_memory / 1024 / 1024;
+$slabs_reclaimable = $slabs_reclaimable / 1024 / 1024;
+
+# Some pretty formatting for output purposes.
+$fmt_free = sprintf "%.3f", $free_memory;
+$fmt_used = sprintf "%.3f", $used_memory;
+$fmt_cached = sprintf "%.3f", $cached_memory;
+$fmt_slabs = sprintf "%.3f", $slabs_reclaimable;
+
+# Get the options
+if ($#ARGV <= 0)    # numeric comparison: fewer than two arguments can never be a valid invocation
+{
+    usage();
+}
+else
+{
+    getopts('c:fuw:');
+}
+
+# Shortcircuit the switches
+if (!$opt_w or $opt_w == 0 or !$opt_c or $opt_c == 0)
+{
+    print "*** You must define WARN and CRITICAL levels!" if ($verb_err);
+    usage();
+}
+elsif (!$opt_f and !$opt_u)
+{
+    print "*** You must select to monitor either USED or FREE memory!" if ($verb_err);
+    usage();
+}
+
+# Check if levels are sane
+if ($opt_w <= $opt_c and $opt_f)
+{
+    print "*** WARN level must not be less than CRITICAL when checking FREE memory!" if ($verb_err);
+    usage();
+}
+elsif ($opt_w >= $opt_c and $opt_u)
+{
+    print "*** WARN level must not be greater than CRITICAL when checking USED memory!" if ($verb_err);
+    usage();
+}
+
+$warn_level = $opt_w;
+$crit_level = $opt_c;
+
+sub find_top_five_procs_by_mem {
+    # Print the ps header plus the top 5 processes by memory usage, sorted by RSS in descending order.
+    my @top_five_procs = qx/ps -eo %mem,rss,user,pid,args --sort -rss | head -n 6 | awk '{command = substr(\$0, index(\$0,\$5)); printf "%5s %12s %12s %6s %s\\n", \$1, \$2, \$3, \$4, command}'/;
+    print "TOP 5 PROCESSES BY MEMORY USAGE:\n";    # double quotes so \n is a real newline
+    foreach my $line (@top_five_procs) {
+        chomp $line;
+        print $line . "\n";
+    }
+}
+
+if ($opt_f)
+{
+    $percent = $free_memory / $total_memory * 100;
+    $fmt_pct = sprintf "%.1f", $percent;
+    if ($percent <= $crit_level)
+    {
+        print "Memory CRITICAL - $fmt_pct% free ($fmt_free GB total including $fmt_cached GB cached, $fmt_slabs GB reclaimable) \n";
+        find_top_five_procs_by_mem();
+        exit $exit_codes{'CRITICAL'};
+    }
+    elsif ($percent <= $warn_level)
+    {
+        print "Memory WARNING - $fmt_pct% free ($fmt_free GB total including $fmt_cached GB cached, $fmt_slabs GB reclaimable) \n";
+        find_top_five_procs_by_mem();
+        exit $exit_codes{'WARNING'};
+    }
+    else
+    {
+        print "Memory OK - $fmt_pct% free ($fmt_free GB total including $fmt_cached GB cached, $fmt_slabs GB reclaimable) \n";
+        exit $exit_codes{'OK'};
+    }
+}
+elsif ($opt_u)
+{
+    $percent = $used_memory / $total_memory * 100;
+    $fmt_pct = sprintf "%.1f", $percent;
+    if ($percent >= $crit_level)
+    {
+        print "Memory CRITICAL - $fmt_pct% used ($fmt_used GB total plus $fmt_cached GB cached, $fmt_slabs GB reclaimable)\n";
+        find_top_five_procs_by_mem();
+        exit $exit_codes{'CRITICAL'};
+    }
+    elsif ($percent >= $warn_level)
+    {
+        print "Memory WARNING - $fmt_pct% used ($fmt_used GB total plus $fmt_cached GB cached, $fmt_slabs GB reclaimable)\n";
+        find_top_five_procs_by_mem();
+        exit $exit_codes{'WARNING'};
+    }
+    else
+    {
+        print "Memory OK - $fmt_pct% used ($fmt_used GB total plus $fmt_cached GB cached, $fmt_slabs GB reclaimable)\n";
+        exit $exit_codes{'OK'};
+    }
+}
+
+# Print the usage text and exit with the UNKNOWN status; never returns.
+sub usage
+{
+    print "\ncheck_mem.pl v1.0 - Nagios Plugin\n\n";
+    print "usage:\n";
+    print " check_mem.pl -f|-u -w PERCENT -c PERCENT\n\n";
+    print "options:\n";
+    print " -f Check FREE memory\n";
+    print " -u Check USED memory\n";
+    print " -w PERCENT Percent free/used when to warn\n";
+    print " -c PERCENT Percent free/used when critical\n";
+    print "\nCopyright (C) 2000 Dan Larsson \n";
+    print "check_mem.pl comes with absolutely NO WARRANTY either implied or explicit\n";
+    print "This program is licensed under the terms of the\n";
+    print "GNU General Public License (check source code for details)\n";
+    exit $exit_codes{'UNKNOWN'};
+}
+
+
diff --git a/contrib/nrpe-plugins/nrpe-plugin-examples.md b/contrib/nrpe-plugins/nrpe-plugin-examples.md
new file mode 100644
index 0000000..bb333be
--- /dev/null
+++ b/contrib/nrpe-plugins/nrpe-plugin-examples.md
@@ -0,0 +1,11 @@
+# NRPE Plugin Examples
+
+The following NRPE scripts are provided as examples of check scripts that
+correspond to similarly named ``nagios-herald`` formatters. Their output is
+used by the formatters to provide more context in alerts.
+
+* ``check_cpu_stats.sh`` - A modified version of the script available at
+[http://exchange.nagios.org/directory/Plugins/System-Metrics/CPU-Usage-and-Load/check_cpu_stats-2Esh/details](http://exchange.nagios.org/directory/Plugins/System-Metrics/CPU-Usage-and-Load/check_cpu_stats-2Esh/details)
+* ``check_disk.sh`` - A wrapper around the ``check_disk`` command available in the ``nagios-plugins`` package.
+* ``check_mem.pl`` - A modified version of the script available at
+[http://exchange.nagios.org/directory/Plugins/System-Metrics/Memory/check_mem-2Epl/details](http://exchange.nagios.org/directory/Plugins/System-Metrics/Memory/check_mem-2Epl/details)