nagios 监控内存
nagios 版本:nagios-3.2.0
监控端:
1、定义插件
cd /usr/local/nagios/libexec
vim check_mem
把本文末尾的 check_mem 脚本内容添加到文件中,这样监控内存的程序就做好了。
赋予权限:
chmod +x /usr/local/nagios/libexec/check_mem
检查命令:
./check_mem -h
[root@wqk1 libexec]# ./check_mem -h
Wrong Syntax: check_mem -h
Usage: check_mem [-w|--warning]<percent free> [-c|--critical]<percent free>
由帮助可以看出是以剩余内存为判断标准
./check_mem -w 90 -c 50
WARNING - 1658 MB (82%) Free Memory
[root@wqk1 libexec]# ./check_mem -w 95 -c 90
CRITICAL - 1658 MB (82%) Free Memory
2、定义命令
vim /usr/local/nagios/etc/objects/commands.cfg
定义nrpe命令
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -t 30
}
定义内存命令
define command{
command_name check_mem
command_line $USER1$/check_mem -w $ARG1$ -c $ARG2$
}
3、定义被监控主机
vim /usr/local/nagios/etc/objects/localhost.cfg
define host {
host_name wqk_centos-107
alias centos-107
address 192.168.10.107
check_command check-host-alive
notification_options d,u,r
check_interval 1
max_check_attempts 2
contact_groups admins
notification_interval 10
notification_period 24x7
}
4、定义服务
vim /usr/local/nagios/etc/objects/localhost.cfg
define service {
host_name wqk_centos-107
service_description check_mem
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 5
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_mem ; 如果没有定义check_nrpe,可以写成check_mem!90!50(此时检查的是监控端本机的内存);而且如果监控端和被监控端的command字段不修改,只修改这里的数值就能影响到nagios监控结果。
}
5、检查排错
cd /usr/local/nagios/libexec
./check_nrpe -H 192.168.10.107 -c check_mem   # 被监控机命令定义后检查
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
6、重启服务
service nagios restart
==================================================
被监控端:
1、定义插件
cd /usr/local/nagios/libexec
vim check_mem
把本文末尾的 check_mem 脚本内容添加到文件中,这样监控内存的程序就做好了。
赋予权限:
chmod +x /usr/local/nagios/libexec/check_mem
检查命令:
./check_mem -h
./check_mem -w 90 -c 50
2、定义命令
编辑nrpe配置文件:
vim /usr/local/nagios/etc/nrpe.cfg
添加:
allowed_hosts=127.0.0.1,192.168.0.1(本机ip),192.168.0.100(监控机ip)
# 设置90是为了看到效果,实际生产中要调低
command[check_mem]=/usr/local/nagios/libexec/check_mem -w 90 -c 50
3、重启服务
ps aux | grep nrpe
kill掉nrpe 进程
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d   # 启动nrpe服务
4、远程测试命令
在监控端测试
cd /usr/local/nagios/libexec
./check_nrpe -H 192.168.10.107 -c check_mem
****************************************************************
check_mem 脚本:
#!/bin/bash
# check_mem - Nagios plugin: script to check real memory usage (percent free).
# L.Gill 02/05/06 - V.1.0
# ------------------------------------------
# ######## Script Modifications ##########
# ------------------------------------------
# Who    When      What
# ---    ----      ----
# LGill  17/05/06  "$percent" lt 1% fix - sed edits dc result beginning with "."
#
# NOTE: the percentage is now computed with shell integer arithmetic, so the
# dc/sed pipeline and the scratch files under /tmp are no longer used.
#
# Usage:
#   check_mem [-w|--warning]<percent free> [-c|--critical]<percent free>
#
# Both thresholds are whole-number percentages of FREE memory; the warning
# threshold must be greater than the critical threshold.
#
# Exit status:
#   0 (OK)        free percentage is above the warning threshold
#   1 (WARNING)   free percentage is at or below the warning threshold
#   2 (CRITICAL)  free percentage is at or below the critical threshold
#   3 (UNKNOWN)   bad command line, or memory figures could not be read

readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# Messages are built here, before main() consumes the arguments, so that
# "$*" still holds the command line exactly as the user typed it.
PROGNAME=$(basename "$0")
readonly PROGNAME
readonly USAGE="$PROGNAME [-w|--warning]<percent free> [-c|--critical]<percent free>"
readonly SYNTAX_USAGE="Wrong Syntax: $PROGNAME $*"
readonly THRESHOLD_USAGE="WARNING threshold must be greater than CRITICAL: $PROGNAME $*"

#######################################
# Print an error message followed by the usage line, then exit UNKNOWN.
# Output goes to stdout with the message on the first line, so the reason
# is the first thing a caller capturing stdout sees.
# Arguments: $1 - message to print
# Returns:   never; exits with STATE_UNKNOWN
#######################################
usage_exit() {
  printf '%s\n' "$1"
  printf 'Usage: %s\n' "$USAGE"
  exit "$STATE_UNKNOWN"
}

#######################################
# Read the "Mem:" row of `free -m` once.
# Outputs: "<total MB> <free MB>" on stdout.
#   - If the header has an "available" column, that column (field 7) is
#     used on its own as the free figure.
#   - Otherwise field 4 + field 7 is used, as the original script did.
# LC_ALL=C keeps the header text untranslated so the column test works.
#######################################
read_memory() {
  LC_ALL=C free -m | awk '
    NR == 1 { has_available = ($0 ~ /available/) }
    NR == 2 { print $2, (has_available ? $7 : $4 + $7) }'
}

#######################################
# Parse the thresholds, measure memory and report the plugin state.
# Arguments: the script's command line
# Outputs:   one status line on stdout
# Returns:   never; exits with one of the STATE_* codes
#######################################
main() {
  local warning="" critical=""
  local total="" free_mb="" percent

  # Two options with one value each are required.
  (( $# >= 4 )) || usage_exit "$SYNTAX_USAGE"

  while (( $# > 0 )); do
    case "$1" in
      -w|--warning)
        warning=${2:-}
        (( $# > 1 )) && shift
        ;;
      -c|--critical)
        critical=${2:-}
        (( $# > 1 )) && shift
        ;;
    esac
    shift
  done

  # Accept plain decimal integers only: anything else would be evaluated as
  # an arithmetic expression by the comparisons below.
  if [[ ! $warning =~ ^[0-9]+$ || ! $critical =~ ^[0-9]+$ ]]; then
    usage_exit "$SYNTAX_USAGE"
  fi
  # Force base 10 so values such as "08" are not rejected as invalid octal.
  warning=$((10#$warning))
  critical=$((10#$critical))

  (( warning > critical )) || usage_exit "$THRESHOLD_USAGE"

  read -r total free_mb < <(read_memory)
  if [[ ! $total =~ ^[0-9]+$ || ! $free_mb =~ ^[0-9]+$ ]] || (( total == 0 )); then
    printf 'UNKNOWN - unable to read memory figures from free\n'
    exit "$STATE_UNKNOWN"
  fi

  # percent free = (free mem / total mem) * 100, truncated to an integer.
  percent=$(( free_mb * 100 / total ))

  if (( percent <= critical )); then
    printf 'CRITICAL - %s MB (%s%%) Free Memory\n' "$free_mb" "$percent"
    exit "$STATE_CRITICAL"
  elif (( percent <= warning )); then
    printf 'WARNING - %s MB (%s%%) Free Memory\n' "$free_mb" "$percent"
    exit "$STATE_WARNING"
  fi

  printf 'OK - %s MB (%s%%) Free Memory\n' "$free_mb" "$percent"
  exit "$STATE_OK"
}

main "$@"