# 企业中对于服务器常规监控都有部署监控软件系统,如常用的zabbix、ganglia、nagios、observer等,但是对于特殊的业务监控,比如日志中出现某些关键字多少次后即报警通知负责人,对某些HTTP接口心跳监控或结果正确性检测等,这些特定需求运维也需要开发相应的脚本进行支持。一般监控有变更都需要通知运维人员来操作,我们也可以自己开发脚本实现简单的监控。
import commands
import fcntl
import os
import smtplib
import socket
import struct
import time
from email.mime.text import MIMEText
# Recipients of alert e-mails: one list element per address.
mailto_list = [
    'david1228@foxmail.com',
]
# Log checks to run, keyed by the alert text that is put in the e-mail body.
# Every value is a dict of strings:
#   logfile  - path of the log file to inspect
#   limitnum - threshold for the number of matching lines
#   readnum  - how many trailing lines of the log file are examined
#   kword    - keyword searched for in those lines
#   sg       - '<' alerts when the match count is below limitnum,
#              '>' alerts when it is above limitnum
check_list = {
    'mq:geturl_updatevideo:flush cache OK has a problem, please check!': {
        'logfile': '/home/ldw/logs/geturl/online/geturl_updatevideo.log',
        'limitnum': '10',
        'readnum': '200',
        'kword': 'flush cache OK',
        'sg': '<',
    },
    # NOTE(review): with sg='<' and limitnum='1' this rule fires when NO
    # "message error" line is found, which looks inverted relative to the
    # alert text -- confirm whether '>' was intended.
    'mq:geturl_updatevideo has message error, please check!': {
        'logfile': '/home/ldw/logs/geturl/online/geturl_updatevideo.log',
        'limitnum': '1',
        'readnum': '2000',
        'kword': 'message error',
        'sg': '<',
    },
}
# Look up the IPv4 address of a network interface such as eth0, eth1 or
# bond0 (bonded NICs).
def get_ip_address(ifname):
    """Return the IPv4 address of interface *ifname* as a dotted string.

    The address is obtained with the SIOCGIFADDR ioctl (request 0x8915) on
    a throw-away UDP socket.

    Args:
        ifname: Interface name, text or bytes. Only the first 15 bytes are
            passed to the ioctl.

    Returns:
        The address formatted by ``socket.inet_ntoa``, e.g. ``'10.0.0.5'``.

    Raises:
        IOError/OSError: if the ioctl fails, e.g. the interface does not
            exist or has no IPv4 address.
    """
    # struct.pack('256s', ...) requires bytes, so encode text names first.
    if not isinstance(ifname, bytes):
        ifname = ifname.encode('utf-8')
    request = struct.pack('256s', ifname[:15])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        reply = fcntl.ioctl(s.fileno(), 0x8915, request)  # SIOCGIFADDR
    finally:
        # Release the descriptor even when the ioctl raises.
        s.close()
    # Bytes 20..23 of the reply hold the packed IPv4 address.
    return socket.inet_ntoa(reply[20:24])
# Send an alert e-mail.
def send_mail(to_list, sub, content):
    """Send a plain-text e-mail through the configured SMTP account.

    Uses the module-level ``mail_host``, ``mail_user`` and ``mail_pass``
    settings. The body is also echoed to stdout before sending.

    Args:
        to_list: List of recipient addresses.
        sub: Subject line.
        content: Message body.

    Returns:
        True if the message was handed to the SMTP server, False if any
        step of connecting, logging in or sending raised (the error text
        is printed).
    """
    print(content)
    me = mail_user
    # An explicit charset is needed so Chinese text is encoded correctly.
    msg = MIMEText(content, _subtype='plain', _charset='gb2312')
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in mail headers are comma-separated.
    msg['To'] = ", ".join(to_list)

    server = None
    try:
        server = smtplib.SMTP()
        server.connect(mail_host)
        server.login(mail_user, mail_pass)
        server.sendmail(me, to_list, msg.as_string())
        return True
    except Exception as e:
        # Best effort: a failed alert must not abort the monitoring run.
        print(str(e))
        return False
    finally:
        # Close the connection on both the success and the failure path.
        if server is not None:
            server.close()
# SMTP account used for sending alert e-mails.
# NOTE(review): the password is hard-coded in the source; consider loading
# it from the environment or a protected config file instead.
mail_host = "smtp.126.com"
mail_user = "xyz@126.com"
mail_pass = "xyz"
mail_postfix = "126.com"

# Text appended to the IP prefix built in the __main__ section.
content = ""

# Maximum age, in seconds, of a log file's last modification for it to be
# checked (300 s = 5 minutes).
timeddiff = 300
def monitor_list(ethip):
    """Run every check in ``check_list`` and e-mail an alert on violations.

    For each entry, the last ``readnum`` lines of ``logfile`` are searched
    for ``kword`` with a ``tail | grep | wc -l`` pipeline. An alert is sent
    to ``mailto_list`` when the match count is below ``limitnum``
    (``sg == '<'``) or above it (``sg == '>'``). A log file that does not
    exist, or whose modification time is ``timeddiff`` seconds old or more,
    is skipped.

    Args:
        ethip: Text placed at the start of each alert body; the caller
            passes the host IP so the affected server can be identified.
    """
    for k, cfg in check_list.items():
        logfile = cfg['logfile']
        readnum = str(cfg['readnum'])
        limitnum = cfg['limitnum']
        kword = cfg['kword']
        sg = cfg['sg']

        # Only inspect log files that were modified within the last
        # `timeddiff` seconds (5 minutes with the default of 300).
        if not (os.path.exists(logfile)
                and (time.time() - os.stat(logfile).st_mtime) < timeddiff):
            print("File has not been updated :" + logfile)
            continue

        # NOTE(review): logfile and kword are interpolated into a shell
        # command unquoted; they must come from trusted configuration.
        cmdstring = ('tail -n ' + readnum + ' ' + logfile
                     + ' | grep "' + kword + '" |wc -l 2>&1')
        cmdstatus, cmdoutput = commands.getstatusoutput(cmdstring)

        try:
            count = int(cmdoutput.strip())
        except ValueError:
            # The pipeline printed something other than a bare number
            # (e.g. an error message); report it and move on.
            print("Unexpected output from '" + cmdstring + "': " + cmdoutput)
            continue

        limit = int(limitnum)
        if (sg == '<' and count < limit) or (sg == '>' and count > limit):
            # Lead the mail body with the server IP to ease troubleshooting.
            content = ethip + k + ":" + cmdoutput + "/" + readnum + "\n"
            send_mail(mailto_list, "Monitor Warning!!!", content)
        else:
            print("Normal monitoring service:" + logfile)
if __name__ == '__main__':
    # Identify this host in every alert. The address of the bonded NIC is
    # preferred; if 'bond0' cannot be queried, fall back to the hostname
    # so monitoring still runs instead of crashing at start-up.
    try:
        host_id = get_ip_address('bond0')
    except (IOError, OSError):
        host_id = socket.gethostname()
    ethip = "[" + host_id + "]\n" + content
    monitor_list(ethip)