代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#2018/05/17
import os
import requests
import re
import xml.sax
import xml.dom.minidom as XmlDocument
class MovieHandler(xml.sax.ContentHandler):
    """SAX content handler that records one dict per ``<Vul>`` element.

    While parsing, the text of the ``<KB>`` and ``<VulType>`` elements is
    remembered; every time a ``</Vul>`` end tag is seen a record
    ``{'KB': <KB text without its first two characters>, 'Type': <VulType>}``
    is printed and appended to ``self.data``.

    Values are not cleared between ``<Vul>`` elements, so an entry that lacks
    a ``<KB>`` or ``<VulType>`` child reuses the last value that was seen.
    """

    # Elements whose text content is captured.
    _TRACKED_TAGS = ('KB', 'VulType')

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.index = 0         # number of <Vul> start tags seen so far
        self.KB = ''           # text of the most recent <KB> element
        self.VulType = ''      # text of the most recent <VulType> element
        self.CurrentData = ''  # tag currently open; '' between elements
        self.data = []         # collected {'KB': ..., 'Type': ...} records
        self._chunks = []      # text pieces of the element currently open

    # Element start event.
    def startElement(self, tag, attributes):
        if tag == 'Vul':
            self.index += 1
        self.CurrentData = tag
        self._chunks = []

    # Element end event.
    def endElement(self, tag):
        # Commit the buffered text only when the element actually had text,
        # so an empty element leaves the previous value untouched.
        if tag == self.CurrentData and self._chunks:
            text = ''.join(self._chunks)
            if tag == 'KB':
                self.KB = text
            elif tag == 'VulType':
                self.VulType = text
        # Forget the open tag: whitespace that follows a closing tag must not
        # be mistaken for the content of the element that just ended.
        self.CurrentData = ''
        self._chunks = []
        if tag == 'Vul':
            # KB[2:] drops the first two characters (presumably the literal
            # "KB" prefix -- confirm against the library file).
            record = {'KB': self.KB[2:], 'Type': self.VulType}
            print(record)
            self.data.append(record)

    # Character data event.
    def characters(self, content):
        # A parser may deliver one text node in several pieces, so the pieces
        # are buffered here and joined when the element ends.
        if self.CurrentData in self._TRACKED_TAGS:
            self._chunks.append(content)

    def get(self):
        """Return the list of records collected so far."""
        return self.data
def get_dirname(path='TVL00001.tvl.xml'):
    """Parse the XML library file and return every KB record found in it.

    :param path: file to parse; defaults to the full library file
        ``TVL00001.tvl.xml`` in the current directory.
    :return: the list collected by ``MovieHandler`` -- one
        ``{'KB': ..., 'Type': ...}`` dict per ``<Vul>`` element.
    """
    parser = xml.sax.make_parser()
    # Turn namespace processing off so the handler sees plain tag names.
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    # Install our own content handler.
    handler = MovieHandler()
    parser.setContentHandler(handler)
    parser.parse(path)
    return handler.get()
def get_patch(KB, Type):
    """Look up an operating-system KB number and list what replaces it.

    The Microsoft Update Catalog search page for ``KB`` is fetched; each
    product name found on it is paired with the update id found at the same
    position, and the update id is resolved with ``get_patch_info``.

    :param KB: KB number to search for (without the ``KB`` prefix).
    :param Type: vulnerability type, copied into every output row.
    :return: list of ``[KB, Type, system_enum, new_kb]`` rows. Rows whose
        product name is not recognised by ``system_matching`` and rows whose
        resolved KB equals ``KB`` are left out. The rows still need
        de-duplication.
    """
    url = 'http://www.catalog.update.microsoft.com/Search.aspx?q=' + KB
    # NOTE(review): Response.content is a byte string; the str patterns below
    # match it under Python 2 only.
    content = requests.get(url).content
    system_names = re.findall(r'for (.*) \(KB.*\)', content)  # product names
    upid = re.findall(r'goToDetails\("(.*)"\)', content)

    result = []
    # zip() stops at the shorter list, so a page with fewer update ids than
    # product names no longer raises IndexError.
    for system_name, update_id in zip(system_names, upid):
        system = system_matching(system_name)
        if system is None:
            # Unknown product: skip before paying for the detail request.
            continue
        get_new_kb = get_patch_info(update_id)
        if get_new_kb == KB:
            continue
        result.append([KB, Type, system, get_new_kb])
    return result
def get_patch_2(KB, Type):
    """Look up a non-operating-system KB number and list what replaces it.

    Same search as ``get_patch`` but no product name is extracted; the system
    column of every row is the empty string.

    :param KB: KB number to search for (without the ``KB`` prefix).
    :param Type: vulnerability type, copied into every output row.
    :return: ``None`` when the search page contains no update id; otherwise a
        list of ``[KB, Type, '', new_kb]`` rows, leaving out rows whose
        resolved KB equals ``KB``. The rows still need de-duplication.
    """
    url = 'http://www.catalog.update.microsoft.com/Search.aspx?q=' + KB
    # NOTE(review): Response.content is a byte string; the str pattern below
    # matches it under Python 2 only.
    content = requests.get(url).content
    upid = re.findall(r'goToDetails\("(.*)"\)', content)
    if not upid:
        return None

    result = []
    for update_id in upid:
        get_new_kb = get_patch_info(update_id)
        if get_new_kb != KB:
            result.append([KB, Type, '', get_new_kb])
    return result
def get_patch_info(id):
    """Follow an update's detail pages and return the final KB number.

    The detail page of the update is fetched. When the page contains at
    least two ``updateid=`` values, the second one is followed (presumably
    the update that replaces this one -- confirm against the page layout) and
    the process repeats. Otherwise the first KB number on the page is
    returned.

    :param id: 36-character update id taken from the search page. The name
        shadows the builtin but is kept for compatibility with callers.
    :return: the first KB number found on the last page visited.
    :raises IndexError: if the last page visited contains no KB number.
    """
    visited = set()  # ids already fetched, to stop on a cyclic chain
    update_id = id
    while True:
        visited.add(update_id)
        url = ('http://www.catalog.update.microsoft.com/'
               'ScopedViewInline.aspx?updateid=' + update_id)
        # NOTE(review): Response.content is a byte string; the str patterns
        # below match it under Python 2 only.
        content = requests.get(url).content
        upid = re.findall(r"updateid=(.{36})", content)
        # The first KB number on the page is the KB of the page itself.
        data2 = re.findall(r"\(KB(.{1,8})\)", content)
        if not data2:
            # No "(KBnnn)" text on the page: fall back to a /kb/ link.
            data2 = re.findall(r'/kb/(.*)"', content)

        if len(upid) >= 2 and upid[1] not in visited:
            update_id = upid[1]
            continue

        # End of the chain: no further id, or the next id was already seen.
        if not data2:
            raise IndexError(
                'no KB number found on the page of update %s' % update_id)
        return data2[0]
# Product names as they appear on the catalog page, mapped to the system
# identifiers written to the output. Add an entry here to recognise another
# system.
_SYSTEM_NAME_TO_ENUM = {
    'Windows 7': 'enumSV_Windows_7',
    'Windows 7 for x64-based Systems': 'enumSV_Windows_7_X64',
    'Windows Server 2008 R2 for x64-based Systems': 'enumSV_Windows_2008_R2_X64',
    'Windows Server 2008': 'enumSV_Windows_2008',
    'Windows Vista': 'enumSV_WinVista',
    'Windows Server 2008 for x64-based Systems': 'enumSV_Windows_2008_X64',
    'Windows Vista for x64-based Systems': 'enumSV_WinVista_X64',
    'Windows Server 2012 R2 for x64-based Systems': 'enumSV_Windows_2012_R2_X64',
    'Windows 8.1 for x64-based Systems': 'enumSV_Windows_8_SP1_X64',
    'Windows Server 2012 for x64-based Systems': 'enumSV_Windows_2012_X64',
    'Windows Embedded 8 Standard for x64-based Systems': 'enumSV_Windows_8_X64',
    'Windows 7 for x86-based Systems': 'enumSV_Windows_7',
    'Windows 8.1 for x86-based Systems': 'enumSV_Windows_8_SP1',
    'Windows 8.1': 'enumSV_Windows_8_SP1',
    'Windows 8': 'enumSV_Windows_8',
    'Windows 8 for x64-based Systems': 'enumSV_Windows_8_X64',
    'Windows Server 2003 for x64-based Systems': 'enumSV_2003_X64',
    'Windows XP for x64-based Systems': 'enumSV_WinXp_X64',
    'Windows Server 2003': 'enumSV_2003',
    'Windows XP': 'enumSV_WinXp',
    'Windows Server 2012 R2': 'enumSV_Windows_2012_R2_X64',
    'Windows Server 2008 R2 x64 Edition': 'enumSV_Windows_2008_R2_X64',
    'Windows Server 2003 x64 Edition': 'enumSV_2003_X64',
    'Windows Server 2008 x64 Edition': 'enumSV_Windows_2008_X64',
    'Windows Server 2012': 'enumSV_Windows_2012_X64',
    'Windows XP x64 Edition': 'enumSV_WinXp_X64',
    'Server 2008 R2 for x64': 'enumSV_Windows_2008_R2_X64',
    'Server 2008 x64': 'enumSV_Windows_2008_X64',
    'Windows Vista for x64 based Systems': 'enumSV_WinVista_X64',
}


def system_matching(Microsoftservername):
    """Translate a catalog product name into its system identifier.

    :param Microsoftservername: product name exactly as extracted from the
        catalog page; the match is case- and whitespace-sensitive.
    :return: the matching ``enumSV_*`` string, or ``None`` when the name is
        not in the table.
    """
    return _SYSTEM_NAME_TO_ENUM.get(Microsoftservername)
def Duplicate_removal(Vuli):
    """Group rows that share the same replacement KB (column 3).

    :param Vuli: list of ``[KB, Type, system, new_kb]`` rows. The list is
        not modified.
    :return: normally a list of groups, each group being the list of rows
        with an equal ``new_kb``; groups appear in order of first occurrence
        and rows keep their input order. Special case kept for existing
        callers: when the input is exactly two equal rows, that single row
        itself (a flat 4-item list) is returned instead of a list of groups.
    """
    if len(Vuli) == 2 and Vuli[0] == Vuli[1]:
        return Vuli[0]

    out_list = []
    for row in Vuli:
        # Join the first group whose leading row has the same new_kb ...
        for group in out_list:
            if group[0][3] == row[3]:
                group.append(row)
                break
        else:
            # ... or open a new group when none matches.
            out_list.append([row])
    return out_list
def System_merging(Vulis):
    """Collapse each group of rows into one row with a list of systems.

    :param Vulis: list of groups as produced by ``Duplicate_removal``; every
        group is a non-empty list of ``[KB, Type, system, new_kb]`` rows.
    :return: one ``[KB, Type, [system, ...], new_kb]`` row per group. KB,
        Type and new_kb come from the first row of the group. Each system
        name appears once, in order of first occurrence, so a group of
        non-system rows yields exactly ``['']``.
    """
    out = []
    for group in Vulis:
        head = group[0]
        systems = []
        for row in group:
            # Several product names can map to the same system identifier;
            # keep only the first occurrence.
            if row[2] not in systems:
                systems.append(row[2])
        out.append([head[0], head[1], systems, head[3]])
    return out
def _find_vul_list(doc):
    """Return the <VulList> element directly below the document root."""
    root = doc.documentElement
    if root is not None:
        for child in root.childNodes:
            if (child.nodeType == child.ELEMENT_NODE
                    and child.tagName == 'VulList'):
                return child
    raise ValueError('document has no <VulList> element below its root')


def _append_element(doc, parent, tag, text=None):
    """Create <tag> under parent, optionally holding one text node."""
    element = doc.createElement(tag)
    parent.appendChild(element)
    if text is not None:
        element.appendChild(doc.createTextNode(text))
    return element


def create_xml_files(doc, Vu):
    """Append one <Vul> entry describing ``Vu`` to the document's <VulList>.

    The entry has the children ``kbid``, ``VulType``,
    ``ByReplaceList/ByKbid`` and ``SystemList`` (one ``System`` child per
    system name).

    :param doc: minidom document whose root element contains a ``<VulList>``
        child; the entry is appended there.
    :param Vu: ``[kbid, type, systems, replacing_kb]``, for example
        ``['1111', '2', ['win7', 'win8'], '22222']``. ``systems`` may also
        be a single string, which is treated as a one-item list.
    :return: ``'system is None'`` when the only system is the empty string
        (the ``SystemList`` then holds just whitespace); otherwise ``None``.
    :raises ValueError: if the document has no ``<VulList>`` element.
    """
    vul = _append_element(doc, _find_vul_list(doc), 'Vul')
    _append_element(doc, vul, 'kbid', Vu[0])
    _append_element(doc, vul, 'VulType', Vu[1])
    by_replace_list = _append_element(doc, vul, 'ByReplaceList')
    _append_element(doc, by_replace_list, 'ByKbid', Vu[3])
    system_list = _append_element(doc, vul, 'SystemList')

    systems = Vu[2]
    if not isinstance(systems, (list, tuple)):
        # A bare name would otherwise be iterated character by character.
        systems = [systems]
    if list(systems) == ['']:
        system_list.appendChild(doc.createTextNode('\n' + '\t'))
        return 'system is None'
    for sub_system in systems:
        _append_element(doc, system_list, 'System', sub_system)
if __name__ == '__main__':
    # Start from a clean output file.
    if os.path.exists('VulExpir.xml'):
        os.remove('VulExpir.xml')

    # Drop repeated (KB, Type) records while keeping first-seen order.
    seen = set()
    news_kb_number = []
    for kb in get_dirname():
        key = (kb['KB'], kb['Type'])
        if key not in seen:
            seen.add(key)
            news_kb_number.append(kb)

    # XML root nodes: <VulExpiration><VulList>...</VulList></VulExpiration>.
    # These stay module-level names so functions in this file can reach them.
    doc = XmlDocument.Document()
    VulExpiration = doc.createElement('VulExpiration')
    doc.appendChild(VulExpiration)
    VulList = doc.createElement('VulList')
    VulExpiration.appendChild(VulList)

    # Resolve every KB and write its entries into the document.
    num = len(news_kb_number)
    for n, i in enumerate(news_kb_number, 1):
        # Parenthesised form prints the same text under Python 2 and 3.
        print('当前进度%d,总数%d' % (n, num))
        if i['Type'] == '2':
            Vulist = get_patch_2(i['KB'], i['Type'])
        else:
            Vulist = get_patch(i['KB'], i['Type'])  # rows still to de-duplicate
        if Vulist is None:
            continue
        Vulis = Duplicate_removal(Vulist)
        if len(Vulis) == 4 and not isinstance(Vulis[0], list):
            # Duplicate_removal returned one flat row instead of groups.
            # Wrap its system name in a list so it is written as one
            # <System> element, not one element per character.
            create_xml_files(doc, [Vulis[0], Vulis[1], [Vulis[2]], Vulis[3]])
            continue
        for Vu in System_merging(Vulis):
            create_xml_files(doc, Vu)

    # NOTE(review): writexml's encoding argument only sets the encoding named
    # in the XML declaration; the file is opened without an explicit
    # encoding, so the bytes on disk are probably not UTF-16. Confirm what
    # the consumer of VulExpir.xml expects before changing either side.
    with open('VulExpir.xml', 'w') as f:
        doc.writexml(f, addindent=' ', newl='\n', encoding='utf-16')