代码如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#2018/05/17
import os
import requests
import re
import xml.sax
import xml.dom.minidom as XmlDocument


class MovieHandler(xml.sax.ContentHandler):
    """SAX content handler that collects one record per ``<Vul>`` element.

    While parsing, the text of the most recent ``<KB>`` and ``<VulType>``
    elements is remembered.  Each time a ``</Vul>`` end tag is seen, a
    ``{'KB': ..., 'Type': ...}`` dict is printed and appended to ``data``.
    """

    def __init__(self):
        # Initialise the base class explicitly; this form works on both
        # Python 2 and Python 3.
        xml.sax.ContentHandler.__init__(self)
        self.index = 0         # number of <Vul> start tags seen so far
        self.KB = ''           # text of the most recent <KB> element
        self.VulType = ''      # text of the most recent <VulType> element
        self.CurrentData = ''  # tag whose text is being read ('' if none)
        self.data = []         # collected records, in document order

    # Element start event.
    def startElement(self, tag, attributes):
        if tag == 'Vul':
            self.index += 1
        elif tag == 'KB':
            # Start a fresh buffer; characters() appends to it.
            self.KB = ''
        elif tag == 'VulType':
            self.VulType = ''
        self.CurrentData = tag

    # Element end event.
    def endElement(self, tag):
        if tag == 'Vul':
            # The first two characters of the KB text are dropped
            # (presumably a literal "KB" prefix -- confirm against the
            # input file).
            record = {'KB': self.KB[2:], 'Type': self.VulType}
            print(record)
            self.data.append(record)
        # Stop capturing: text that follows an end tag (indentation,
        # newlines) must not overwrite the value that was just read.
        self.CurrentData = ''

    # Character data event.
    def characters(self, content):
        # The parser may deliver one text node in several chunks, so the
        # chunks are concatenated rather than assigned.
        if self.CurrentData == 'KB':
            self.KB += content
        elif self.CurrentData == 'VulType':
            self.VulType += content

    def get(self):
        """Return the list of records collected so far."""
        return self.data


# Parse the XML file and collect every KB number it contains.
def get_dirname(path='TVL00001.tvl.xml'):
    """Parse the vulnerability library XML and return its KB records.

    Args:
        path: Source handed to the SAX parser, normally the file name of
            the XML to parse.  Defaults to the full library file
            ``'TVL00001.tvl.xml'`` in the current working directory.

    Returns:
        The list built by ``MovieHandler``: one ``{'KB': ..., 'Type': ...}``
        dict per ``<Vul>`` element, in document order.
    """
    parser = xml.sax.make_parser()
    # Turn off namespace processing; tags are matched by their plain names.
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)

    handler = MovieHandler()
    parser.setContentHandler(handler)
    parser.parse(path)
    return handler.get()


# Input: KB number of an operating-system patch.
# Output: rows that still have to be grouped/de-duplicated.
def get_patch(KB, Type):
    """Search the Microsoft Update Catalog for ``KB`` and list its successors.

    The search result page is scanned for system names (text matching
    ``for <name> (KB...)``) and for update ids (arguments of
    ``goToDetails("...")``).  The two lists are paired by position.

    Args:
        KB: KB number to search for; appended verbatim to the search URL.
        Type: Vulnerability type, copied unchanged into every result row.

    Returns:
        A list of ``[KB, Type, system_enum, newest_kb]`` rows.  Rows whose
        system name is not known to ``system_matching`` and rows whose
        newest KB equals ``KB`` itself are left out.
    """
    url = 'http://www.catalog.update.microsoft.com/Search.aspx?q=' + KB
    content = requests.get(url).content
    system_names = re.findall(r'for (.*) \(KB.*\)', content)
    update_ids = re.findall(r'goToDetails\("(.*)"\)', content)

    result = []
    # zip() stops at the shorter list, so a page with fewer update ids than
    # system names no longer raises IndexError.
    for system_name, update_id in zip(system_names, update_ids):
        system = system_matching(system_name)
        if system is None:
            # Unknown system: skip before paying for the details request.
            continue
        newest_kb = get_patch_info(update_id)
        if newest_kb == KB:
            continue
        result.append([KB, Type, system, newest_kb])
    return result


# Input: KB number of a non-operating-system patch.
# Output: rows that still have to be grouped/de-duplicated.
def get_patch_2(KB, Type):
    """Search the Microsoft Update Catalog for ``KB`` without system matching.

    Args:
        KB: KB number to search for; appended verbatim to the search URL.
        Type: Vulnerability type, copied unchanged into every result row.

    Returns:
        ``None`` when the result page contains no ``goToDetails("...")``
        update id.  Otherwise a list of ``[KB, Type, '', newest_kb]`` rows,
        one per update id whose newest KB differs from ``KB`` (the system
        field is always the empty string).
    """
    url = 'http://www.catalog.update.microsoft.com/Search.aspx?q=' + KB
    content = requests.get(url).content
    update_ids = re.findall(r'goToDetails\("(.*)"\)', content)
    if not update_ids:
        return None

    result = []
    for update_id in update_ids:
        newest_kb = get_patch_info(update_id)
        if newest_kb != KB:
            result.append([KB, Type, '', newest_kb])
    return result


# Open the details page of an update id and return the newest patch KB.
def get_patch_info(id):
    """Follow update-id links on catalog details pages and return a KB.

    Starting from ``id``, the ``ScopedViewInline.aspx`` page of the update
    is downloaded.  If the page contains more than one ``updateid=`` link,
    the second link is followed and the process repeats; otherwise the KB
    number found on the current page is returned.  (Presumably the first
    link is the page's own update and the second one the update replacing
    it -- confirm against the live page markup.)

    Args:
        id: Update id string appended to the details-page URL.

    Returns:
        The first KB number found on the last page visited: the text inside
        ``(KB...)`` or, if there is none, the text captured after ``/kb/``.

    Raises:
        IndexError: If the last page visited contains no KB number.
    """
    update_id = id
    visited = set()
    # Iterative walk instead of recursion: pages that link back to an
    # already visited update would otherwise recurse without end.
    while True:
        visited.add(update_id)
        url = ('http://www.catalog.update.microsoft.com/'
               'ScopedViewInline.aspx?updateid=' + update_id)
        content = requests.get(url).content
        linked_ids = re.findall(r"updateid=(.{36})", content)
        if len(linked_ids) > 1 and linked_ids[1] not in visited:
            update_id = linked_ids[1]
            continue

        # End of the chain (zero or one link, or a link loop): read the KB
        # number from this page.
        kb_numbers = re.findall(r"\(KB(.{1,8})\)", content)
        if not kb_numbers:
            # No "(KB...)" text on the page; fall back to the /kb/ link.
            kb_numbers = re.findall(r'/kb/(.*)"', content)
        if not kb_numbers:
            raise IndexError(
                'no KB number found on the details page of update %s'
                % update_id)
        return kb_numbers[0]


# System name matching.
# The table lists every catalog system name this script recognises; add an
# entry here to support another system.
_SYSTEM_ENUMS = {
    # Windows XP
    'Windows XP': 'enumSV_WinXp',
    'Windows XP for x64-based Systems': 'enumSV_WinXp_X64',
    'Windows XP x64 Edition': 'enumSV_WinXp_X64',
    # Windows Server 2003
    'Windows Server 2003': 'enumSV_2003',
    'Windows Server 2003 for x64-based Systems': 'enumSV_2003_X64',
    'Windows Server 2003 x64 Edition': 'enumSV_2003_X64',
    # Windows Vista
    'Windows Vista': 'enumSV_WinVista',
    'Windows Vista for x64-based Systems': 'enumSV_WinVista_X64',
    'Windows Vista for x64 based Systems': 'enumSV_WinVista_X64',
    # Windows Server 2008
    'Windows Server 2008': 'enumSV_Windows_2008',
    'Windows Server 2008 for x64-based Systems': 'enumSV_Windows_2008_X64',
    'Windows Server 2008 x64 Edition': 'enumSV_Windows_2008_X64',
    'Server 2008 x64': 'enumSV_Windows_2008_X64',
    # Windows Server 2008 R2
    'Windows Server 2008 R2 for x64-based Systems':
        'enumSV_Windows_2008_R2_X64',
    'Windows Server 2008 R2 x64 Edition': 'enumSV_Windows_2008_R2_X64',
    'Server 2008 R2 for x64': 'enumSV_Windows_2008_R2_X64',
    # Windows 7
    'Windows 7': 'enumSV_Windows_7',
    'Windows 7 for x86-based Systems': 'enumSV_Windows_7',
    'Windows 7 for x64-based Systems': 'enumSV_Windows_7_X64',
    # Windows 8
    'Windows 8': 'enumSV_Windows_8',
    'Windows 8 for x64-based Systems': 'enumSV_Windows_8_X64',
    'Windows Embedded 8 Standard for x64-based Systems':
        'enumSV_Windows_8_X64',
    # Windows 8.1
    'Windows 8.1': 'enumSV_Windows_8_SP1',
    'Windows 8.1 for x86-based Systems': 'enumSV_Windows_8_SP1',
    'Windows 8.1 for x64-based Systems': 'enumSV_Windows_8_SP1_X64',
    # Windows Server 2012
    'Windows Server 2012': 'enumSV_Windows_2012_X64',
    'Windows Server 2012 for x64-based Systems': 'enumSV_Windows_2012_X64',
    # Windows Server 2012 R2
    'Windows Server 2012 R2': 'enumSV_Windows_2012_R2_X64',
    'Windows Server 2012 R2 for x64-based Systems':
        'enumSV_Windows_2012_R2_X64',
}


def system_matching(Microsoftservername):
    """Map a catalog system name to its ``enumSV_*`` identifier.

    Args:
        Microsoftservername: System name exactly as extracted from the
            catalog page; the lookup is an exact, case-sensitive match.

    Returns:
        The matching ``enumSV_*`` string, or ``None`` for an unknown name.
    """
    return _SYSTEM_ENUMS.get(Microsoftservername)

# Group ("de-duplicate") the rows by their newest KB.
def Duplicate_removal(Vuli):
    """Group rows that share the same value in column 3.

    Args:
        Vuli: List of rows; each row is indexable and ``row[3]`` is the
            grouping key.  The list is not modified.

    Returns:
        A list of groups, each group being a list of the rows with equal
        ``row[3]``.  Groups appear in order of their first row, and rows
        keep their input order inside a group.  An empty input gives ``[]``.

        Special case kept for existing callers: when ``Vuli`` holds exactly
        two equal rows, that single row is returned directly (a flat row,
        not a list of groups).
    """
    if len(Vuli) == 2 and Vuli[0] == Vuli[1]:
        return Vuli[0]

    groups = []
    for row in Vuli:
        for group in groups:
            # Compare against the first row of the group, as the key of
            # every row in a group is equal to it.
            if group[0][3] == row[3]:
                group.append(row)
                break
        else:
            groups.append([row])
    return groups

# After grouping, merge the system names of every group into one row.
def System_merging(Vulis):
    """Collapse each group of rows into a single row with a system list.

    Args:
        Vulis: List of non-empty groups; each group is a list of
            ``[kb, type, system, newest_kb]`` rows.

    Returns:
        One ``[kb, type, [system, ...], newest_kb]`` row per group.  The
        kb, type and newest_kb values come from the group's first row; the
        systems are listed in group order.
    """
    merged = []
    for group in Vulis:
        first = group[0]
        systems = [row[2] for row in group]
        merged.append([first[0], first[1], systems, first[3]])
    return merged

def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    parent.appendChild(element)
    element.appendChild(doc.createTextNode(text))
    return element


# Build the XML nodes for one row.
# input ['1111','2',['win7','win8'],'22222']
def create_xml_files(doc, Vu, vul_list=None):
    """Append one ``<Vul>`` element describing ``Vu`` to the vulnerability list.

    The generated structure is::

        <Vul>
          <kbid>Vu[0]</kbid>
          <VulType>Vu[1]</VulType>
          <ByReplaceList><ByKbid>Vu[3]</ByKbid></ByReplaceList>
          <SystemList><System>...</System>...</SystemList>
        </Vul>

    Nothing is written to disk here; only the in-memory DOM is extended.

    Args:
        doc: ``xml.dom.minidom`` document used to create the nodes.
        Vu: Row ``[kbid, vul_type, systems, by_kbid]``.  ``systems`` is
            normally a list of system names; a single value that is not a
            list or tuple is treated as a one-element list.
        vul_list: Element that receives the new ``<Vul>`` node.  When
            omitted, the module-level ``VulList`` created by the script
            entry point is used.

    Returns:
        ``'system is None'`` when the system list is ``['']`` (the
        ``<SystemList>`` then only holds a newline and a tab), otherwise
        ``None``.
    """
    parent = VulList if vul_list is None else vul_list

    systems = Vu[2]
    if not isinstance(systems, (list, tuple)):
        # A bare string would otherwise be iterated character by character.
        systems = [systems]

    vul = doc.createElement('Vul')
    parent.appendChild(vul)
    _append_text_element(doc, vul, 'kbid', Vu[0])
    _append_text_element(doc, vul, 'VulType', Vu[1])

    by_replace_list = doc.createElement('ByReplaceList')
    vul.appendChild(by_replace_list)
    _append_text_element(doc, by_replace_list, 'ByKbid', Vu[3])

    system_list = doc.createElement('SystemList')
    vul.appendChild(system_list)

    if systems == ['']:
        # No system information: keep the element but fill it with
        # whitespace only.
        system_list.appendChild(doc.createTextNode('\n' + '\t'))
        return 'system is None'
    for sub_system in systems:
        _append_text_element(doc, system_list, 'System', sub_system)

if __name__ == '__main__':
    # Script entry point: read the KB records, look every KB up in the
    # update catalog and write the replacement information to VulExpir.xml.

    # Only the entry point needs codecs, so it is imported locally.
    import codecs

    if os.path.exists('VulExpir.xml'):
        os.remove('VulExpir.xml')

    # Drop repeated (KB, Type) records while keeping first-seen order.
    kb_number = get_dirname()
    seen_records = set()
    news_kb_number = []
    for kb in kb_number:
        record_key = (kb['KB'], kb['Type'])
        if record_key not in seen_records:
            seen_records.add(record_key)
            news_kb_number.append(kb)

    # XML root nodes.  VulList has to stay a module-level name because
    # create_xml_files() appends to it by default.
    doc = XmlDocument.Document()
    VulExpiration = doc.createElement('VulExpiration')
    doc.appendChild(VulExpiration)

    VulList = doc.createElement('VulList')
    VulExpiration.appendChild(VulList)

    num = len(news_kb_number)
    try:
        for n, i in enumerate(news_kb_number, 1):
            # Progress message; the parenthesised form is valid on both
            # Python 2 and Python 3.
            print('当前进度%d,总数%d' % (n, num))
            if i['Type'] == '2':
                Vulist = get_patch_2(i['KB'], i['Type'])
            else:
                # Rows that still need grouping by newest KB.
                Vulist = get_patch(i['KB'], i['Type'])
            if Vulist is None:
                continue

            Vulis = Duplicate_removal(Vulist)
            if Vulis and not isinstance(Vulis[0], list):
                # Duplicate_removal() returned one flat row
                # [kb, type, system, newest_kb]; wrap the system so it is
                # emitted as a single <System> element.
                Vuli = [[Vulis[0], Vulis[1], [Vulis[2]], Vulis[3]]]
            else:
                Vuli = System_merging(Vulis)
            for Vu in Vuli:
                create_xml_files(doc, Vu)
    finally:
        # Write once, even if a lookup failed part-way, so everything
        # collected so far is kept.  writexml()'s ``encoding`` argument only
        # fills in the XML declaration, so the file itself is opened as
        # UTF-16 to make the bytes match the declared encoding.
        with codecs.open('VulExpir.xml', 'w', encoding='utf-16') as f:
            doc.writexml(f, addindent=' ', newl='\n', encoding='utf-16')