python中用来下载插件的代码

转载
mob64ca13fc5fb6 2024-09-06 19:39:10
文章标签 python中用来下载插件的代码 python firefox 开发语言 html 文章分类 Python 后端开发
不废话，直接上代码
#-*-coding:GBK -*-
import urllib.request
import requests
import re
import webbrowser
import ctypes #隐藏控制台库
import threading  #多线程
import random
from tkinter import *
import tkinter as tk
import sys
from urllib.parse import urlencode
import pandas as pd  #制表模块
from urllib.parse import urlparse

# Pool of browser User-Agent strings that a request may present itself as.
my_headers = [
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11",
    "Opera/9.25 (Windows NT 5.1; U; en)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12",
    "Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9",
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
]

# A single agent is drawn once, when the module is loaded, and reused for
# every request sent through get_page().
headers = {
    "User-Agent": random.choice(my_headers),
}
def get_page(url, timeout=10):
    """Download *url* and return its body as text.

    Args:
        url: Address to fetch with an HTTP GET, sent with the module-level
            ``headers`` (the randomly chosen User-Agent).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the calling thread
            forever.

    Returns:
        The response body decoded as UTF-8 when the status code is 200,
        otherwise the literal string '爬取失败！'.

    Raises:
        requests.RequestException: On connection errors or when *timeout*
            expires.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return '爬取失败！'
    # Decode explicitly instead of trusting the guessed encoding, to avoid
    # garbled Chinese text.
    return response.content.decode("utf-8")
def parse_html(html_content):
    """Extract one tuple per fund from the "table" view of the fund guide page.

    Args:
        html_content: HTML text of the page.

    Returns:
        A list of 10-tuples of strings, one per match, in capture-group order:
        0 text before the bracketed number in the ``fname fl`` link,
        1 the number itself, 2 the value of the element marked 单位净值,
        3 the text of the next ``<span>``, 4 基金类型, 5 管 理 人,
        6 the whole size field ('---' or '<number>亿元'), 7 '---' when the
        size is missing (else ''), 8 the size number (else ''), 9 基金经理.
        Returns an empty list when nothing matches.
    """
    # Raw strings keep \D and \d as regex escapes; in a normal string literal
    # they are invalid escape sequences and trigger a SyntaxWarning on
    # recent Python versions.
    pattern = re.compile(
        r'.*?fname fl.*?>(.*?)\D(\d+)\D</a>'
        r'.*?单位净值.*?>(.*?)</span>'
        r'.*?<span.*?>(.*?)</span>'
        r'.*?基金类型：(.*?)</li>'
        r'.*?管 理 人：.*?>(.*?)</a>'
        r'.*?规    模</a>：((---)|(.*?)亿元)'
        r'.*?基金经理：.*?>(.*?)</a>',
        re.S,
    )
    return pattern.findall(html_content)
def parse_html_list(html_content_list):
    """Return the contents of each ``[...]`` that follows a ``{`` in the text.

    Args:
        html_content_list: Response text of the "list" view.

    Returns:
        A list with the text found between the square brackets of every
        match (brackets excluded); empty when there is no match.
    """
    # Raw string: '\[' is an invalid escape sequence in a normal literal.
    pattern = re.compile(r'.*?{.*?\[(.*?)].*?', re.S)
    return pattern.findall(html_content_list)
def parse_html_list_up(html_content_list_up):
    """Collect the text of every double-quoted segment in the input.

    Args:
        html_content_list_up: Text holding zero or more "..." segments.

    Returns:
        The segment contents (quotes stripped) in order of appearance.
    """
    quoted_segment = re.compile('\"(.*?)\"', re.S)
    return quoted_segment.findall(html_content_list_up)
def parse_html1(html):
    """Find the number that follows each ``allPages`` marker in *html*.

    Args:
        html: Response text to search.

    Returns:
        A list of digit strings, one per match; empty when the marker (or a
        number after it) is absent.
    """
    # Raw string: '\d' is an invalid escape sequence in a normal literal.
    pattern = re.compile(r'.*?allPages.*?(\d+)', re.S)
    return pattern.findall(html)

def thread_it(func, *args):
    """Run ``func(*args)`` in a background daemon thread.

    Args:
        func: Callable to execute.
        *args: Positional arguments passed to *func*.

    Returns:
        The started ``threading.Thread``. Existing callers may ignore it.
    """
    # daemon=True replaces the deprecated Thread.setDaemon(); a daemon thread
    # does not keep the process alive once the main thread exits.
    worker = threading.Thread(target=func, args=args, daemon=True)
    worker.start()
    # Deliberately not joined: join() would block the caller and freeze the
    # user interface.
    return worker

def hideConsole():
    """Hide the console window attached to this process, if there is one.

    Does nothing on platforms where ``ctypes.windll`` is unavailable, so the
    script can start outside Windows instead of failing with AttributeError.
    """
    windll = getattr(ctypes, "windll", None)
    if windll is None:
        return
    whnd = windll.kernel32.GetConsoleWindow()
    if whnd != 0:
        # 0 is passed as the show command to hide the window.
        windll.user32.ShowWindow(whnd, 0)
# Column order of the exported CSV; _fund_row() returns its cells in this order.
_FUND_COLUMNS = ['名称', '基金类型', '管理人', '规模（亿元）', '基金经理', '净值', '日增长率', '成立来', '今年来', '近一周', '近一月',
                 '近三月', '近六月', '近一年', '近两年', '近三年', '日期', '手续费', '购买起点',
                 '原手续费']


def _fragment_to_params(url):
    """Convert the '#' fragment of *url* into a dict of query parameters.

    Underscores in the fragment are replaced by commas and the fragment is
    split on ';'. Each item is split at its first two word characters: those
    two characters are the key and the text after them is the value
    (e.g. 'pn20' -> {'pn': '20'}). Items without such a key are skipped.
    """
    fragment = urlparse(url).fragment.replace('_', ',')
    params = {}
    for item in fragment.split(';'):
        pieces = re.split(r'(\w{2})', item, maxsplit=1)
        if len(pieces) < 3:
            # No two-character key found (empty item, trailing ';', ...).
            continue
        params[pieces[1]] = pieces[2]
    return params


def _fund_row(fields):
    """Map one combined record to ``(row_label, cells)`` for the CSV.

    *fields* is the tuple from parse_html() followed by the comma-split
    record of the "list" view; *cells* follows the order of _FUND_COLUMNS.
    Raises IndexError when the record has fewer fields than expected.
    """
    def percent(position):
        return fields[position] + '%'

    label = '代码：' + fields[1]
    cells = [
        fields[0],              # 名称
        fields[4],              # 基金类型
        fields[5],              # 管理人
        fields[7] + fields[8],  # 规模: one of the two groups is always ''
        fields[9],              # 基金经理
        fields[2],              # 净值
        percent(27),            # 日增长率
        percent(34),            # 成立来
        percent(14),            # 今年来
        percent(15),            # 近一周
        percent(16),            # 近一月
        percent(17),            # 近三月
        percent(18),            # 近六月
        percent(19),            # 近一年
        percent(20),            # 近两年
        percent(21),            # 近三年
        fields[25],             # 日期
        fields[29],             # 手续费
        fields[30] + '元',      # 购买起点
        fields[32],             # 原手续费
    ]
    return label, cells


def main_windows():
    """Build the download tool window and run the Tk event loop.

    The window holds an entry for the fund-screener URL, an entry for the
    output file name, a button that starts the download in a background
    thread, and a text box that shows everything written to stdout/stderr
    while the window is open.
    """
    # Retained for backward compatibility; not read anywhere in this function.
    global active1
    active1 = True

    window = tk.Tk()
    window.title('天天基金数据下载工具')
    window.resizable(0, 0)
    # Fixed-size window centred on the screen ("WIDTHxHEIGHT+X+Y").
    screenwidth = window.winfo_screenwidth()
    screenheight = window.winfo_screenheight()
    dialog_width = 800
    dialog_height = 480
    window.geometry("%dx%d+%d+%d" % (
        dialog_width, dialog_height,
        (screenwidth - dialog_width) / 2, (screenheight - dialog_height) / 2))

    def test(content, reason, name):
        # Validation hook for both entries: every edit is accepted.
        return True

    class myStdout():
        """Redirect sys.stdout and sys.stderr into a Tk text widget."""

        def __init__(self, widget):
            self.widget = widget
            # Keep the real streams so they can be restored later.
            self.stdoutbak = sys.stdout
            self.stderrbak = sys.stderr
            sys.stdout = self
            sys.stderr = self

        def write(self, info):
            self.widget.insert('end', info)  # append the printed text
            self.widget.update()             # repaint so the text shows up immediately
            self.widget.see(tk.END)          # keep the last line visible

        def flush(self):
            # Nothing is buffered, but code that prints tracebacks (for
            # example threading.excepthook) calls flush() on the stream.
            pass

        def restoreStd(self):
            sys.stdout = self.stdoutbak
            sys.stderr = self.stderrbak

    test_cmd = window.register(test)
    v_1 = tk.StringVar()
    v_1.set('http://fund.eastmoney.com/daogou/#dt4;ft;rs;sd;ed;pr;cp;rt;tp;rk;se;nx;sc3y;stdesc;pi1;pn20;zfdiy;shlist')
    v_2 = tk.StringVar()
    v_2.set('基金数据表格')
    tk.Label(window, text='输入天天基金数据网址', font=('宋体', 12)).place(x=260, y=28)
    tk.Label(window, text='表格名称', font=('宋体', 12)).place(x=180, y=80)
    e1 = tk.Entry(window,
                  width=100,
                  textvariable=v_1,
                  validate='all',  # call validatecommand on every change
                  validatecommand=(test_cmd, '%P', '%v', '%W')  # %P is the would-be content
                  )
    e1.place(x=40, y=50)
    e2 = tk.Entry(window,
                  width=50,
                  textvariable=v_2,
                  validate='all',
                  validatecommand=(test_cmd, '%P', '%v', '%W')
                  )
    e2.place(x=40, y=120)

    def down_list():
        """Download every result page for the entered URL and save it as CSV."""
        ex_name = e2.get() + '.csv'
        url = e1.get()
        b1.config(state=tk.DISABLED)
        try:
            params = _fragment_to_params(url)
            base_url = 'http://fund.eastmoney.com/data/FundGuideapi.aspx?'
            page_counts = parse_html1(get_page(base_url + urlencode(params)))
            if not page_counts:
                # The response did not contain a page count (bad URL or a
                # failed request): nothing can be downloaded.
                print('未能获取总页数，请检查网址')
                return
            all_page = int(page_counts[0])

            rows = {}  # row label -> cells; a repeated label overwrites the earlier row
            for page in range(1, all_page + 1):
                params['pi'] = str(page)
                # Each page is fetched twice: the "table" view and the
                # "list" view carry different fields of the same funds.
                params['sh'] = 'table'
                table_rows = parse_html(get_page(base_url + urlencode(params)))
                params['sh'] = 'list'
                list_blocks = parse_html_list(get_page(base_url + urlencode(params)))
                list_records = parse_html_list_up(list_blocks[0])
                # Records of the two views are paired by position.
                for position, table_row in enumerate(table_rows):
                    fields = list(table_row) + list_records[position].split(",")
                    print(fields)
                    label, cells = _fund_row(fields)
                    rows[label] = cells

            # Build the frame once instead of growing it cell by cell.
            shuju = pd.DataFrame(list(rows.values()), index=list(rows.keys()),
                                 columns=_FUND_COLUMNS)
            shuju.to_csv(ex_name, encoding='utf-8')
        except PermissionError:
            print('文件名重复并且未关闭，请另外命名或关闭文件')
        finally:
            # Always re-enable the button, also when the download failed.
            b1.config(state=tk.NORMAL)
        print('下载结束')

    def make_clipboard_action(entry, virtual_event):
        """Return a menu callback that sends *virtual_event* to *entry*."""
        def action(event=None):
            entry.event_generate(virtual_event)
        return action

    # One right-click popup menu (cut / copy / paste) per entry.
    menu = tk.Menu(window, tearoff=False)
    menu1 = tk.Menu(window, tearoff=False)
    clipboard_items = (("剪切", '<<Cut>>'), ("复制", '<<Copy>>'), ("粘贴", '<<Paste>>'))
    for popup_menu, entry in ((menu, e1), (menu1, e2)):
        for item_label, virtual_event in clipboard_items:
            popup_menu.add_command(label=item_label,
                                   command=make_clipboard_action(entry, virtual_event))

    def popup(event):
        menu.post(event.x_root, event.y_root)  # show the menu at the pointer position

    def popup1(event):
        menu1.post(event.x_root, event.y_root)

    e1.bind("<Button-3>", popup)  # right mouse button
    e2.bind("<Button-3>", popup1)

    def start():
        # These globals are retained for backward compatibility;
        # down_list() reads the entries itself.
        global ex_name
        global url
        ex_name = e2.get() + '.csv'
        url = e1.get()
        down_list()

    # The download runs in a background thread so the window stays responsive.
    b1 = tk.Button(window, text='开始下载', width=10,
                   height=1, command=lambda: thread_it(start))
    b1.place(x=500, y=110)

    t = tk.Text(window, height=20)
    scroll = tk.Scrollbar()
    # Scrollbar on the right edge of the window, filling it vertically.
    scroll.pack(side=tk.RIGHT, fill=tk.Y)
    # Link the scrollbar and the text box.
    scroll.config(command=t.yview)
    t.config(yscrollcommand=scroll.set)

    str1 = '万一佣金开户 联系方式 电话&微信：18017439644\n' \
           '基金认购费0.1折（银行认购费一般不打折费率平均为1.2%，支付宝天天基金网一般打一折为0.12%。） \n' \
           '另有无广客户交流群\n' \
           '  专业团队信息发布，理论指导，研报分享。\n' \
           '  每日晨报专业老师直播分享财经动态 \n' \
           '也可关注公众号：天风散人\n' \
           '  下载海量研究报告\n' \
           '有脚本，数据分析爬取需求，加微联系'
    t.insert(tk.INSERT, str1)
    t.place(x=40, y=160)

    link = tk.Label(window, text='进入天天基金网站', font=('Arial', 10))
    link.place(x=40, y=440)
    link1 = tk.Label(window, text='扫码万一佣金开户', font=('宋体', 16))
    link1.place(x=610, y=180)

    def open_url(event):
        webbrowser.open("http://fund.eastmoney.com/daogou", new=0)

    link.bind("<Button-1>", open_url)

    canvas = tk.Canvas(window, height=150, width=150)
    try:
        # The reference must stay alive while the window is shown, hence the
        # local variable.
        image_file = tk.PhotoImage(file='img/pic.gif')
    except tk.TclError:
        # Missing or unreadable picture: show the tool without it.
        image_file = None
    if image_file is not None:
        canvas.create_image(75, 0, anchor='n', image=image_file)
    canvas.place(x=620, y=250)

    # Redirect output only now that the text box exists, and always restore
    # the real streams when the event loop ends.
    mystd = myStdout(t)
    try:
        window.mainloop()
    finally:
        mystd.restoreStd()
# Script entry: hide the console window first, then build the GUI; the
# second call returns only after the window's event loop has ended.
hideConsole()
main_windows()
软件效果图片
python中用来下载插件的代码_html