Python实现ChromeDriver定时更新
Selenium作为一款基于web网页的UI自动化测试框架,深受开发人员的喜爱,在自动化领域占有着一席之地;Selenium框架,伴随着它配套的工具ChromeDriver,一同使用,帮助开发人员们完成了各式各样的工作,同时也节省了大部分的人力物力,大大提高了效率。
但是,使用Selenium框架的人员都知道,ChromeDriver是随着Google浏览器的更新需要同步去更新的,只有匹配版本的ChromeDriver,才能够启动浏览器,完成我们想要运行的程序。而现在,谷歌浏览器的更新频率,两周多便会有一次版本的更新,所以,定时能够更新ChromeDriver工具,对于整个自动化来说,显得十分的重要。
大家去搜索ChromeDriver更新的时候,也能够在网站上面得到各式各样的结果,各路大神所用的方法也是各有不同,但是,最近的一次更新,导致了我的自动化程序停机了一天,让我不得不放下手头的活,来看一下具体的问题所在。
在Debug整个代码之后,发现问题出现在软件镜像源上面,之前我们使用的都是ChromeDriver的淘宝镜像仓库,地址为:http://npm.taobao.org/mirrors/chromedriver/
但是,此次你再打开该链接后,会发现地址自动跳转到了一个带有SSL加密的新的镜像地址,这就导致了网上基本所有的自动更新的代码失效了,其新的地址为:https://registry.npmmirror.com/binary.html?path=chromedriver/ 。我们通过镜像源获取相应的版本,其实现的基本思路,无非就是获取到浏览器的最新版本,然后去寻找对应版本的ChromeDriver即可,自动化的逻辑也就是去获取两边的版本号,做一个比对即可。但是,如果你直接请求爬取上面的新地址的内容,你会发现,它的网页源码中并没有你想要的版本号列表,如下方代码所示:
<!DOCTYPE html>
<html>
<head>
<title>CNPM Binaries Mirror</title>
</head>
<body>
<script>
// Forked from https://chromedriver.storage.googleapis.com/index.html
// Split a string in 2 parts. The first is the leading number, if any,
// the second is the string following the numbers.
//
// The prefix is grown one character at a time and the scan stops at the
// first prefix that isNaN() rejects.
//
// @param s  The string to split.
// @return   An array. Element 0 is the parseFloat() of the last prefix that
//           passed the isNaN() check, or the string 'None' when not even the
//           first character passed. Element 1 is the rest of s starting at
//           the character that ended the numeric prefix; it is left unset
//           when the loop never meets such a character (for example an
//           empty or entirely numeric string).
function splitNum(s) {
var results = new Array();
// Placeholder meaning "no leading number"; isNaN('None') is true, which is
// what alphanumCompare tests for.
results[0] = 'None';
for (var i = 0; i < s.length; i++) {
var substr = s.substr(0, i+1)
if (isNaN(substr)) {
// Not a number anymore.
results[1] = s.substr(i)
break;
} else {
// This is a number. update the results.
results[0] = parseFloat(substr);
}
}
return results;
}
// Compare 2 strings using a custom alphanumerical algorithm.
// This is similar to a normal string sort, except that we sort
// first by leading digits, if any.
// For example:
// 100hello > 2goodbye
// Numbers anywhere else in the string are compared using the normal
// sort algorithm.
//
// @param a  First value to compare.
// @param b  Second value to compare.
// @return   -1 when a sorts before b, 1 when a sorts after b, 0 otherwise
//           (the contract expected by Array.prototype.sort).
function alphanumCompare(a, b) {
var parsedA = splitNum(a);
var parsedB = splitNum(b);
// Element 0 is either a number or the 'None' placeholder set by splitNum.
var numA = parsedA[0];
var numB = parsedB[0];
// Element 1 may be undefined when splitNum found no non-numeric remainder.
var strA = parsedA[1];
var strB = parsedB[1];
if (isNaN(numA) == false && isNaN(numB) == false) {
// They both start with numbers.
if (numA < numB) return -1;
if (numA > numB) return 1;
// Identical. Fallback to string.
return (strA < strB) ? -1 : (strA > strB ? 1 : 0)
}
// If only one starts with a number, we start with that one as
// the lowest.
if (isNaN(numA) == false) return -1
if (isNaN(numB) == false) return 1
// They are both strings.
return (a < b) ? -1 : (a > b ? 1 : 0)
}
// Helper function to retrieve the value of a GET query parameter.
// Greatly inspired from http://alturl.com/8rj7a
//
// @param parameterName  Name of the query parameter, without the '='.
// @return  '' when the query string is empty or the parameter is absent;
//          otherwise the parameter's raw value passed through
//          escape(unescape(...)).
function getParameter(parameterName) {
// Add '=' to the parameter name (i.e. parameterName=value)
var parameterName = parameterName + '=';
// Drop the leading '?' of location.search.
var queryString = window.location.search.substring(1);
if (queryString.length <= 0) {
return '';
}
// Find the beginning of the string
// NOTE(review): `begin` (and `end` below) are assigned without var/let, so
// in a non-strict script they become global variables.
// NOTE(review): indexOf matches anywhere in the query string, so a
// parameter whose name merely ends with this one (e.g. 'xsort=' when
// looking for 'sort=') would also match.
begin = queryString.indexOf(parameterName);
// If the parameter name is not found, skip it, otherwise return the
// value.
if (begin == -1) {
return '';
}
// Add the length (integer) to the beginning.
begin += parameterName.length;
// Multiple parameters are separated by the '&' sign.
end = queryString.indexOf ('&', begin);
if (end == -1) {
end = queryString.length;
}
// Return the string.
return escape(unescape(queryString.substring(begin, end)));
}
// Displays the directory listing given the XML and path.
//
// Writes the listing straight into the document with document.write: a
// heading, a table header, an optional "Parent Directory" row, one row per
// directory, one row per file, and finally sets document.title.
//
// @param items  Array of entry objects. This function reads the fields
//               type, name, date, url and size from them.
// @param root   Not referenced anywhere in this function body.
// @param path   Current directory path taken from the 'path' query
//               parameter; '' is treated as the top level.
function displayList(items, root, path) {
// Display the header
document.write('<h1>Index of /' + path + '</h1>');
// Start the table for the results.
document.write('<table style="border-spacing:15px 0px;">');
var sortOrder = getParameter('sort');
// The "Name" header links to the same listing with the opposite sort order.
var sortLink = location.pathname + '?path=' + path;
if (sortOrder != 'desc') {
sortLink += '&sort=desc';
}
// Display the table header.
document.write('<tr><th><img src="https://gw.alipayobjects.com/mdn/rms_fa382b/afts/img/A*v6fRRLopV_0AAAAAAAAAAAAAARQnAQ" alt="[ICO]"></th>');
document.write('<th><a href="' + sortLink + '">Name</a></th>');
document.write('<th>Last modified</th>');
document.write('<th>Size</th>');
// NOTE(review): the header row opened above is never closed with '</tr>'
// before this next '<tr>' is written.
document.write('<tr><th colspan="5"><hr></th></tr>');
// Display the 'go back' button.
if (path != '') {
var backpath = location.pathname;
// If there is more than one section delimited by '/' in the current
// path we truncate the last section and append the rest to backpath.
var delimiter = path.lastIndexOf('/');
if (delimiter >= 0) {
delimiter = path.substr(0, delimiter).lastIndexOf('/');
if (delimiter >= 0) {
backpath += '?path=';
backpath += path.substr(0, delimiter+1);
}
}
document.write('<tr><td valign="top"><img src="https://gw.alipayobjects.com/mdn/rms_fa382b/afts/img/A*3QmJSqp2zpUAAAAAAAAAAAAAARQnAQ" alt="[DIR]"></td>');
document.write('<td><a href="');
document.write(backpath);
document.write('">Parent Directory</a></td>');
document.write('<td> </td>');
document.write('<td align="right"> - </td></tr>');
}
// Set up the variables.
var directories = new Array();
var files = new Array();
// Anything whose type is not exactly 'file' is listed as a directory.
for (var i = 0; i < items.length; i++) {
var item = items[i];
if (item.type === 'file') {
files.push(item);
} else {
directories.push(item);
}
}
// NOTE(review): both arrays hold entry objects, while alphanumCompare is
// documented as comparing strings - confirm this orders by name as intended.
files.sort(alphanumCompare);
directories.sort(alphanumCompare);
// Reverse the list for a descending sort.
if (sortOrder == 'desc') {
files.reverse();
directories.reverse();
}
// Display the directories.
for (var i = 0; i < directories.length; i++) {
// NOTE(review): this cuts location.pathname at the index of '?';
// presumably pathname never contains '?' - verify what lnk starts with.
var lnk = location.pathname.substr(0, location.pathname.indexOf('?'));
var item = directories[i];
lnk += '?path=' + path + item.name;
document.write('<tr>');
document.write('<td valign="top"><img src="https://gw.alipayobjects.com/mdn/rms_fa382b/afts/img/A*ct35SJLile8AAAAAAAAAAAAAARQnAQ" alt="[DIR]"></td>');
document.write('<td><a href="' + lnk + '">' +
item.name + '</a></td>');
document.write('<td align="right">' + (item.date || '-') + '</td>');
document.write('<td align="right">-</td>');
document.write('</tr>');
}
// Display the files.
for (var i = 0; i < files.length; i++) {
var item = files[i];
var link = item.url;
var filename = item.name;
// Scale the size by 1024 up to two times and label it KB or MB; values of
// 1024 or less are shown as-is without a unit.
var sizeUnit = '';
var size = item.size;
if (size > 1024) {
sizeUnit = 'KB';
size = size / 1024;
if (size > 1024) {
sizeUnit = 'MB';
size = size / 1024;
}
}
if (sizeUnit !== '') {
size = size.toFixed(2) + sizeUnit;
}
var lastModified = item.date;
// Remove the entries we don't want to show.
if (filename == '') {
continue;
}
if (filename.indexOf('$folder$') >= 0) {
continue;
}
// Display the row.
document.write('<tr>');
// NOTE(review): file rows reuse alt="[DIR]" for their icon.
document.write('<td valign="top"><img src="https://gw.alipayobjects.com/mdn/rms_fa382b/afts/img/A*FKvWRo-vns4AAAAAAAAAAAAAARQnAQ" alt="[DIR]"></td>');
document.write('<td><a href="' + link + '">' + filename +
'</a></td>');
document.write('<td align="right">' + lastModified + '</td>');
document.write('<td align="right">' + size + '</td>');
document.write('</tr>');
}
// Close the table.
document.write('<tr><th colspan="5"><hr></th></tr>');
document.write('</table>');
document.title = 'CNPM Binaries Mirror';
}
// Requests the JSON listing for the directory named by the 'path' query
// parameter and renders it with displayList once the request completes.
// The listing is fetched from root + path, not from the page's own URL, so
// the entries are not part of this page's HTML source.
function fetchAndDisplay() {
var path = getParameter('path');
// NOTE(review): lastSlash and filename are computed but not used anywhere
// in this function.
var lastSlash = location.pathname.lastIndexOf("/");
var filename = location.pathname.substring(lastSlash + 1);
var root = 'https://registry.npmmirror.com/-/binary/';
var http = new XMLHttpRequest();
// Third argument true: the request is asynchronous.
http.open('GET', root + path, true);
http.onreadystatechange = useHttpResponse;
http.send(null);
// readystatechange handler; acts only once the request is complete
// (readyState 4). http.status is not inspected.
function useHttpResponse() {
if (http.readyState == 4) {
var items = [];
try {
items = JSON.parse(http.responseText);
} catch (err) {
// On unparsable JSON the error is logged and an empty listing is shown.
console.error(err, http.responseText);
}
displayList(items, root, path);
}
}
}
// Kick off the fetch as soon as the script runs.
fetchAndDisplay();
</script>
</body>
</html>
通过对其进行抓包分析,发现其真实的请求路径并不是网站上方看到的地址本身,而是另外一个专门加载这些版本信息的请求,如下图所示:
从上面两张图中,我们可以很清楚的看到,具体得到左侧网站中的版本信息的请求,其本身的请求路径如下:(这里可以点击我下方贴上去的网址,你就会发现,这里便是你想要获取的有关网站上版本的一个Json格式的数据)
https://registry.npmmirror.com/-/binary/chromedriver/
明确了这些,我们就可以去搭建一个自动化定时更新ChromeDriver的脚本了
首先我们先明确整个需求的逻辑,如下流程图:
其具体实现代码如下,其中需要注意几点:
- 新的镜像源网站协议为Https,在使用requests进行爬取的时候,注意忽略掉SSL证书协议的判断
- Chrome的完整版本号会跟镜像源网站上ChromeDriver的完整版本号不一致,所以,在做比较的时候,我们只需要比较最开始的第一组数即可,保持大的版本一致
- 代码中获取本地路径的方法,可以沿用到使用ChromeDriver的时候,通过方法去填充对应路径,而不是去写死一个绝对路径
# -*- coding:utf-8 -*-
import json
import os
import time
import ssl
import requests
import winreg
import zipfile
from requests.packages.urllib3.exceptions import InsecureRequestWarning
"""忽略SSL证书警告"""
# Silence the InsecureRequestWarning that would otherwise be printed for every
# request made with verify=False below.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Rebind the ssl module's default HTTPS context factory to the unverified one.
# NOTE(review): together with verify=False this means the downloaded
# executable is fetched without certificate verification - confirm that this
# is really required for the mirror in use.
ssl._create_default_https_context = ssl._create_unverified_context
"""ChromeDriver仓库淘宝镜像地址"""
# Mirror locations of the ChromeDriver repository.
# Previous mirror address, kept for reference:
# ChromeDriver_depot_url = r'http://npm.taobao.org/mirrors/chromedriver/'
# Human-facing listing page; not referenced by the functions in this script.
ChromeDriver_depot_url = r'https://registry.npmmirror.com/binary.html?path=chromedriver/'
# JSON endpoint of the listing; also used as the prefix of the download URLs.
ChromeDriver_base_url = r'https://registry.npmmirror.com/-/binary/chromedriver/'
def get_Chrome_version():
    r"""Return the installed Google Chrome version read from the Windows registry.

    Reads the ``version`` value stored under
    ``HKEY_CURRENT_USER\Software\Google\Chrome\BLBeacon`` and prints it.

    :return: the version string exactly as stored in the registry
        (presumably the full dotted form, e.g. ``96.0.4664.45``).
    :raises OSError: if the registry key or the ``version`` value is missing.
    """
    # The context manager closes the registry handle even when the query fails.
    with winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Google\Chrome\BLBeacon') as key:
        version, _ = winreg.QueryValueEx(key, 'version')
    print("本机目前的Chrome版本为:", version)
    return version
def get_version():
    """Return the version of the ``chromedriver`` executable found on PATH.

    Runs ``chromedriver --version`` and takes the second
    whitespace-separated token of its output as the version.

    :return: the ChromeDriver version string (e.g. ``92.0.4515.107``).
    :raises RuntimeError: if the command produced no parsable output, for
        example because chromedriver is not installed or not on PATH.
    """
    # Close the pipe explicitly instead of leaving it to garbage collection.
    with os.popen('chromedriver --version') as pipe:
        output = pipe.read()
    fields = output.split()
    if len(fields) < 2:
        raise RuntimeError("无法获取ChromeDriver版本,请确认chromedriver已安装并位于PATH中")
    driver_version = fields[1]
    # The label used to say "Chrome" although the ChromeDriver version is printed.
    print("本机目前的ChromeDriver版本为:", driver_version)
    return driver_version
def get_server_chrome_versions(url):
    """Return the entry names listed by the ChromeDriver mirror.

    Fetches ``url``, parses the response body as a JSON array and collects
    the ``name`` field of every entry with any trailing ``/`` removed.

    :param url: JSON listing endpoint of the ChromeDriver mirror.
    :return: list of entry names, in the order the server returned them.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    # NOTE(review): certificate verification stays disabled to match the rest
    # of this script - confirm it is really needed for this mirror.
    response = requests.get(url, verify=False, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    entries = json.loads(response.text)
    # Strip only a trailing slash; blindly dropping the last character would
    # truncate any name that does not end with one.
    return [entry['name'].rstrip('/') for entry in entries]
def download_driver(download_url):
    """Download the ChromeDriver archive to ``chromedriver.zip``.

    The file is written into the current working directory.

    :param download_url: URL of the ChromeDriver zip for the wanted version.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    # NOTE(review): an executable is downloaded here with certificate
    # verification disabled - confirm this is acceptable for the mirror in use.
    response = requests.get(download_url, verify=False, timeout=60)
    # Without this check an HTML error page would be saved as the "zip" and
    # only fail later, while unpacking.
    response.raise_for_status()
    with open("chromedriver.zip", 'wb') as zip_file:
        zip_file.write(response.content)
    print('下载成功')
def unzip_driver(path):
    """Extract ``chromedriver.zip`` from the current directory into ``path``.

    :param path: directory that receives every member of the archive.
    """
    # The context manager closes the archive before returning, so the zip
    # file can be deleted afterwards without relying on garbage collection.
    with zipfile.ZipFile("chromedriver.zip", 'r') as archive:
        archive.extractall(path)
def get_path():
    """Return the directory of the ``chromedriver`` executable found on PATH.

    Runs the Windows ``where chromedriver`` command and uses the first
    non-empty line of its output.

    :return: directory part of that line, or ``''`` when the command
        printed nothing.
    """
    with os.popen('where chromedriver') as pipe:
        output = pipe.read()
    # `where` prints one line per match; splitting the whole multi-line
    # output as a single path would yield a corrupt directory.
    first_match = next(
        (line.strip() for line in output.splitlines() if line.strip()), ""
    )
    return os.path.dirname(first_match)
def check_update_chromedriver():
    """Update the local ChromeDriver when its major version differs from Chrome's.

    Compares the first dotted component of the Chrome and ChromeDriver
    versions. On a mismatch it looks up a matching version on the mirror
    (the exact Chrome version first, otherwise the first listed entry with
    the same major version), downloads ``chromedriver_win32.zip``, unpacks
    it over the current ChromeDriver location and removes the archive.
    Progress is reported with ``print``; nothing is returned.
    """
    chromeVersion = get_Chrome_version()
    chrome_main_version = int(chromeVersion.split(".")[0])  # Chrome major version
    driverVersion = get_version()
    driver_main_version = int(driverVersion.split(".")[0])  # ChromeDriver major version

    if driver_main_version == chrome_main_version:
        print(r"chromedriver版本与chrome浏览器相兼容,无需更新chromedriver版本!")
        return

    print("chromedriver版本与chrome浏览器不兼容,更新中>>>")
    versionList = get_server_chrome_versions(ChromeDriver_base_url)
    if chromeVersion in versionList:
        matched_version = chromeVersion
    else:
        # Include the dot so that e.g. major version 10 cannot match "100.x".
        major_prefix = f"{chrome_main_version}."
        matched_version = next(
            (version for version in versionList if version.startswith(major_prefix)),
            None,
        )

    if matched_version is None:
        # Stop here: continuing would try to download from an empty URL.
        print(f"暂无法找到与chrome兼容的chromedriver版本,请在{ChromeDriver_depot_url} 核实。")
        return

    download_url = f"{ChromeDriver_base_url}{matched_version}/chromedriver_win32.zip"
    download_driver(download_url=download_url)
    try:
        Chrome_Location_path = get_path()
        print("解压地址为:", Chrome_Location_path)
        unzip_driver(Chrome_Location_path)
    finally:
        # Remove the archive even when unpacking fails.
        os.remove("chromedriver.zip")
    print('更新后的Chromedriver版本为:', get_version())
# Script entry point: run one update check when executed directly.
if __name__ == "__main__":
    check_update_chromedriver()
    # NOTE(review): presumably keeps the console window open long enough to
    # read the output when launched by a scheduled task - confirm.
    time.sleep(10)
至于定时任务,只需要在windows上面设置一个定时任务即可,这步可以请各位自行百度
本文至此便告一段落,祝各位码上无ERROR,键盘无BUG!!