1. 简介
selenium 最初是一个自动化测试工具,因为requests无法直接执行js代码,所以在爬虫中使用它.
selenium本质是通过驱动浏览器,完全模拟浏览器的操作,比如跳转、输入、点击、后退、前进等,直接拿到网页渲染后的结果,可以支持多种浏览器.
```
from selenium import webdriver
# Each line below starts a different browser through its own driver class.
# They are alternatives: every assignment rebinds `browser`, so a real script
# keeps only the one line for the browser it wants to drive.
browser=webdriver.Chrome()
browser=webdriver.Firefox()
# NOTE(review): PhantomJS support appears to have been dropped from newer
# Selenium releases - confirm against the installed version.
browser=webdriver.PhantomJS()
browser=webdriver.Safari()
browser=webdriver.Edge()
```
官网:http://selenium-python.readthedocs.io
2. 安装
使用pip 安装 selenium
pip3 install selenium
下载chromedriver.exe放到python安装路径的Scripts目录中即可
http://npm.taobao.org/mirrors/chromedriver
验证安装,进入cmd 调用Python
# Smoke check for the installation: start Chrome and load one page.
from selenium import webdriver
driver = webdriver.Chrome()# a browser window should pop up
driver.get('https://www.baidu.com') # open the Baidu home page
3. 基本使用
from selenium import webdriver #驱动浏览器
from selenium.webdriver import ActionChains #滑动验证
from selenium.webdriver.common.by import By #选择器
from selenium.webdriver.common.keys import Keys #模拟键盘的按键
from selenium.webdriver.support import expected_conditions as EC #
from selenium.webdriver.support.wait import WebDriverWait #与EC联用,等待某个元素被加载
import time
# Basic flow: open Baidu, type a query, click search, wait for the results
# container and dump the rendered page source.
browser = webdriver.Chrome()
try:
    wait = WebDriverWait(browser, 10)  # give each waited condition at most 10 seconds
    browser.get('https://www.baidu.com')
    # Use find_element(By.ID, ...): the find_element_by_* shortcuts were
    # deprecated in Selenium 4 and later removed.
    input_tag = browser.find_element(By.ID, 'kw')  # the element whose id is "kw"
    input_tag.send_keys('苹果切一半')
    button = browser.find_element(By.ID, 'su')
    button.click()
    # Block until the element with id "content_left" is present in the DOM.
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    res = browser.page_source  # HTML of the page as currently rendered
    print(res)
    time.sleep(10)
finally:
    # Close the browser even when one of the steps above raised.
    browser.close()
4. 三种选择器
三种查找元素方式
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
# Demonstrates three locator strategies that resolve to the same element.
browser=webdriver.Chrome()
browser.get('https://www.baidu.com')
# The three lookups below have the same effect: each finds the element whose id is "kw".
input_tag1=browser.find_element(By.ID,'kw') # same as: input_tag1=browser.find_element_by_id('kw')
input_tag2=browser.find_element(By.CSS_SELECTOR,'#kw') # same as: input_tag2=browser.find_element_by_css_selector('#kw')
input_tag3=browser.find_element(By.XPATH,'//*[@id="kw"]') # same as: input_tag3=browser.find_element_by_xpath('//*[@id="kw"]')
# Note: the find_elements family returns every match, while find_element returns only the first one.
div1=browser.find_element(By.CSS_SELECTOR,'div') # the first div element
div2=browser.find_elements(By.CSS_SELECTOR,'div') # every div element, collected in a list
browser.close()
三种查找元素方式
获取标签属性
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
# Open the Amazon China home page, wait for the image container, then print
# an attribute and a few properties of the first image inside it.
browser = webdriver.Chrome()
browser.get('https://www.amazon.cn/')
container_present = EC.presence_of_element_located((By.ID, 'cc-lm-tcgShowImgContainer'))
WebDriverWait(browser, 10).until(container_present)
img = browser.find_element(By.CSS_SELECTOR, '#cc-lm-tcgShowImgContainer img')
# An HTML attribute is read through get_attribute().
print(img.get_attribute('src'))
# Element id, location, tag name and size, printed in that order (for reference only).
for prop in ('id', 'location', 'tag_name', 'size'):
    print(getattr(img, prop))
browser.close()
获取标签属性
5. 等待元素被加载
1、selenium只是模拟浏览器的行为,而浏览器解析页面是需要时间的(执行css,js),一些元素可能需要过一段时间才能加载出来,为了保证能查找到元素,必须等待
2、等待的方式分两种:
隐式等待:在browser.get('xxx')前就设置,针对所有元素有效
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
# Implicit wait demo: one setting on the driver applies to every later lookup.
browser = webdriver.Chrome()
try:
    # Implicit wait: when an element is not there yet, keep retrying the
    # lookup for up to 10 seconds before giving up.
    browser.implicitly_wait(10)
    browser.get('https://www.baidu.com')
    # find_element(By.ID, ...) replaces the find_element_by_id shortcut,
    # which was deprecated in Selenium 4 and later removed.
    input_tag = browser.find_element(By.ID, 'kw')
    input_tag.send_keys('美女')
    input_tag.send_keys(Keys.ENTER)
    # No explicit wait is needed here: the implicit wait above covers this
    # lookup, which raises only if the element is still missing after 10 seconds.
    contents = browser.find_element(By.ID, 'content_left')
    print(contents)
finally:
    # Close the browser even when a step above raised.
    browser.close()
隐式等待
显式等待:在browser.get('xxx')之后设置,只针对某个元素有效
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
# Explicit wait demo: wait for one specific element after submitting a search.
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    # find_element(By.ID, ...) replaces the find_element_by_id shortcut,
    # which was deprecated in Selenium 4 and later removed.
    input_tag = browser.find_element(By.ID, 'kw')
    input_tag.send_keys('美女')
    input_tag.send_keys(Keys.ENTER)
    # Explicit wait: block (at most 10 seconds) until this one element is present.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    contents = browser.find_element(By.CSS_SELECTOR, '#content_left')
    print(contents)
finally:
    # Close the browser even when a step above raised.
    browser.close()
显式等待
6. 元素交互操作
点击清空
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
# Click / clear demo: search Amazon China twice, emptying the search box
# before typing the second query.
browser = webdriver.Chrome()
browser.get('https://www.amazon.cn/')
wait = WebDriverWait(browser, 10)
# The same submit button is clicked twice, so keep its selector in one place.
search_button_css = '#nav-search > form > div.nav-right > div > input'
# wait.until returns the located element, so it can be used directly.
input_tag = wait.until(EC.presence_of_element_located((By.ID, 'twotabsearchtextbox')))
input_tag.send_keys('iphone 8')
# find_element(By..., ...) replaces the find_element_by_* shortcuts, which
# were deprecated in Selenium 4 and later removed.
button = browser.find_element(By.CSS_SELECTOR, search_button_css)
button.click()
import time
time.sleep(3)
# Look the search box up again instead of reusing the earlier reference
# (presumably because the results page replaces the element - confirm).
input_tag = browser.find_element(By.ID, 'twotabsearchtextbox')
input_tag.clear()  # empty the input box
input_tag.send_keys('iphone7plus')
button = browser.find_element(By.CSS_SELECTOR, search_button_css)
button.click()
# browser.close()
点击,清空
actions.perform
#http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
# Action chain demo: drag one element onto another inside an iframe.
browser = webdriver.Chrome()
browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
browser.switch_to.frame('iframeResult')  # the elements below are looked up inside this frame
# find_element(By.ID, ...) replaces the find_element_by_id shortcut, which
# was deprecated in Selenium 4 and later removed.
source = browser.find_element(By.ID, 'draggable')
target = browser.find_element(By.ID, 'droppable')
actions = ActionChains(browser)  # the action chain object
actions.drag_and_drop(source, target)  # queue the action; nothing runs yet
actions.perform()  # run the queued actions in order
Action Chains
一些难以实现的操作,自己写js代码
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
# Run hand-written JavaScript in the page for interactions that are awkward
# to express through the WebDriver API.
# Create the driver before the try block so that `finally` never refers to an
# unbound name when Chrome itself fails to start.
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    browser.execute_script('alert("hello world")')  # pop up an alert dialog
finally:
    browser.close()
在交互动作比较难实现的时候可以自己写JS(万能方法)
7. 补充
模拟浏览器的前进后退
#模拟浏览器的前进后退
import time
from selenium import webdriver
# Load three pages one after another so the session has a history, then
# step back once, pause, and step forward again.
browser = webdriver.Chrome()
for url in ('https://www.baidu.com', 'https://www.taobao.com', 'http://www.sina.com.cn/'):
    browser.get(url)
browser.back()
time.sleep(10)  # pause before going forward again
browser.forward()
browser.close()
模拟浏览器的前进后退
cookies
#cookies
from selenium import webdriver
# Cookie demo: print the cookies of the current page, add two, print again.
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())
# add_cookie takes ONE cookie per call, described by the required 'name' and
# 'value' keys; a dict of arbitrary key/value pairs is rejected.
for cookie_name, cookie_value in (('k1', 'xxx'), ('k2', 'yyy')):
    browser.add_cookie({'name': cookie_name, 'value': cookie_value})
print(browser.get_cookies())
# browser.delete_all_cookies()
cookies
异常处理
from selenium import webdriver
from selenium.common.exceptions import TimeoutException,NoSuchElementException,NoSuchFrameException
# Exception handling demo: catch the specific Selenium exceptions and always
# close the browser afterwards.
# Create the driver before the try block so that `finally` never refers to an
# unbound name when Chrome itself fails to start.
browser = webdriver.Chrome()
try:
    browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    # NOTE(review): presumably a deliberately wrong frame name, so that
    # NoSuchFrameException is raised for the demo - confirm.
    browser.switch_to.frame('iframssseResult')
except TimeoutException as e:
    print(e)
except NoSuchFrameException as e:
    print(e)
finally:
    browser.close()
异常处理
8. 自动登录126邮箱并发送邮件
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "half apple"
# Date: 2017/11/8
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By # 按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys # 键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait # 等待页面加载某些元素
import time
# Log in to 126 mail, open the compose view, fill in recipient, subject and
# body, then click the send button.
# All lookups use find_element(s)(By..., ...): the find_element_by_* shortcuts
# were deprecated in Selenium 4 and later removed.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 5)
try:
    driver.get('http://mail.126.com/')

    # The login inputs are looked up inside the x-URS-iframe frame, so switch into it first.
    frame = wait.until(EC.presence_of_element_located((By.ID, 'x-URS-iframe')))
    driver.switch_to.frame(frame)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'm-container')))
    inp_user = driver.find_element(By.NAME, 'email')
    inp_pwd = driver.find_element(By.NAME, 'password')
    button = driver.find_element(By.ID, 'dologin')
    inp_user.send_keys('邮箱账号')
    inp_pwd.send_keys('邮箱密码')
    button.click()

    wait.until(EC.presence_of_element_located((By.ID, 'dvNavTop')))
    # The second <li> under #dvNavTop is the "compose" entry.
    write_msg = driver.find_elements(By.CSS_SELECTOR, '#dvNavTop li')[1]
    write_msg.click()

    wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'tH0')))
    recv_man = driver.find_element(By.CLASS_NAME, 'nui-editableAddr-ipt')
    title = driver.find_element(By.CSS_SELECTOR, '.dG0 .nui-ipt-input')
    recv_man.send_keys('xx@qq.com')
    title.send_keys('FBI WARNING')
    print(title.tag_name)

    # The message body is typed into the <body> of the editor iframe.
    frame = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'APP-editor-iframe')))
    driver.switch_to.frame(frame)
    body = driver.find_element(By.CSS_SELECTOR, 'body')
    body.send_keys('Federal law provides severe civil and criminal penalties for the unauthorized reproduction,distribution,or exhibition of copyrighted motion prictures(Title 17, United States Code, Sections 501 and 508). The federal bureau of Investigation investigate allegations of criminal copyright infringement.')

    # Disabled experiment (inserting an image by URL), kept as in the original:
    # pic_btn = driver.find_elements_by_css_selector('.ico-editor-image')[0].parent
    # print(pic_btn)
    # pic_btn.click()
    #
    # frame = wait.until(EC.presence_of_element_located((By.ID, '_mail_msgbox_4_709')))
    # driver.switch_to.frame(frame)
    # wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'nui-msgbox-normal ')))
    # url_btn = driver.find_element_by_class_name('nui-tabs-item-hover')
    # url_btn.click()
    # url_ipt = driver.find_elements_by_css_selector('#_mail_input_7_719 input')
    # url_ipt.send_keys('https://gss0.baidu.com/7Po3dSag_xI4khGko9WTAnF6hhy/zhidao/pic/item/4a36acaf2edda3ccc38e3b2f02e93901213f9253.jpg')
    # url_tak = driver.find_element_by_css_selector('#_mail_button_140_720 span')
    # url_tak.click()
    # driver.switch_to.parent_frame()

    driver.switch_to.parent_frame()  # leave the editor iframe, back to its parent
    send_button = driver.find_element(By.CLASS_NAME, 'nui-toolbar-item')
    send_button.click()

    # Sleep for a long time so the browser stays open and the result of the
    # send can be checked by eye.
    time.sleep(10000)
except Exception as e:
    print('1111111111111')
    print(e)
finally:
    driver.close()