数据
1.折线图
import matplotlib.pyplot as plt
# Plot the first five integers against their squares as a line chart.
input_values = list(range(1, 6))
squares = [value ** 2 for value in input_values]

plt.plot(input_values, squares, linewidth=5)  # line width of 5

# Chart title and axis labels, each with its font size.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square", fontsize=14)

# Size of the tick labels on both axes.
plt.tick_params(axis='both', labelsize=14)

plt.show()
2.散点图
import matplotlib.pyplot as plt
# Scatter plot of the first five integers against their squares.
input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

# Point size s=5, outline removed with edgecolor='none', custom RGB colour c.
plt.scatter(input_values, squares, s=5, edgecolor='none', c=(0, 0, 0.8))
# Highlight the starting point with a larger green marker.
plt.scatter(0, 0, c='green', s=100, edgecolor='none')

# Chart title and axis labels, each with its font size.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square", fontsize=14)

# Size of the major tick labels on both axes.
plt.tick_params(axis='both', which='major', labelsize=14)

# savefig() is a function to call, not a name to assign to. It also has to
# run before show(): once the window opened by show() is closed the figure
# is released and a blank image would be written.
plt.savefig('square_plot.png', bbox_inches='tight')
plt.show()
3.画廊
import pygal
# Count how often each face value occurs in the recorded rolls.
# NOTE(review): `die` and `results` are not defined in this snippet; they are
# presumably a die object and the list of its rolls -- confirm against the
# code that precedes it.
face_values = range(1, die.num_sides + 1)
frequencies = [results.count(value) for value in face_values]

# Render the frequencies as a bar chart.
hist = pygal.Bar()
hist.title = "..."
# One label per face, in order; a set literal would not preserve the order.
hist.x_labels = [str(value) for value in face_values]
hist.x_title = "Result"
hist.y_title = "Frequency of Result"
hist.add('D6', frequencies)
hist.render_to_file('die_visual.svg')
4.csv
将数据作为一系列以逗号分隔的值写入文件
import csv
filename = '...'
# The csv module expects files opened with newline='' so that newlines
# embedded inside quoted fields are handled correctly.
with open(filename, 'r', newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # the first row holds the column names

    # enumerate() yields every header together with its index.
    for index, column_header in enumerate(header_row):
        print(index, column_header)

    # Collect the second column (index 1) of each remaining row; the values
    # are kept as the strings read from the file.
    highs = [row[1] for row in reader]
将字符串转换为整数时,先将字符串转换为浮点数,再转换为整数,避免因存在小数点而报错
常用内建模块
1.datetime
获取当前日期和时间
from datetime import datetime
# Current date and time; no tz argument is passed, so the result is naive.
now = datetime.now()
获取指定日期和时间
from datetime import datetime
# Build a specific moment: 2015-04-19 12:20 (year, month, day, hour, minute).
dt = datetime(2015,4,19,12,20)
datetime 转换为 timestamp
# Convert dt to a timestamp; the returned value is not stored here.
dt.timestamp()
注:Python的timestamp是一个浮点数,整数部分表示秒数,如果有小数位,小数位表示不足一秒的部分(精度可达微秒)
timestamp 转换为 datetime
from datetime import datetime, timezone

t = 1429417200.0
# Naive datetime expressed in the machine's local time zone.
local_dt = datetime.fromtimestamp(t)
# Aware datetime in UTC. datetime.utcfromtimestamp() is deprecated since
# Python 3.12 and returned a naive value that is easily mistaken for local
# time, so the time zone is passed explicitly instead.
utc_dt = datetime.fromtimestamp(t, tz=timezone.utc)
str 转换为datetime
from datetime import datetime
# Parse the text into a datetime using the given format string.
cday = datetime.strptime('2015-6-1 18:19:59','%Y-%m-%d %H:%M:%S')
datetime 转换为 str
from datetime import datetime
now = datetime.now()
# strftime() is called on the datetime instance; the original line referred
# to an undefined name `datatime` and discarded the result.
formatted = now.strftime('%a,%b %d %H:%M')
datetime 加减
from datetime import datetime, timedelta
now = datetime.now()
# Adding or subtracting a timedelta produces a new datetime; the results of
# the three expressions below are not stored.
now + timedelta(hours=10)  # 10 hours later
now - timedelta(days=1)  # 1 day earlier
now + timedelta(days=2, hours=12)  # 2 days and 12 hours later
本地时间转换为UTC时间
from datetime import datetime, timedelta, timezone
tz_utc_8 = timezone(timedelta(hours=8)) # create the UTC+8:00 time zone
now = datetime.now()
dt = now.replace(tzinfo=tz_utc_8) # force tzinfo to UTC+8:00 (only the tzinfo field is replaced)
时区转换
# Current time as an aware UTC datetime. datetime.utcnow() is deprecated
# since Python 3.12; datetime.now(timezone.utc) returns the aware value
# directly, without a separate replace(tzinfo=...) step.
utc_dt = datetime.now(timezone.utc)
# astimezone() converts the time zone to Beijing time (UTC+8:00):
bj_dt = utc_dt.astimezone(timezone(timedelta(hours=8)))
# astimezone() converts the time zone to Tokyo time (UTC+9:00):
tokyo_dt = utc_dt.astimezone(timezone(timedelta(hours=9)))
# astimezone() converts bj_dt to Tokyo time as well:
tokyo_dt2 = bj_dt.astimezone(timezone(timedelta(hours=9)))
2.collections
namedtuple
from collections import namedtuple  # fixed: the name was misspelled `nametuple`

# A tuple subclass named Point whose two fields can be read by name.
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)
p.x  # first field
p.y  # second field
deque(双向队列,适合在两端高效地插入和删除元素)
from collections import deque
# Build a deque from a list of three items.
q = deque(['a','b','c'])
defaultdict (如果key不存在,就会返回默认值)
from collections import defaultdict
# The factory passed in here returns 'N/A'; it supplies the value for missing keys.
dd = defaultdict(lambda:'N/A')
OrderedDict(保持key的顺序)
from collections import OrderedDict
# Build an ordered dictionary from a list of (key, value) pairs.
od = OrderedDict([('a',1),('b',2),('c',3)])
Counter (简单的计数器)
from collections import Counter
# Count how many times each character occurs in the word.
c = Counter()
for ch in 'programming':
    c[ch] += 1  # a Counter reports 0 for a key it has not seen yet
3.base64:(用64个字符来表示任意二进制数据的方法)
import base64
# NOTE(review): b'...' looks like a placeholder for real input -- substitute
# actual bytes. None of the return values are stored here.
base64.b64encode(b'...')  # standard Base64 encoding
base64.b64decode(b'...')  # standard Base64 decoding
base64.urlsafe_b64encode(b'...')  # encoding with the URL-safe alphabet
base64.urlsafe_b64decode(b'...')  # decoding with the URL-safe alphabet
4.摘要算法:hashlib
import hashlib
# Hash the UTF-8 bytes of the message and print the digest as hex text.
message = 'how to use md5 in python hashlib?'
md5 = hashlib.md5(message.encode('utf-8'))
print(md5.hexdigest())
5.itertools
import itertools
naturals = itertools.count(1)  # infinite iterator: 1, 2, 3, ...
# count() never stops on its own, so take a bounded slice rather than
# looping over it directly, which would print forever.
for n in itertools.islice(naturals, 10):
    print(n)
count()会创建一个无限的迭代器
cycle()会把传入的一个序列无限重复下去
repeat()负责把一个元素无限重复下去
chain()可以把一组迭代对象串联起来,形成一个更大的迭代对象
groupby()把迭代器中相邻的重复元素挑出来放在一起
6.urllib
Get
from urllib import request
# Fetch the URL, print every response header, then print the decoded body.
with request.urlopen('https://...') as f:
    data = f.read()
    for k, v in f.getheaders():
        print(k, v)
    print(data.decode('utf-8'))
模拟浏览器发送GET请求
from urllib import request
# Build the request explicitly so that a browser User-Agent header can be set.
req = request.Request('http://www.douban.com/')
req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')
with request.urlopen(req) as f:
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('%s: %s' % (k, v))
    print('Data:', f.read().decode('utf-8'))
POST
from urllib import request, parse
print('Login to weibo.cn...')
email = input('Email: ')
passwd = input('Password: ')

# URL-encode the form fields that make up the POST body.
login_data = parse.urlencode([
    ('username', email),
    ('password', passwd),
    ('entry', 'mweibo'),
    ('client_id', ''),
    ('savestate', '1'),
    ('ec', ''),
    ('pagerefer', 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F')
])

req = request.Request('https://passport.weibo.cn/sso/login')
req.add_header('Origin', 'https://passport.weibo.cn')
req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')
req.add_header('Referer', 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F')

# Supplying `data` (as bytes) turns the request into a POST.
with request.urlopen(req, data=login_data.encode('utf-8')) as f:
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('%s: %s' % (k, v))
    print('Data:', f.read().decode('utf-8'))
Handler:通过代理去访问网站
# This snippet uses the fully qualified module name, so it needs its own import.
import urllib.request

# Route HTTP traffic through a proxy that requires basic authentication.
proxy_handler = urllib.request.ProxyHandler({'http': 'http://www.example.com:3128/'})
proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
opener = urllib.request.build_opener(proxy_handler, proxy_auth_handler)
with opener.open('http://www.example.com/login.html') as f:
    pass
7.XML
3个事件:
<a href = "/">python</a>
start_element(self,name,attrs),读取<a href = "/">时
char_data(self,text),读取python
end_element(self,name),读取</a>时
from xml.parsers.expat import ParserCreate


class DefaultSaxHandler:
    """Print each of the three parse events as it is reported."""

    def start_element(self, name, attrs):
        """Handle an opening tag; attrs maps attribute names to values."""
        print('start_element:', name, attrs)

    def char_data(self, text):
        """Handle text found between tags."""
        print('char_data:', text)

    def end_element(self, name):
        """Handle a closing tag."""
        print('end_element:', name)


# Sample document; the snippet referred to `xml` without ever defining it.
xml = '<a href="/">python</a>'

handler = DefaultSaxHandler()
parser = ParserCreate()
parser.StartElementHandler = handler.start_element
parser.EndElementHandler = handler.end_element
parser.CharacterDataHandler = handler.char_data
# The second argument marks this as the final chunk of input, so an
# incomplete document is reported as an error instead of passing silently.
parser.Parse(xml, True)
8.HTMLParser
from html.parser import HTMLParser
from html.entities import name2codepoint
class MyHTMLParser(HTMLParser):
    """HTML parser that prints every piece of markup it is handed."""

    def handle_starttag(self, tag, attrs):
        """Print an opening tag."""
        print('<%s>' % tag)

    def handle_endtag(self, tag):
        """Print a closing tag."""
        print('</%s>' % tag)

    def handle_startendtag(self, tag, attrs):
        """Print a self-closing tag."""
        print('<%s/>' % tag)

    def handle_data(self, data):
        """Print text content."""
        print(data)

    def handle_comment(self, data):
        """Print a comment, wrapped in its delimiters."""
        print('<!--', data, '-->')

    def handle_entityref(self, name):
        """Print a named character reference."""
        print('&%s;' % name)

    def handle_charref(self, name):
        """Print a numeric character reference."""
        print('&#%s;' % name)
# Create the parser and feed it a small HTML document containing a comment,
# a link and a line break.
parser = MyHTMLParser()
parser.feed('''<html>
<head></head>
<body>
<!-- test html parser -->
<p>Some <a href=\"#\">html</a> HTML tutorial...<br>END</p>
</body></html>''')
9 Hacker News API
import requests
from operator import itemgetter

# Make the API call and store the response.
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print("Status code:", r.status_code)

# Process the information about each submission.
submission_ids = r.json()
submission_dicts = []
for submission_id in submission_ids[:30]:
    # Make a separate API call for every submission.
    url = ('https://hacker-news.firebaseio.com/v0/item/' + str(submission_id) + '.json')
    submission_r = requests.get(url)
    print(submission_r.status_code)
    response_dict = submission_r.json()

    submission_dict = {
        'title': response_dict['title'],
        'link': 'http://news.ycombinator.com/item?id=' + str(submission_id),
        # Fall back to 0 when the response has no 'descendants' entry.
        'comments': response_dict.get('descendants', 0),
    }
    submission_dicts.append(submission_dict)

# Sort once all submissions are collected, most comments first.
submission_dicts = sorted(submission_dicts, key=itemgetter('comments'), reverse=True)

for submission_dict in submission_dicts:
    print("\nTitle:", submission_dict['title'])
    print("Discussion link:", submission_dict['link'])
    print("Comments:", submission_dict['comments'])