数据

1.折线图

 

import matplotlib.pyplot as plt
# Sample data: the integers 1..5 and their squares.
input_values = list(range(1, 6))
squares = [value ** 2 for value in input_values]

# Draw the line with a stroke width of 5.
plt.plot(input_values, squares, linewidth=5)

# Chart title and axis labels, each with its font size.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square", fontsize=14)

# Size of the tick labels on both axes.
plt.tick_params(axis='both', labelsize=14)

plt.show()

 

 

 

2.散点图

 

import matplotlib.pyplot as plt
# Sample data: the integers 1..5 and their squares.
input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

# Point size s=5, no outline (edgecolor='none'), custom RGB colour (0, 0, 0.8).
plt.scatter(input_values, squares, s=5, edgecolor='none', c=(0, 0, 0.8))
# Highlight the starting point with a larger green marker.
plt.scatter(0, 0, c='green', s=100, edgecolor='none')

# Chart title and axis labels, each with its font size.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square", fontsize=14)

# Size of the major tick labels on both axes.
plt.tick_params(axis='both', which='major', labelsize=14)

# Save the figure to disk, trimming surrounding whitespace.
# savefig() is *called* (not assigned to) and runs before show(): once the
# window opened by show() is closed the figure may already be released,
# in which case a later savefig() would write an empty image.
plt.savefig('square_plot.png', bbox_inches='tight')
plt.show()

 

3.画廊

 

import pygal

# NOTE(review): `die` (an object exposing `num_sides`) and `results` (the list
# of recorded rolls) are not defined in this snippet; they must exist before
# it runs.

# Every possible face value, from 1 up to the number of sides.
face_values = range(1, die.num_sides + 1)

# Count how many times each face value appears in the recorded rolls.
frequencies = [results.count(value) for value in face_values]

# Build the bar chart.
hist = pygal.Bar()
hist.title = "..."
# x_labels must be an ordered sequence of strings, one per bar; a set literal
# would lose the order.
hist.x_labels = [str(value) for value in face_values]
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

# Add the data series under the legend name 'D6' and write the chart as SVG.
hist.add('D6', frequencies)
hist.render_to_file('die_visual.svg')

 

 

4.csv

将数据作为一系列以逗号分隔的值写入文件

 

import csv

filename = '...'

# newline='' hands line-ending handling to the csv module, as its
# documentation recommends; otherwise newlines embedded in quoted fields
# can be misread.
with open(filename, 'r', newline='') as f:
    reader = csv.reader(f)
    # The first row holds the column headers.
    header_row = next(reader)

    # enumerate() yields each header together with its column index.
    for index, column_header in enumerate(header_row):
        print(index, column_header)

    # Collect the second column of every remaining row (values stay strings).
    highs = [row[1] for row in reader]

将字符串转换为整数时,先将字符串转换为浮点数,再转换为整数,避免因存在小数点而报错

 

 

常用内建模块

1.datetime

获取当前日期和时间

 

from datetime import datetime
# Current local date and time as a datetime object.
now = datetime.now()

 

 

获取指定日期和时间

 

from datetime import datetime
# Build a datetime for a specific moment: 2015-04-19 12:20 (seconds default to 0).
dt = datetime(year=2015, month=4, day=19, hour=12, minute=20)

 

 

datetime 转换为 timestamp

 

# Convert a datetime to a POSIX timestamp (a float number of seconds).
# NOTE(review): `dt` is not defined in this snippet; presumably it is the
# datetime created in the preceding example.
dt.timestamp()

注:Python的timestamp是一个浮点数,整数部分表示秒数,小数部分表示不足一秒的部分(精度可达微秒)

 

 

timestamp 转换为 datetime

 

from datetime import datetime, timezone

t = 1429417200.0

# Interpret the timestamp in the machine's local time zone (naive result).
local_time = datetime.fromtimestamp(t)

# Interpret the timestamp as UTC. Passing tz=timezone.utc yields an *aware*
# datetime and replaces datetime.utcfromtimestamp(), which is deprecated
# since Python 3.12.
utc_time = datetime.fromtimestamp(t, tz=timezone.utc)

str 转换为datetime

 

 

from datetime import datetime
# Parse a date/time string; the format string mirrors the layout of the text.
# The result is a naive datetime (no time zone information).
cday = datetime.strptime('2015-6-1 18:19:59','%Y-%m-%d %H:%M:%S')

 

 

datetime 转换为 str

 

from datetime import datetime
now = datetime.now()
# strftime() is called on the datetime instance itself; the format yields
# abbreviated weekday, abbreviated month, day of month, and hour:minute.
now_str = now.strftime('%a,%b %d %H:%M')

datetime 加减

 

 

from datetime import datetime, timedelta
# Adding or subtracting a timedelta produces a new datetime; `now` itself is
# unchanged. The results below are not stored, so they are only visible when
# the lines are typed into an interactive session.
now = datetime.now()
# 10 hours later.
now + timedelta(hours=10)
# 1 day earlier.
now - timedelta(days=1)
# 2 days and 12 hours later.
now + timedelta(days=2, hours=12)

本地时间转换为UTC时间

 

 

from datetime import datetime, timedelta, timezone
tz_utc_8 = timezone(timedelta(hours=8)) # Create the fixed-offset time zone UTC+8:00
now = datetime.now()
# replace() only attaches the tzinfo; the date and time fields are not shifted.
dt = now.replace(tzinfo=tz_utc_8) # Force the time zone to UTC+8:00

时区转换

 

 

# Get the current time as an aware datetime in UTC (UTC+0:00).
# datetime.now(timezone.utc) replaces utcnow().replace(tzinfo=timezone.utc);
# datetime.utcnow() is deprecated since Python 3.12.
utc_dt = datetime.now(timezone.utc)
# astimezone() converts the UTC time to Beijing time (UTC+8:00).
bj_dt = utc_dt.astimezone(timezone(timedelta(hours=8)))
# astimezone() converts the UTC time to Tokyo time (UTC+9:00).
tokyo_dt = utc_dt.astimezone(timezone(timedelta(hours=9)))
# astimezone() converts bj_dt to Tokyo time; any aware datetime can be the
# starting point, not only a UTC one.
tokyo_dt2 = bj_dt.astimezone(timezone(timedelta(hours=9)))

2.collections

 

namedtuple

 

from collections import namedtuple

# namedtuple() creates a tuple subclass whose fields can also be read by name.
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)
# Fields are accessed as attributes (p.x is 1, p.y is 2).
p.x
p.y

deque(双向队列,可在两端高效地插入和删除元素)

 

 

from collections import deque
# A deque accepts any iterable; iterating the string yields 'a', 'b', 'c'.
q = deque('abc')

defaultdict (如果key不存在,就会返回默认值)

 

 

from collections import defaultdict
# Looking up a missing key calls the factory and stores/returns its result,
# so any unknown key yields 'N/A' instead of raising KeyError.
dd = defaultdict(lambda:'N/A')

OrderedDict(保持key的顺序)

 

 

from collections import OrderedDict
# Pair the keys 'a', 'b', 'c' with 1, 2, 3; insertion order is preserved.
od = OrderedDict(zip('abc', range(1, 4)))

 

 

Counter (简单的计数器)

 

from collections import Counter
# Counter tallies the elements of any iterable directly, so iterating the
# string counts each character without a manual loop.
c = Counter('programming')

3.base64:(用64个字符来表示任意二进制数据的方法)

 

 

import base64
# b'...' is a placeholder for real data; all four functions take bytes and
# return bytes. The results are not stored here.
# Standard Base64 alphabet: encode, then decode.
base64.b64encode(b'...')
base64.b64decode(b'...')
# URL-safe alphabet ('-' and '_' in place of '+' and '/'): encode, then decode.
base64.urlsafe_b64encode(b'...')
base64.urlsafe_b64decode(b'...')

 

 

4.摘要算法:hashlib

 

import hashlib

# Hash the UTF-8 bytes of the message and print the digest as hex text.
message = 'how to use md5 in python hashlib?'
md5 = hashlib.md5(message.encode('utf-8'))
print(md5.hexdigest())

5.itertools

 

 

import itertools
# count(1) yields 1, 2, 3, ... without end.
natuals = itertools.count(1)
# WARNING: this loop never terminates on its own; it prints increasing
# integers until the program is interrupted.
for n in natuals:
   print(n)

count()会创建一个无限的迭代器

 

cycle()会把传入的一个序列无限重复下去

repeat()负责把一个元素无限重复下去

chain()可以把一组迭代对象串联起来,形成一个更大的迭代对象

groupby()把迭代器中相邻的重复元素挑出来放在一起

 

6.urllib

Get

 

from urllib import request

# Fetch a URL ('https://...' is a placeholder) and dump the response.
# The with-block closes the response when it exits.
with request.urlopen('https://...') as f:
	# Read the whole response body as bytes.
	data = f.read()
	# Print every response header as "name value".
	for k,v in f.getheaders():
		print(k,v)
	# Decode the body as UTF-8 text and print it.
	print(data.decode('utf-8'))

模拟浏览器发送GET请求

 

 

from urllib import request

# Build the request explicitly so a mobile-browser User-Agent can be attached.
user_agent = 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25'
req = request.Request('http://www.douban.com/')
req.add_header('User-Agent', user_agent)

with request.urlopen(req) as f:
    # Status line first, then every header, then the decoded body.
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('{}: {}'.format(k, v))
    body = f.read()
    print('Data:', body.decode('utf-8'))

 

 

POST

 

from urllib import request, parse

print('Login to weibo.cn...')
email = input('Email: ')
passwd = input('Password: ')

# Form fields, URL-encoded in exactly this order.
form_fields = [
    ('username', email),
    ('password', passwd),
    ('entry', 'mweibo'),
    ('client_id', ''),
    ('savestate', '1'),
    ('ec', ''),
    ('pagerefer', 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F'),
]
login_data = parse.urlencode(form_fields)

# Headers sent along with the login request.
request_headers = (
    ('Origin', 'https://passport.weibo.cn'),
    ('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25'),
    ('Referer', 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F'),
)
req = request.Request('https://passport.weibo.cn/sso/login')
for header_name, header_value in request_headers:
    req.add_header(header_name, header_value)

# Supplying a body makes urlopen() issue a POST; the body must be bytes.
payload = login_data.encode('utf-8')
with request.urlopen(req, data=payload) as f:
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('{}: {}'.format(k, v))
    body = f.read()
    print('Data:', body.decode('utf-8'))

Handler:通过代理去访问网站

 

 

# The snippet refers to the full dotted name `urllib.request`, so the package
# name itself must be bound; `from urllib import request` alone does not
# define `urllib`.
import urllib.request

# Route plain-HTTP traffic through the given proxy.
proxy_handler = urllib.request.ProxyHandler({'http': 'http://www.example.com:3128/'})
# Supply credentials for a proxy that asks for HTTP Basic authentication.
proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
# Combine both handlers into an opener and use it instead of urlopen().
opener = urllib.request.build_opener(proxy_handler, proxy_auth_handler)
with opener.open('http://www.example.com/login.html') as f:
    pass

7.XML

 

3个事件:

<a href = "/">python</a>
start_element(self,name,attrs),读取<a href = "/">时
char_data(self,text),读取python
end_element(self,name),读取</a>时

 

from xml.parsers.expat import ParserCreate


class DefaultSaxHandler:
    """Print each of the three expat parsing events as it fires."""

    def start_element(self, name, attrs):
        """Handle an opening tag; attrs maps attribute names to values."""
        print('sax:start_element: %s, attrs: %s' % (name, str(attrs)))

    def end_element(self, name):
        """Handle a closing tag."""
        print('sax:end_element: %s' % name)

    def char_data(self, text):
        """Handle a run of character data found between tags."""
        print('sax:char_data: %s' % text)


# Sample document to parse.
xml = '<a href="/">python</a>'

handler = DefaultSaxHandler()
parser = ParserCreate()
# Register one callback per event type.
parser.StartElementHandler = handler.start_element
parser.EndElementHandler = handler.end_element
parser.CharacterDataHandler = handler.char_data
# The second argument marks this as the final chunk, so an incomplete
# document is reported as an error instead of being silently accepted.
parser.Parse(xml, True)

 

8.HTMLParser

 

from html.parser import HTMLParser
from html.entities import name2codepoint

class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that echoes every parsing event to stdout."""

    def handle_starttag(self, tag, attrs):
        """Print an opening tag as <tag>; attributes are ignored."""
        print('<{}>'.format(tag))

    def handle_endtag(self, tag):
        """Print a closing tag as </tag>."""
        print('</{}>'.format(tag))

    def handle_startendtag(self, tag, attrs):
        """Print a self-closing tag as <tag/>; attributes are ignored."""
        print('<{}/>'.format(tag))

    def handle_data(self, data):
        """Print text content unchanged."""
        print(data)

    def handle_comment(self, data):
        """Print a comment wrapped in its <!-- --> delimiters."""
        print('<!-- {} -->'.format(data))

    def handle_entityref(self, name):
        """Print a named character reference as &name;."""
        print('&{};'.format(name))

    def handle_charref(self, name):
        """Print a numeric character reference as &#name;."""
        print('&#{};'.format(name))

# Feed a small HTML document to the parser; each tag, comment and text run
# triggers the matching handle_* method, which prints it.
parser = MyHTMLParser()
parser.feed('''<html>
<head></head>
<body>
<!-- test html parser -->
    <p>Some <a href=\"#\">html</a> HTML tutorial...<br>END</p>
</body></html>''')

 

 

9 Hacker News API

from operator import itemgetter

import requests

# Make the API call for the list of top stories and store the response.
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print("Status code:", r.status_code)

# Process information about each submission.
submission_ids = r.json()
submission_dicts = []
for submission_id in submission_ids[:30]:
    # Make a separate API call for each submission.
    url = ('https://hacker-news.firebaseio.com/v0/item/' +
           str(submission_id) + '.json')
    submission_r = requests.get(url)
    print(submission_r.status_code)
    response_dict = submission_r.json()

    submission_dict = {
        'title': response_dict['title'],
        'link': 'http://news.ycombinator.com/item?id=' + str(submission_id),
        # Items without a 'descendants' field count as having no comments.
        'comments': response_dict.get('descendants', 0),
    }
    # Append inside the loop so every submission is kept, not only the last.
    submission_dicts.append(submission_dict)

# Order the submissions from most to fewest comments.
submission_dicts = sorted(submission_dicts, key=itemgetter('comments'),
                          reverse=True)

for submission_dict in submission_dicts:
    print("\nTitle:", submission_dict['title'])
    print("Discussion link:", submission_dict['link'])
    print("Comments:", submission_dict['comments'])