pandas+pyecharts实战1(附代码)
- 导入模块
- 数据准备&预处理
- 数据可视化
- 1、评分等级分布
- 2、每日评论量
- 3、每小时评论量
- 4、一周各天评论量
- 5、角色热度
- 6、观众地域分布
导入模块
# 导入模块
import pandas as pd
import numpy as np
from collections import Counter
from pyecharts.charts import Geo,Bar,Line,Pie,Timeline
from pyecharts import options as opts
from pyecharts.globals import ThemeType,ChartType
数据准备&预处理
# Load the raw review data into a DataFrame.
# NOTE: `path` is not defined in this snippet; it must be set to the Excel
# workbook location before this cell runs.
df = pd.read_excel(path)
df.info()
# Fill missing user names with the placeholder '未知'.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form emits a FutureWarning on
# pandas 2.x and, with Copy-on-Write enabled, only modifies a temporary
# object, leaving `df` unchanged.
df['用户名'] = df['用户名'].fillna('未知')
# Count the remaining nulls per column to confirm the fill.
df.isnull().sum()
数据集展示如下:
数据可视化
1、评分等级分布
# Rating distribution: a horizontal bar chart of the number of reviews per
# rating, overlaid with a donut chart showing each rating's share.

# Number of reviews (non-null `id`) per rating, ordered by rating value.
df_star = df.groupby('评分')['id'].count()

# The bar chart lists ratings from the least to the most frequent.
bar_counts = df_star.sort_values(ascending=True)
b1 = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    .add_xaxis([str(rating) for rating in bar_counts.index])
    .add_yaxis('', bar_counts.tolist())
    # Swap the axes so the bars run horizontally.
    .reversal_axis()
    .set_series_opts(label_opts=opts.LabelOpts(position='right'))
    .set_global_opts(
        # After reversal_axis() the y axis carries the rating categories and
        # the x axis carries the counts.
        yaxis_opts=opts.AxisOpts(name='评分等级'),
        xaxis_opts=opts.AxisOpts(name='人/次'),
        title_opts=opts.TitleOpts(title='评分等级分布', pos_left='45%', pos_top='5%'),
        legend_opts=opts.LegendOpts(
            type_="scroll", pos_left="85%", pos_top="28%", orient="vertical"
        ),
    )
)

# The pie uses the rating-ordered counts.
x_data = [str(i) for i in df_star.index]
y_data = df_star.tolist()
p1 = (
    Pie(init_opts=opts.InitOpts(width='800px', height='600px'))
    .add(
        '',
        [list(z) for z in zip(x_data, y_data)],
        radius=['10%', '30%'],
        center=['65%', '60%'],
        label_opts=opts.LabelOpts(is_show=True),
    )
    # `position` is a label option, so it belongs inside LabelOpts. It was
    # previously passed as a bare keyword to set_series_opts, which attaches
    # it to the series itself instead of to the label.
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter='评分{b}: {d}%', position='outside')
    )
)

# Draw the pie on top of the bar chart and show the combined figure.
b1.overlap(p1)
b1.render_notebook()
2、每日评论量
# Daily comment volume: a smoothed line chart of the number of comments per day.

# Take the first 10 characters of the comment timestamp as the date.
# The column is cast to str *before* slicing so that non-string values
# (for example timestamps parsed by read_excel) are handled too; slicing each
# element first would raise on anything that is not already a string.
# Assumes the text form starts with a 10-character date such as 'YYYY-MM-DD'
# -- TODO confirm against the data.
df['评论日期'] = df['评论时间'].astype(str).str[:10]

# Number of comments (non-null `id`) per date.
df_comment = df.groupby('评论日期')['id'].count()
day_x_data = df_comment.index.tolist()
day_y_data = df_comment.tolist()

line1 = (
    Line()
    .add_xaxis(xaxis_data=day_x_data)
    .add_yaxis(
        series_name="",
        y_axis=day_y_data,
        is_smooth=True,
        symbol='circle',
        symbol_size=6,
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="八月每日评论量",
            pos_top="5%",
            pos_left="center",
            # NOTE(review): the title is white and this chart sets no
            # background colour, so it may be invisible on a light page --
            # confirm the intended theme.
            title_textstyle_opts=opts.TextStyleOpts(color="#fff", font_size=16),
        )
    )
)
line1.render_notebook()
3、每小时评论量
# Hourly comment volume: one line chart per day, played as a timeline.
# Relies on the '评论日期' column already being present on `df`.

# Characters 11-12 of the comment timestamp are taken as the hour. Cast to str
# before slicing so non-string values do not raise.
# Assumes timestamps look like 'YYYY-MM-DD HH:MM:SS', i.e. the hour is a
# zero-padded two-digit string -- TODO confirm against the data.
df['小时'] = df['评论时间'].astype(str).str[11:13]

# Number of comments (non-null `id`) per (date, hour) pair, as a flat table.
df_hour = df.groupby(by=['评论日期', '小时']).count()['id']
df_hour = pd.DataFrame(df_hour).reset_index().rename(columns={'id': '评论数'})

# Keys used to align the counts ('00'..'23') and the labels drawn on the
# x axis ('0'..'23').
hour_keys = [str(h).zfill(2) for h in range(24)]
hour_labels = [str(h) for h in range(24)]

timeline = Timeline(init_opts=opts.InitOpts(width='1200px', height='600px'))
timeline.add_schema(
    axis_type='category', orient='horizontal', symbol='circle', symbol_size=10,
    play_interval=3000, is_auto_play=True, is_loop_play=True, is_timeline_show=True,
    control_position='left', pos_left='10%', pos_bottom='0%', width='800px', height='30px',
)

for day in sorted(set(df_hour['评论日期'])):
    day_rows = df_hour.loc[df_hour['评论日期'] == day]
    # Expand to all 24 hours, filling hours without comments with 0. Without
    # this the y values contain only the hours that occur on that day and
    # are plotted against the wrong x positions whenever an hour is missing.
    hourly_counts = (
        day_rows.set_index('小时')['评论数']
        .reindex(hour_keys, fill_value=0)
        .tolist()
    )
    line = (
        Line(init_opts=opts.InitOpts(width='1200px', height='600px'))
        .add_xaxis(hour_labels)
        .add_yaxis('', y_axis=hourly_counts)
        .set_global_opts(
            title_opts=opts.TitleOpts('每小时评论数', pos_left='50%'),
            legend_opts=opts.LegendOpts(
                is_show=True, pos_top='50%', pos_right='0%', orient='vertical'
            ),
        )
    )
    timeline.add(chart=line, time_point=day)

timeline.render_notebook()
4、一周各天评论量
# Comment volume per weekday: one line chart per ISO week, played as a timeline.
# Relies on the '评论日期' column already being present on `df`.

comment_dates = pd.to_datetime(df['评论日期'])
df['第几周'] = comment_dates.dt.isocalendar().week
df['星期几'] = comment_dates.dt.day_name()

# Number of comments (non-null `id`) per (week, weekday) pair, as a flat table.
df_week = df.groupby(by=['第几周', '星期几']).count()['id']
df_week = pd.DataFrame(df_week).reset_index().rename(columns={'id': '评论数'})

# groupby orders the weekday names alphabetically (Friday, Monday, ...), so
# re-sort each week's rows into calendar order, Monday through Sunday.
weekday_order = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
]
weekday_rank = {name: pos for pos, name in enumerate(weekday_order)}
df_week = (
    df_week.assign(_rank=df_week['星期几'].map(weekday_rank))
    .sort_values(by=['第几周', '_rank'])
    .drop(columns='_rank')
    .reset_index(drop=True)
)

timeline2 = Timeline(init_opts=opts.InitOpts(width='1200px', height='600px'))
timeline2.add_schema(
    axis_type='category', orient='horizontal', symbol='circle', symbol_size=10,
    play_interval=3000, is_auto_play=True, is_loop_play=True, is_timeline_show=True,
    control_position='left', pos_left='10%', pos_bottom='0%', width='800px', height='30px',
)

for week in sorted(set(df['第几周'])):
    week_rows = df_week.loc[df_week['第几周'] == week]
    line = (
        Line(init_opts=opts.InitOpts(width='1200px', height='600px'))
        .add_xaxis(week_rows['星期几'].tolist())
        .add_yaxis('', y_axis=week_rows['评论数'].tolist())
        .set_global_opts(
            title_opts=opts.TitleOpts('一周各天评论数', pos_left='50%'),
            legend_opts=opts.LegendOpts(
                is_show=True, pos_top='50%', pos_right='0%', orient='vertical'
            ),
        )
    )
    # The week number may come back as a NumPy integer scalar; pass a plain
    # int as the timeline label.
    timeline2.add(chart=line, time_point=int(week))

timeline2.render_notebook()
5、角色热度
# Character popularity: count how often each character name appears in the
# review texts and plot the counts as a bar chart, most mentioned first.
roles = ['小白', '小青', '许仙', '法海', '司马', '孙姐', '牛头帮主', '蒙面男子', '宝青坊主', '书生']

# Concatenate all reviews into one string for substring counting. A newline
# separator keeps a name from being matched across the boundary between two
# adjacent reviews (e.g. one ending in '小' and the next starting with '白'),
# which joining with an empty string would allow.
content = '\n'.join(str(comment) for comment in df['评论'])

# (name, occurrences) per character, sorted by occurrences in descending order.
roles_num = [(role, content.count(role)) for role in roles]
roles_num = pd.DataFrame(roles_num, columns=['名称', '出现次数'])
roles_num = roles_num.sort_values(by='出现次数', ascending=False)
x_data = roles_num['名称']
y_data = roles_num['出现次数']

b2 = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    .add_xaxis(list(x_data))
    .add_yaxis('频次', list(y_data))
    .set_global_opts(
        title_opts=opts.TitleOpts(title='影评角色频次分布', pos_top='2%', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        yaxis_opts=opts.AxisOpts(
            name="频次",
            name_location='middle',
            name_gap=50,
            name_textstyle_opts=opts.TextStyleOpts(font_size=16),
        ),
    )
)
b2.render_notebook()
6、观众地域分布
# Audience geography: heat map over the China map of the 80 cities that
# appear most often in the '城市' column.
cities = df['城市'].values.tolist()
data = Counter(cities).most_common(80)

# Item style passed to the base map schema.
map_item_style = {
    'normal': {
        'shadowColor': 'rgba(0, 0, 0, .5)',
        'shadowBlur': 5,
        'shadowOffsetY': 0,
        'shadowOffsetX': 0,
        'borderColor': '#fff'
    }
}

# Colour buckets for the piecewise visual map, highest counts first.
count_pieces = [
    {'max': 500, 'min': 401, 'label': '401-500', 'color': '#990000'},
    {'max': 400, 'min': 301, 'label': '301-400', 'color': '#CD5C5C'},
    {'max': 300, 'min': 201, 'label': '201-300', 'color': '#F08080'},
    {'max': 200, 'min': 101, 'label': '101-200', 'color': '#FFCC99'},
    {'max': 100, 'min': 0, 'label': '0-100', 'color': '#FFE4E1'},
]

# White text is used for the title and visual map on the dark background
# set in InitOpts below.
white_title_style = opts.TextStyleOpts(color="#fff", font_size=16)
geo_title = opts.TitleOpts(
    title="地理位置分布",
    pos_top="2%",
    pos_left="center",
    title_textstyle_opts=white_title_style,
)
geo_visual_map = opts.VisualMapOpts(
    is_show=True,
    is_piecewise=True,
    min_=0,
    max_=500,
    split_number=5,
    series_index=0,
    pos_bottom='5%',
    pos_left='5%',
    textstyle_opts=opts.TextStyleOpts(color="#fff"),
    pieces=count_pieces,
)

# Build the chart step by step; each call configures the same Geo instance.
geo = Geo(init_opts=opts.InitOpts(width="1000px", height="600px", bg_color="#404a59"))
geo.add_schema(maptype="china", itemstyle_opts=map_item_style)
geo.add("评论数量", data, type_=ChartType.HEATMAP)
geo.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
geo.set_global_opts(
    title_opts=geo_title,
    legend_opts=opts.LegendOpts(is_show=False),
    visualmap_opts=geo_visual_map,
)
geo.render_notebook()