文章目录

  • 0.前言
  • 1.百分比堆叠柱状图
  • 1.1 导入包,连接数据库
  • 1.2 查看数据
  • 1.3 数据预处理
  • 1.4 生成图表
  • 2.复合柱状图和折线图
  • 2.1 数据预处理
  • 2.2 生成图表
  • 3.竖直缩放条
  • 3.1 数据预处理
  • 3.2 生成图表
  • 4.标记点
  • 5.根据数据范围划分颜色
  • 6.渐变圆柱
  • 7.内外缩放
  • 8.随时间动态变化
  • 8.1 数据预处理
  • 8.2 生成图表


0.前言

  • 该分析系列使用爬取到的网易云歌单数据,对所学的Excel、SQL、Pandas、Pyecharts等数据分析及数据可视化工具进行巩固。

1.百分比堆叠柱状图

1.1 导入包,连接数据库
import numpy as np
import pandas as pd
import pymysql
from pyecharts import options as opts
from pyecharts.charts import Bar, Line
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType

# Open the MySQL connection, load the whole `playlist` table into a DataFrame,
# and always release the connection afterwards (the original never closed it).
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='123',
    database='cloudmusic',
    charset='utf8',
)
try:
    df = pd.read_sql("select * from playlist", con=conn)
finally:
    # NOTE(review): `conn` is not referenced again in the visible part of this
    # file; if later cells need it, move this close() to the end instead.
    conn.close()
1.2 查看数据
df.head(5)

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据

df.shape

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_02


共有51203行数据,16个字段

1.3 数据预处理
# Group by playlist type and average the four engagement counters, rounded to
# two decimals.
# NOTE: the columns must be selected with a list. The tuple form
# df.groupby(...)['a', 'b'] is deprecated and raises on pandas 2.x.
metric_columns = ['play_count', 'subscribed_count', 'share_count', 'comment_count']
type_group = df.groupby("type")[metric_columns].mean().round(2)

# Sort by average share_count, descending, and keep the top 10 types.
sorted_type_group = type_group.sort_values(by='share_count', ascending=False)[:10]
sorted_type_group

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据_03


统计总数

# Totals of the two metrics over the top-10 types; they serve as the
# denominators when the percentages are computed.
share_series = sorted_type_group.loc[:, "share_count"]
comment_series = sorted_type_group.loc[:, "comment_count"]
share_count_sum = share_series.sum()
comment_count_sum = comment_series.sum()

print(share_count_sum, comment_count_sum)

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_04

def _with_percent(values, total):
    """Pair each value with its share of *total* as a percentage (2 decimals)."""
    return [
        {"value": v, "percent": round((v / total)*100, 2)}
        for v in values
    ]


# X axis: the playlist types; Y data: value plus its percentage of the total.
xdata = sorted_type_group.index.tolist()

ydata1 = _with_percent(sorted_type_group['share_count'].tolist(), share_count_sum)
ydata2 = _with_percent(sorted_type_group['comment_count'].tolist(), comment_count_sum)

ydata1

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_05

1.4 生成图表
# Percentage stacked bar chart: share and comment averages for the top-10 types.
bar1 = (
    Bar(init_opts=opts.InitOpts(width="1000px"))
    .add_xaxis(xdata)
    # Both series use stack="stack1", so they are stacked on the same column.
    .add_yaxis("转发量", ydata1, stack="stack1", category_gap="50%", color="#009db2")
    .add_yaxis("评论数", ydata2, stack="stack1", category_gap="50%", color="#f47a75")
    .set_global_opts(
        # Title options
        title_opts=opts.TitleOpts(
            title="各类型歌单的转发量和评论数",
            subtitle="列举了前十名",
        ),
        # X axis options
        xaxis_opts=opts.AxisOpts(
            name="类型"
        ),
        # Y axis options
        yaxis_opts=opts.AxisOpts(
            name="数量",
            name_location="center",
            # Gap between axis name and axis line; a number, not the string "40".
            name_gap=40,
        ),
        # Brush (region selection) component
        brush_opts=opts.BrushOpts(),
        # Data zoom slider
        datazoom_opts=opts.DataZoomOpts(),
        # Toolbox component
        toolbox_opts=opts.ToolboxOpts(),
    )
    .set_series_opts(
        # Label options: show each item's precomputed "percent" field,
        # formatted with toFixed() (no decimals) and a trailing '%'.
        label_opts=opts.LabelOpts(
            position="right",
            formatter=JsCode(
                "function(x){return Number(x.data.percent).toFixed() + '%';}"
            ),
        )
    )
)

bar1.render_notebook()

图表特色:

  1. X轴缩放
  2. 可框选区域查看数据
  3. 可转为折线图和并排柱状图

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_06


android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_07


android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_08


android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_09

2.复合柱状图和折线图

2.1 数据预处理
type_group.head(10)

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_10

# First ten rows of the per-type averages (in group order, not sorted by value).
type_group10 = type_group.head(10)
2.2 生成图表
# Bar part of the combined chart: average shares and comments on the left
# axis, plus an extra right-hand axis (index 1) reserved for the line series.
bar2 = (
    Bar(init_opts=opts.InitOpts(width="1000px"))
    .add_xaxis(xaxis_data=type_group10.index.tolist())
    .add_yaxis(
        series_name="平均转发量",
        yaxis_data=type_group10['share_count'].tolist(),
        color="#009db2",
        label_opts=opts.LabelOpts(is_show=False),
    )
    .add_yaxis(
        series_name="平均评论数",
        yaxis_data=type_group10['comment_count'].tolist(),
        color="#f47a75",
        label_opts=opts.LabelOpts(is_show=False),
    )
    .extend_axis(
        yaxis=opts.AxisOpts(
            name="收藏量",
            type_="value",
            axislabel_opts=opts.LabelOpts(
                # Show tick values in thousands, e.g. 2000 -> "2K".
                formatter=JsCode(
                "function(y){return Number(y / 1000) + 'K';}"
                ),
            ),
            axisline_opts=opts.AxisLineOpts(linestyle_opts=opts.LineStyleOpts(color="#e75840")),
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="复合柱状图与折线图"
        ),
        tooltip_opts=opts.TooltipOpts(
            is_show=True,
            trigger="axis",
            axis_pointer_type="cross",
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            # interval 0 forces every category label to be shown; ECharts
            # expects a number here, not the string '0'.
            axislabel_opts={'interval': 0},
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            name="转发量与评论数",
            type_="value",
            min_=0,
            max_=400,
            interval=100,
            axisline_opts=opts.AxisLineOpts(linestyle_opts=opts.LineStyleOpts(color="#024b51")),
            # AxisTickOpts belongs to axistick_opts; the original passed it as
            # axislabel_opts, which writes tick settings into the label config.
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        )
    )
)

# Line part of the combined chart: average subscriptions, drawn against the
# second Y axis (yaxis_index=1) that bar2 adds via extend_axis.
line2 = (
    Line()
    .add_xaxis(xaxis_data=type_group10.index.tolist())
    .add_yaxis(
        series_name="平均收藏量",
        yaxis_index=1,
        y_axis=type_group10['subscribed_count'].tolist(),
        linestyle_opts=opts.LineStyleOpts(color="#e75840", width=1),
        z=10,
        label_opts=opts.LabelOpts(color="#e75840", is_show=True),
    )
)

# The original built bar2 and line2 but never combined or displayed them.
# Overlay the line on the bar chart and render, as section 1 does for bar1.
bar2.overlap(line2).render_notebook()

图表特色:

  1. 同时展示柱状图和折线图
  2. 增加双轴,且轴颜色不同
  3. 鼠标在图表区内移动时,X、Y轴上有定位,鼠标放置点有信息显示

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_11

3.竖直缩放条

3.1 数据预处理
# First 30 types, with every averaged column cast to int.
type_group30 = type_group[:30].astype(int)
type_group30.head()

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_12

3.2 生成图表
# Bar chart of average shares for 30 types with a vertically oriented
# data-zoom slider.
bar3 = (
    Bar()
    .add_xaxis(xaxis_data=type_group30.index.tolist())
    .add_yaxis(
        series_name="平均转发量",
        yaxis_data=type_group30['share_count'].tolist(),
        color="#009db2",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="垂直数据缩放条",
        ),
        datazoom_opts=opts.DataZoomOpts(orient="vertical"),
    )
)

# Display the chart; the original built it but never rendered it,
# unlike bar1 in section 1.
bar3.render_notebook()

图表特色:

  1. 可拉动垂直缩放条选取数据范围

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据_13

4.标记点

# Bar chart of average shares with mark points on the max, min and average.
bar4 = (
    Bar()
    .add_xaxis(xaxis_data=type_group30.index.tolist())
    .add_yaxis(
        series_name="平均转发量",
        yaxis_data=type_group30['share_count'].tolist(),
        color="#009db2",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="最大最小平均值标记",
        )
    )
    .set_series_opts(
        # Hide per-bar value labels so only the mark points carry numbers.
        label_opts=opts.LabelOpts(is_show=False),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
                opts.MarkPointItem(type_="average", name="平均值"),
            ]
        ),
    )
)

# Display the chart; the original built it but never rendered it,
# unlike bar1 in section 1.
bar4.render_notebook()

图表特色:

  1. 标记了最大值、最小值和平均值的位置

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_14

5.根据数据范围划分颜色

# JavaScript callback that picks a bar colour from its value:
#   <= 100 -> green, (100, 200] -> orange, > 200 -> red.
# The original tested `value > 0 && value <= 100`, so a value of exactly 0
# (possible after the int cast) fell through to the "high" red colour.
color_function = """
        function (params) {
            if (params.value <= 100) {
                return '#71c16f';
            } else if (params.value <= 200) {
                return '#f7af59';
            }
            return '#f06464';
        }
        """

bar5 = (
    Bar()
    .add_xaxis(xaxis_data=type_group30.index.tolist())
    .add_yaxis(
        series_name="平均转发量",
        yaxis_data=type_group30['share_count'].tolist(),
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_function)),
    )
)

# Display the chart; the original built it but never rendered it,
# unlike bar1 in section 1.
bar5.render_notebook()

图表特色:

  1. 根据数值的高中低显示不同的柱子颜色

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_15

6.渐变圆柱

# Bar chart with rounded corners and a vertical linear colour gradient.
bar6 = (
    Bar()
    .add_xaxis(xaxis_data=type_group10.index.tolist())
    .add_yaxis("平均转发量", yaxis_data=type_group10["share_count"].tolist(), category_gap="50%")
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="渐变圆柱",
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            # interval 0 shows every category label; ECharts expects a
            # number here, not the string '0'.
            axislabel_opts={'interval': 0},
        ),
    )
    .set_series_opts(
        itemstyle_opts={
            "normal": {
                # Gradient from (0,0) to (0,1), i.e. top to bottom of each bar.
                "color": JsCode(
                    """new echarts.graphic.LinearGradient(0,0,0,1,
                    [{offset: 0, color: '#0780cf'},
                     {offset: 1, color: '#47aee3'}
                    ],
                    false)"""
                ),
                # Same radius on all four corners.
                "barBorderRadius": [30, 30, 30, 30],
            }
        }
    )
)

# Display the chart; the original built it but never rendered it,
# unlike bar1 in section 1.
bar6.render_notebook()

图表特色:

  1. 柱子为圆角柱子
  2. 颜色渐变

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_16

7.内外缩放

# Bar chart with two data-zoom controls: the default slider and an "inside"
# zoom, plus a delayed elastic entrance animation.
bar7 = (
    Bar(
        init_opts=opts.InitOpts(
            animation_opts=opts.AnimationOpts(
                animation_delay=1000, animation_easing="elasticOut"
            )
        )
    )
    .add_xaxis(xaxis_data=type_group30.index.tolist())
    .add_yaxis("平均转发量", yaxis_data=type_group30['share_count'].tolist(), color="#009db2")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="内外缩放"),
        datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")],
    )
)

# Display the chart; the original built it but never rendered it,
# unlike bar1 in section 1.
bar7.render_notebook()

图表特色:

  1. 在内部滑动鼠标滚轮可以缩放数据
  2. 在外部拖动滑动条也可以缩放数据

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_17

8.随时间动态变化

8.1 数据预处理

(1)替换省份字段信息

根据年份和省份对数据进行分组,并规范省份的名称

import re

# Administrative affixes to strip from province names. Longer entries come
# first so that '维吾尔族' is matched as a whole instead of losing its prefix
# '维吾尔' and leaving a stray '族' behind.
_PROVINCE_AFFIXES = ['特别行政区', '维吾尔族', '维吾尔', '自治区', '壮族', '回族', '省', '市']
_PROVINCE_AFFIX_RE = re.compile('|'.join(re.escape(s) for s in _PROVINCE_AFFIXES))


def replace_str(x):
    """Return the province name *x* with administrative affixes removed.

    Every occurrence of the affixes listed in ``_PROVINCE_AFFIXES`` is
    deleted, e.g. '广西壮族自治区' -> '广西' and '北京市' -> '北京'.

    Non-string input (such as a missing value) is returned unchanged rather
    than raising, so the function can be applied to a column with gaps.
    """
    if not isinstance(x, str):
        return x
    return _PROVINCE_AFFIX_RE.sub('', x)

# Sum the four engagement counters per (year, normalised province).
# The year is the first four characters of create_time.
# The counters are selected explicitly: recent pandas no longer silently drops
# non-numeric columns in groupby().sum(), and the later steps assume exactly
# these four value columns.
year_key = df['create_time'].str[:4]
province_key = df['province'].apply(replace_str)
count_columns = ['play_count', 'subscribed_count', 'share_count', 'comment_count']
time_df = df.groupby([year_key, province_key])[count_columns].sum()
time_df

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_18


(2)重建索引

# Turn the (create_time, province) group keys back into ordinary columns.
re_time_df = time_df.reset_index()
re_time_df

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据_19


(3)获取所有省份

# Every distinct province name, in order of first appearance.
province_column = re_time_df['province']
province = province_column.unique().tolist()
province

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_20


统计每个年份所包含的省份数量,发现2013年和2014年的省份不全,即存在缺失数据

re_time_df['create_time'].value_counts()

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_21


(4)处理缺失数据

def add_province(df_data, all_provinces=None):
    """Pad *df_data* so that every year has a row for every province.

    For each distinct ``create_time`` value, a row is appended for every
    province in *all_provinces* that the year lacks. All columns other than
    ``create_time`` and ``province`` are filled with 0 in the added rows.

    Args:
        df_data: DataFrame with at least ``create_time`` and ``province``
            columns; any number of further value columns is supported.
        all_provinces: Full list of province names. Defaults to the
            module-level ``province`` list.

    Returns:
        A new DataFrame with the padding rows appended (index renumbered),
        or *df_data* itself when nothing is missing.
    """
    if all_provinces is None:
        all_provinces = province

    value_columns = [
        col for col in df_data.columns if col not in ('create_time', 'province')
    ]

    # Collect all padding rows first and concatenate once, instead of growing
    # the frame inside the loop.
    missing_rows = []
    for year in df_data['create_time'].drop_duplicates().tolist():
        present = set(df_data.loc[df_data['create_time'] == year, 'province'])
        for name in all_provinces:
            if name not in present:
                row = dict.fromkeys(value_columns, 0)
                row['create_time'] = year
                row['province'] = name
                missing_rows.append(row)

    if not missing_rows:
        return df_data

    filler = pd.DataFrame(missing_rows, columns=df_data.columns)
    return pd.concat([df_data, filler], ignore_index=True)

# Pad every year with zero-valued rows for the provinces it lacks.
re_time_df2 = add_province(re_time_df)
re_time_df2

已填充缺失数据

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据_22


我们也可以分步来做这个过程

先处理2013年

# Provinces that already have a 2013 row ...
is_2013 = re_time_df['create_time'] == '2013'
new_province2013 = re_time_df.loc[is_2013, 'province'].drop_duplicates().tolist()
# ... and the ones that 2013 is missing.
rest_province2013 = [name for name in province if name not in new_province2013]
rest_province2013

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据_23

# One all-zero row per missing province, laid out like re_time_df.
zero_rows_2013 = [['2013', name, 0, 0, 0, 0] for name in rest_province2013]
rest_df2013 = pd.DataFrame(zero_rows_2013, columns=re_time_df.columns)
rest_df2013

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据_24

# Append the zero-filled 2013 rows to the original frame, renumbering the index.
re_time_df1 = pd.concat([re_time_df, rest_df2013], ignore_index=True)

再处理2014年

# Same procedure for 2014, working from re_time_df1 (already padded for 2013).
# The mask and the indexed frame must be the same object; the original used a
# mask built from re_time_df1 to index re_time_df, which only worked because
# pandas could realign the longer mask to the shorter frame.
is_2014 = re_time_df1['create_time'] == '2014'
new_province2014 = re_time_df1.loc[is_2014, 'province'].drop_duplicates().tolist()
rest_province2014 = [x for x in province if x not in new_province2014]

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_25

# One all-zero row per province missing in 2014, laid out like re_time_df.
zero_rows_2014 = [['2014', name, 0, 0, 0, 0] for name in rest_province2014]
rest_df2014 = pd.DataFrame(zero_rows_2014, columns=re_time_df.columns)
rest_df2014

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_26

# Append the zero-filled 2014 rows; the result is padded for both years.
re_time_df2 = pd.concat([re_time_df1, rest_df2014], ignore_index=True)

(5)重建索引,得到最后数据

# Order rows by year, then province, and renumber the index from 0.
ordered_rows = re_time_df2.sort_values(by=['create_time', 'province'])
final_time_df = ordered_rows.reset_index(drop=True)

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_27

(6)提取图表所需数据

省份

# Distinct provinces in the order they appear in the sorted frame.
final_province = final_time_df['province'].unique().tolist()

年份

# Distinct years in the order they appear in the sorted frame.
final_year = final_time_df['create_time'].unique().tolist()

播放量

# Play counts: year -> list of values in final_time_df row order.
data_play_count = {
    yr: final_time_df.loc[final_time_df['create_time'] == yr, 'play_count'].tolist()
    for yr in final_year
}

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据_28


收藏量

# Subscription counts: year -> list of values in final_time_df row order.
data_subscribed_count = {
    yr: final_time_df.loc[final_time_df['create_time'] == yr, 'subscribed_count'].tolist()
    for yr in final_year
}

转发量

# Share counts: year -> list of values in final_time_df row order.
data_share_count = {
    yr: final_time_df.loc[final_time_df['create_time'] == yr, 'share_count'].tolist()
    for yr in final_year
}

评论数

# Comment counts: year -> list of values in final_time_df row order.
data_comment_count = {
    yr: final_time_df.loc[final_time_df['create_time'] == yr, 'comment_count'].tolist()
    for yr in final_year
}

汇总到一个字典中

# Formatted chart data for all metrics, keyed by metric column name.
total_data = {}

def format_data(data: dict) -> dict:
    """Reshape per-year value lists into name/value records, in place.

    For every year in the module-level ``final_year``, each entry of
    ``data[year]`` is replaced by ``{"name": <province>, "value": <value>}``,
    pairing values by position with the module-level ``final_province``.
    Two keys are added per year: ``year + "max"`` (the largest value,
    truncated to a multiple of 100) and ``year + "sum"`` (the total).

    Returns the same ``data`` object that was passed in.
    """
    for year in final_year:
        values = data[year]
        peak = max(values)
        total = 0
        # Overwrite the list slot by slot so the original list object is kept.
        for idx, value in enumerate(values):
            total += value
            values[idx] = {"name": final_province[idx], "value": value}
        data[year + "max"] = int(peak / 100) * 100
        data[year + "sum"] = total
    return data
# Format each metric's per-year data and store it under its column name.
for metric_name, raw_by_year in (
    ('play_count', data_play_count),
    ('subscribed_count', data_subscribed_count),
    ('share_count', data_share_count),
    ('comment_count', data_comment_count),
):
    total_data[metric_name] = format_data(data=raw_by_year)

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据_29


android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_30

8.2 生成图表
from pyecharts.charts import Timeline, Pie
def get_year_overlap_chart(year: str) -> Bar:
    """Build the chart for one timeline frame: a bar chart with a ring overlaid.

    The bar chart plots the four metrics from ``total_data`` for *year*
    across ``final_province``; play and subscription counts start deselected.
    The ring chart shows the yearly totals of shares and comments.

    Args:
        year: Key into the per-metric dictionaries of ``total_data``
            (a value from ``final_year``).

    Returns:
        The bar chart with the ring chart overlapped onto it.
    """
    bar = (
        Bar()
        .add_xaxis(xaxis_data=final_province)
        .add_yaxis(
            series_name="播放量",
            yaxis_data=total_data['play_count'][year],
            is_selected=False,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="收藏量",
            yaxis_data=total_data['subscribed_count'][year],
            is_selected=False,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="转发量",
            yaxis_data=total_data['share_count'][year],
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="评论数",
            yaxis_data=total_data['comment_count'][year],
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="{}年网易云音乐热门歌单数据".format(year),
                subtitle="数据来源于网易云音乐"
            ),
            tooltip_opts=opts.TooltipOpts(
                is_show=True, trigger="axis", axis_pointer_type="shadow"
            ),
        )
    )

    pie = (
        Pie()
        .add(
            # Only shares and comments are plotted, so the series name no
            # longer claims to include 收藏量 as the original did.
            series_name="转发量/评论数占比",
            data_pair=[
                ["转发量", total_data["share_count"]["{}sum".format(year)]],
                ["评论数", total_data["comment_count"]["{}sum".format(year)]],
            ],
            # Ring placed in the upper-right area of the canvas.
            center=["80%", "30%"],
            radius=["14%", "28%"],
            color=["#f47a75", "#009db2"]
        )
        .set_series_opts(
            tooltip_opts=opts.TooltipOpts(is_show=True, trigger="item"),
            label_opts=opts.LabelOpts(formatter="{b}: {c}"),
        )
        .set_colors(["#f47a75", "#009db2"])
    )
    return bar.overlap(pie)

# Build the timeline: one overlapped bar + ring chart per year.
timeline = Timeline(init_opts=opts.InitOpts(width="1000px"))

for y in final_year:
    timeline.add(get_year_overlap_chart(year=y), time_point=y)

# Play automatically, advancing to the next year every 2000 ms.
timeline.add_schema(is_auto_play=True, play_interval=2000)

# Display the timeline; the original built it but never rendered it,
# unlike bar1 in section 1.
timeline.render_notebook()

图表特色:

  1. 可随时间变化动态地显示图表
  2. 不同的年份对应不同的标题
  3. 可以在几种数据中选择展示
  4. 圆环图与柱状图联动,柱状图变化时,圆环图也相应变化

android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_缩放_31


android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_32


android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_33


android mpchart 堆叠柱状图设置标签数据 pyecharts堆叠柱状图_数据预处理_34