拿到数据集咱们先看一眼都有哪些数据
这是一份乘客出行后对航空公司的评价数据,里面包含了各种各样的字段(字段名都是英文,我看不懂,先翻译一下)。
懵逼。在这个数据集里头有挺多非数字类型的数据,比如性别、满意度、是否为该航班的常客、旅行类型以及所选的舱位类型。我们可以将性别中的男改成1、女改成0,其他字段也依此类推进行编码。
先来分析满意度情况,需要统计各个舱位类型中满意与不满意的人数,并比较它们之间的关系。
def Class_satisfaction(self):
    '''Print the satisfied and dissatisfied passenger counts per cabin class.

    Reads ``self.df`` (columns ``satisfaction`` and ``Class``) and prints two
    lists, satisfied counts first, dissatisfied counts second. Both lists
    follow the same class order so that position i in each list refers to
    the same cabin class.
    '''
    df = self.df
    satisfied = df.loc[df['satisfaction'] == 'satisfied', 'Class'].value_counts()
    dissatisfied = df.loc[df['satisfaction'] == 'dissatisfied', 'Class'].value_counts()
    # value_counts() sorts each Series by its own frequencies, so the two
    # results can list the classes in different orders (or miss a class).
    # Align both on one order: satisfied's order first, then any class that
    # only occurs among dissatisfied passengers.
    classes = satisfied.index.append(dissatisfied.index.difference(satisfied.index))
    sat_number = satisfied.reindex(classes, fill_value=0).tolist()
    dis_number = dissatisfied.reindex(classes, fill_value=0).tolist()
    print(sat_number, dis_number, sep='\n')
接着是可视化部分,想法是在同一页面上放两张图,一左一右水平排列:一个柱状图对比各个舱位满意和不满意的人数,一个饼图展示满意与不满意的总体占比。
def Class_satisfaction(self):
    '''Render per-class and overall satisfaction charts into one HTML page.

    Reads ``self.df`` (columns ``satisfaction`` and ``Class``), prints the
    count tables, then builds a stacked bar chart (satisfied / dissatisfied
    per cabin class) and a ring-shaped pie chart (overall satisfaction
    split), combines them in a Grid and writes ``Class_satisfaction.html``.
    '''
    df = self.df
    total = df['satisfaction'].value_counts()  # passengers per satisfaction label
    satisfied = df.loc[df['satisfaction'] == 'satisfied', 'Class'].value_counts()
    dissatisfied = df.loc[df['satisfaction'] == 'dissatisfied', 'Class'].value_counts()
    # value_counts() sorts each Series by its own frequencies, so the two
    # Series can list the classes in different orders. The bar chart shares
    # one x-axis, so both must be aligned on the same class order first;
    # otherwise a bar segment is drawn under the wrong class.
    classes = satisfied.index.append(dissatisfied.index.difference(satisfied.index))
    satisfied = satisfied.reindex(classes, fill_value=0)
    dissatisfied = dissatisfied.reindex(classes, fill_value=0)
    print(total, satisfied, dissatisfied, sep='\n')

    bar = Bar(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS, width='750px', height='570px'))
    bar.add_xaxis(classes.tolist())
    # Same stack name on both series -> the two counts are stacked per class.
    bar.add_yaxis('满意', satisfied.tolist(), stack='stack1', category_gap='45%')
    bar.add_yaxis('不满意', dissatisfied.tolist(), stack='stack1', category_gap='45%')
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title='不同舱位满意度情况', pos_right='5%'),  # title position
        xaxis_opts=opts.AxisOpts(
            axispointer_opts=opts.AxisPointerOpts(is_show=False),  # x-axis pointer line
        ),
        yaxis_opts=opts.AxisOpts(
            splitline_opts=opts.SplitLineOpts(is_show=True),  # background split lines
            axistick_opts=opts.AxisTickOpts(is_show=True),  # y-axis ticks
        ),
        legend_opts=opts.LegendOpts(pos_right='20%'),  # legend position
    )
    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=True, position='inside'))

    pie_data = [[label, count] for label, count in zip(total.index.tolist(), total.tolist())]
    pie = Pie(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
    pie.add(
        '数量',
        pie_data,
        radius=['35%', '65%'],
        center=['22%', '55%'],  # pie position
    )
    pie.set_global_opts(
        legend_opts=opts.LegendOpts(pos_left='20%'),  # legend position
        title_opts=opts.TitleOpts(title='满意度总体情况'),
    )
    pie.set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {d}% \n {c}'))

    grid = Grid(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS, width='1600px'))
    grid.add(bar, grid_opts=opts.GridOpts(pos_left='55%'))
    grid.add(pie, grid_opts=opts.GridOpts(pos_right='55%'))
    grid.render('Class_satisfaction.html')
大概的效果
接着还能分析啥呢?
该数据集内座椅舒适度,饮食等评分都是在0~5之间进行评定,我们可以求出不同舱位的各个平均分,然后使用面积折线图以及雷达图来进行可视化展示。
求均值的方式有很多,这里我们用pandas的agg聚合函数计算mean均值,再用T将表格的行列转置。
# Rating columns whose per-class mean is computed below. Listed once so the
# three cabin classes are always aggregated over exactly the same columns.
score_columns = [
    'Seat comfort',
    'Departure/Arrival time convenient',
    'Food and drink',
    'Gate location',
    'Inflight wifi service',
    'Inflight entertainment',
    'Online support',
    'Ease of Online booking',
    'Cleanliness',
]
# For each class: keep its rows, average every rating column with agg, round
# to 2 decimals, then transpose (.T) so the ratings become the row index.
Eco_Class_mean = df[df['Class'] == 'Eco'].groupby('Class')[score_columns].agg('mean').round(decimals=2).T  # Eco means
EcoPlus_Class_mean = df[df['Class'] == 'Eco Plus'].groupby('Class')[score_columns].agg('mean').round(decimals=2).T  # Eco Plus means
Business_Class_mean = df[df['Class'] == 'Business'].groupby('Class')[score_columns].agg('mean').round(decimals=2).T  # Business means
print(Eco_Class_mean, EcoPlus_Class_mean, Business_Class_mean, sep='\n')
数据处理完就可以做可视化部分了,咱们需要做的是面积折线图和雷达图,对应pyecharts中的Line和Radar。
def All_kinds_average_Rader(self):
    '''Build a radar chart comparing the mean ratings of the three cabin classes.

    Reads ``self.df``, averages eleven rating columns for the Eco, Eco Plus
    and Business classes (rounded to 2 decimals), prints the three tables
    and returns the configured ``Radar`` chart.
    '''
    df = self.df
    score_columns = [
        'Seat comfort',
        'Departure/Arrival time convenient',
        'Food and drink',
        'Gate location',
        'Inflight wifi service',
        'Inflight entertainment',
        'Online support',
        'Ease of Online booking',
        'Cleanliness',
        'Baggage handling',
        'On-board service',
    ]

    def class_means(cabin_class):
        # One-column table: rating names as rows, the class's mean as value.
        rows = df[df['Class'] == cabin_class]
        return rows.groupby('Class')[score_columns].agg('mean').round(decimals=2).T

    Eco_Class_mean = class_means('Eco')
    EcoPlus_Class_mean = class_means('Eco Plus')
    Business_Class_mean = class_means('Business')
    print(Eco_Class_mean, EcoPlus_Class_mean, Business_Class_mean, sep='\n')

    # The tables are 2-D (ratings x 1), so .values.tolist() would produce
    # nested one-element lists. Flatten them to plain lists of floats.
    eco_scores = Eco_Class_mean.to_numpy().ravel().tolist()
    eco_plus_scores = EcoPlus_Class_mean.to_numpy().ravel().tolist()
    business_scores = Business_Class_mean.to_numpy().ravel().tolist()
    # A single numeric upper bound shared by all indicators, taken over all
    # three classes so no series can exceed the axis. Falls back to 5 when
    # there is no data (the accompanying text describes ratings as 0-5).
    axis_max = max(eco_scores + eco_plus_scores + business_scores, default=5)

    rader = Radar(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS, width='1200px', height='550px'))
    rader.add_schema(
        schema=[opts.RadarIndicatorItem(name=rader_x, max_=axis_max) for rader_x in score_columns]
    )
    # Same fill colour for every series; only the opacity differs.
    for series_name, scores, opacity in (
        ('Eco', eco_scores, 0.1),
        ('Eco Plus', eco_plus_scores, 0.2),
        ('Business', business_scores, 0.3),
    ):
        rader.add(
            series_name=series_name,
            data=[scores],  # one row per series: data is a 2-D list
            areastyle_opts=opts.AreaStyleOpts(opacity=opacity, color='blue'),
        )
    rader.set_global_opts(title_opts=opts.TitleOpts(title='评分均值对比'))
    return rader
弄完之后,最后使用Page把几张图整合到同一页上,整体代码如下。
# -*- coding: utf-8 -*-
# @Time : 2022/7/1 11:48
# @Author : IcelandPq
# @FileName: Airline.py
'''
AnalysisAssigned AnalysisFree
satisfaction 满意情况
Gender 性别
Customer Type 顾客类型
Age 年龄
Type of Travel 旅行类型
Class 机舱类型 eco(经济舱) Bus(商务舱)
Flight Distance 飞行距离
Seat comfort 座椅舒适度
Departure/Arrival time convenient 离开/到达的时间
Food and drink 饮食
Gate location 登机口位置
Inflight wifi service 机上wifi服务
Inflight entertainment 机上娱乐
Online support 在线支持?
Ease of Online booking 轻松在线预定
On-board service 机上服务
Leg room service 腿部空间服务
Baggage handling 行李处理
Checkin service 值机服务
Cleanliness 清洁度
Online boarding 网上登机
Departure Delay in Minutes 出发延误
Arrival Delay in Minutes 到达延误
'''
import pandas as pd
from pyecharts.charts import Bar, Pie, Radar, Line
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.charts import Grid, Page
class AnalysisAssigned():
    '''Build pyecharts visualizations of the Invistico Airline satisfaction survey.

    The survey is loaded once into ``self.df``; each public method turns it
    into one chart, and ``main`` renders all charts into a single HTML page.
    '''

    # Survey file read when no explicit path is given. Raw string so the
    # Windows separators stay literal backslashes instead of (invalid)
    # escape sequences.
    DEFAULT_CSV_PATH = r'D:\Python\Py爬虫\爬爬爬爬\航空满意度数据分析\数据\Invistico_Airline.csv'

    # Rating columns averaged per cabin class for the line and radar charts.
    SCORE_COLUMNS = (
        'Seat comfort',
        'Departure/Arrival time convenient',
        'Food and drink',
        'Gate location',
        'Inflight wifi service',
        'Inflight entertainment',
        'Online support',
        'Ease of Online booking',
        'Cleanliness',
        'Baggage handling',
        'On-board service',
    )

    def __init__(self, csv_path=None):
        '''Load the survey CSV and drop every row containing a missing value.

        csv_path: optional path of the CSV file; ``DEFAULT_CSV_PATH`` is
            used when it is omitted.
        '''
        if csv_path is None:
            csv_path = self.DEFAULT_CSV_PATH
        self.df = pd.read_csv(csv_path).dropna(axis=0)

    def _satisfaction_by_class(self):
        '''Return (satisfied, dissatisfied) per-class counts sharing one index.'''
        df = self.df
        satisfied = df.loc[df['satisfaction'] == 'satisfied', 'Class'].value_counts()
        dissatisfied = df.loc[df['satisfaction'] == 'dissatisfied', 'Class'].value_counts()
        # value_counts() sorts each Series by its own frequencies, so the two
        # can disagree on class order (or miss a class). Align both on
        # satisfied's order, followed by classes only seen as dissatisfied.
        classes = satisfied.index.append(dissatisfied.index.difference(satisfied.index))
        return (
            satisfied.reindex(classes, fill_value=0),
            dissatisfied.reindex(classes, fill_value=0),
        )

    def _class_score_means(self, cabin_class):
        '''Return a one-column table of mean ratings (2 decimals) for one cabin class.'''
        rows = self.df[self.df['Class'] == cabin_class]
        return (
            rows.groupby('Class')[list(self.SCORE_COLUMNS)]
            .agg('mean')
            .round(decimals=2)
            .T  # ratings become the row index
        )

    def _all_class_means(self):
        '''Return the Eco, Eco Plus and Business mean tables and print them.'''
        eco = self._class_score_means('Eco')
        eco_plus = self._class_score_means('Eco Plus')
        business = self._class_score_means('Business')
        print(eco, eco_plus, business, sep='\n')
        return eco, eco_plus, business

    @staticmethod
    def _flatten(frame):
        '''Return every cell of a table as one flat list of Python scalars.'''
        # The mean tables are 2-D (ratings x 1); a plain .values.tolist()
        # would hand the charts nested one-element lists instead of numbers.
        return frame.to_numpy().ravel().tolist()

    def Class_satisfaction(self):
        '''Build the satisfaction overview: stacked bar per class plus overall pie.

        Returns a ``Grid`` holding both charts.
        '''
        total = self.df['satisfaction'].value_counts()  # passengers per satisfaction label
        satisfied, dissatisfied = self._satisfaction_by_class()
        print(total, satisfied, dissatisfied, sep='\n')

        bar = Bar(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS, width='750px', height='570px'))
        bar.add_xaxis(satisfied.index.tolist())
        # Same stack name on both series -> the two counts are stacked per class.
        bar.add_yaxis('满意', satisfied.tolist(), stack='stack1', category_gap='45%')
        bar.add_yaxis('不满意', dissatisfied.tolist(), stack='stack1', category_gap='45%')
        bar.set_global_opts(
            title_opts=opts.TitleOpts(title='不同舱位满意度情况', pos_right='5%'),  # title position
            xaxis_opts=opts.AxisOpts(
                axispointer_opts=opts.AxisPointerOpts(is_show=False),  # x-axis pointer line
            ),
            yaxis_opts=opts.AxisOpts(
                splitline_opts=opts.SplitLineOpts(is_show=True),  # background split lines
                axistick_opts=opts.AxisTickOpts(is_show=True),  # y-axis ticks
            ),
            legend_opts=opts.LegendOpts(pos_right='20%'),  # legend position
        )
        bar.set_series_opts(label_opts=opts.LabelOpts(is_show=True, position='inside'))

        pie_data = [[label, count] for label, count in zip(total.index.tolist(), total.tolist())]
        pie = Pie(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
        pie.add(
            '数量',
            pie_data,
            radius=['35%', '65%'],
            center=['22%', '55%'],  # pie position
        )
        pie.set_global_opts(
            legend_opts=opts.LegendOpts(pos_left='20%'),  # legend position
            title_opts=opts.TitleOpts(title='总体满意度情况'),
        )
        pie.set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {d}% \n{c}'))

        grid = Grid(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS, width='1600px'))
        grid.add(bar, grid_opts=opts.GridOpts(pos_left='55%'))
        grid.add(pie, grid_opts=opts.GridOpts(pos_right='55%'))
        print('Class_satisfaction Success')
        return grid

    def All_kinds_average(self):
        '''Build an area line chart of the mean ratings per cabin class.

        Returns the configured ``Line`` chart.
        '''
        eco, eco_plus, business = self._all_class_means()

        line = Line(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS, width='1200px', height='550px'))
        line.add_xaxis(list(self.SCORE_COLUMNS))
        for series_name, frame in (('Eco', eco), ('Eco Plus', eco_plus), ('Business', business)):
            line.add_yaxis(
                series_name,
                self._flatten(frame),
                is_smooth=True,
                is_symbol_show=True,  # show data points; labels need them visible
                areastyle_opts=opts.AreaStyleOpts(opacity=0.4),  # fill opacity
                label_opts=opts.LabelOpts(is_show=True),  # show the values
            )
        line.set_global_opts(
            yaxis_opts=opts.AxisOpts(
                axistick_opts=opts.AxisTickOpts(is_show=True),
                splitline_opts=opts.SplitLineOpts(is_show=True),
            ),
            xaxis_opts=opts.AxisOpts(
                axispointer_opts=opts.AxisPointerOpts(is_show=True),
                axislabel_opts={'rotate': '30'},
            ),
            title_opts=opts.TitleOpts(title='不同舱位评分均值'),
            legend_opts=opts.LegendOpts(pos_left='40%'),
        )
        print('Line Success')
        return line

    def All_kinds_average_Rader(self):
        '''Build a radar chart comparing the mean ratings of the cabin classes.

        Returns the configured ``Radar`` chart.
        '''
        eco, eco_plus, business = self._all_class_means()
        eco_scores = self._flatten(eco)
        eco_plus_scores = self._flatten(eco_plus)
        business_scores = self._flatten(business)
        # A single numeric upper bound shared by all indicators, taken over
        # all three classes so no series can exceed the axis. Falls back to 5
        # when there is no data (ratings are presumably on a 0-5 scale).
        axis_max = max(eco_scores + eco_plus_scores + business_scores, default=5)

        rader = Radar(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS, width='1200px', height='550px'))
        rader.add_schema(
            schema=[opts.RadarIndicatorItem(name=rader_x, max_=axis_max) for rader_x in self.SCORE_COLUMNS]
        )
        # Same fill colour for every series; only the opacity differs.
        for series_name, scores, opacity in (
            ('Eco', eco_scores, 0.1),
            ('Eco Plus', eco_plus_scores, 0.2),
            ('Business', business_scores, 0.3),
        ):
            rader.add(
                series_name=series_name,
                data=[scores],  # one row per series: data is a 2-D list
                areastyle_opts=opts.AreaStyleOpts(opacity=opacity, color='blue'),
            )
        rader.set_global_opts(title_opts=opts.TitleOpts(title='评分均值对比'))
        print('Rader Success')
        return rader

    def main(self):
        '''Render all three charts into ``Invistico_Airline visualization.html``.'''
        # Reuse this instance's already-loaded data instead of constructing a
        # new object (and re-reading the CSV) for every chart.
        grid = self.Class_satisfaction()
        line = self.All_kinds_average()
        rader = self.All_kinds_average_Rader()
        page = Page(layout=Page.SimplePageLayout)
        page.add(grid)
        page.add(line)
        page.add(rader)
        page.render('Invistico_Airline visualization.html')
# Script entry point: build the analysis object and render the combined page.
if __name__ == '__main__':
    analysis = AnalysisAssigned()
    analysis.main()
效果展示