一、项目介绍

1.1 项目需求

基于第一部分业务背景,为满足业务自主分析提高运营决策时效,现使用Python、数据仓库实现数据自动更新式可视化面板。

1.2 项目操作流程




python生成数据看板的数据要求_python


二、数据处理

2.1 代码处理分析


python生成数据看板的数据要求_数据_02


2.1.1 聚合生成dw_order_by_day表

从mysql数据库中获取ods_sales_orders订单明细表并按日期分组聚合,得到每日销量和每日销售额。读取日期维度表,与上表合并,得到dw_order_by_day每日环比表。 关键代码如下(部分):


#导入相关模块
import pandas as pd
import pymysql
import random
from sqlalchemy import create_engine
pymysql.install_as_MySQLdb()
import datetime
import warnings
warnings.filterwarnings("ignore")
# 连接adventure_ods_new库
adventure_ods = create_engine('mysql://用户名:密码@106.12.180.221:3306/数据库?charset=gbk')
# 连接datafrog05_adventure_new库
adventure_dw = create_engine('mysql://用户名:密码@106.15.121.232:3306/数据库名?charset=gbk')
"""利用空列表及循环生成对应随机值,与销量订单聚合表合并形成sum_amount_order_goal(销量订单聚合目标表)"""
sum_amount_goal_list = []
sum_order_goal_list = []
# 转为list类型,遍历每个日期
create_date_list = list(sum_amount_order['create_date'])
for i in create_date_list:
    # 生成一个在[0.85,1.1]随机数
    a = random.uniform(0.85, 1.1)
    b = random.uniform(0.85, 1.1)
    # 对应日期下生成总金额(sum_amount)*a的列
    amount_goal = list(sum_amount_order[sum_amount_order['create_date'] == i]['sum_amount'])[0] * a
    # 对应日期下生成总订单数(sum_order)*b的列
    order_goal = list(sum_amount_order[sum_amount_order['create_date'] == i]['sum_order'])[0] * b
    # 将生成的目标值加入空列表
    sum_amount_goal_list.append(amount_goal)
    sum_order_goal_list.append(order_goal)
# 合并sum_amount_order表与刚生成的目标值列,形成sum_amount_order_goal表
sum_amount_order_goal = pd.concat([sum_amount_order, pd.DataFrame(
        {'sum_amount_goal': sum_amount_goal_list, 'sum_order_goal':
            sum_order_goal_list})], axis=1)
sum_amount_order_goal.head()
"""存储新的dw_order_by_day """
#增添环比值amount_diff
# pct_change()表示当前元素与先前元素的相差百分比,默认竖向,例:前面元素x,当前元素y,公式 result = (y-x)/x
dw_order_by_day['amount_diff'] = dw_order_by_day['sum_amount'].pct_change().fillna(0)
# 追加数据至dw_order_by_day
dw_order_by_day.to_sql('dw_order_by_day_jrh', con=adventure_dw,
                       if_exists='replace', index=False)


2.1.2 聚合生成dw_order_diff表

上一步中得到dw_order_by_day每日环比表,将当前时间维度和去年同期维度下相同字段进行聚合,在计算销售额,销售量,和客单价的同比,关键代码如下(部分):


"""求取各阶段的总金额"""
"""当天"""
# 当天的总金额
today_amount = dw_order_by_day[dw_order_by_day['is_today'] == 1]['sum_amount'].sum()
# 去年同期的日期维度
before_year_today = list(dw_order_by_day[dw_order_by_day['is_today'] == 1]
                         ['create_date'] + datetime.timedelta(days=-365))
# 去年同期总金额
before_year_today_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_today)]['sum_amount'].sum()

"""昨天"""
# 昨天的总金额
yesterday_amount = dw_order_by_day[dw_order_by_day['is_yesterday'] == 1]['sum_amount'].sum()
# 去年同期的日期维度
before_year_yesterday = list(dw_order_by_day[dw_order_by_day['is_yesterday'] == 1]
                             ['create_date'] + datetime.timedelta(days=-365))
# 去年同期总金额
before_year_yesterday_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(
    before_year_yesterday)]['sum_amount'].sum()

"""当前月份"""
# 当月的总金额
month_amount = dw_order_by_day[dw_order_by_day['is_current_month'] == 1]['sum_amount'].sum()
# 去年同期的日期维度
before_year_month = list(dw_order_by_day[dw_order_by_day['is_current_month'] == 1]
                         ['create_date'] + datetime.timedelta(days=-365))
# 去年同期总金额
before_year_month_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(
    before_year_month)]['sum_amount'].sum()

"""当前季度"""
# 当前季度的总金额
quarter_amount = dw_order_by_day[dw_order_by_day['is_current_quarter'] == 1]['sum_amount'].sum()
# 去年同期的日期维度
before_year_quarter = list(dw_order_by_day[dw_order_by_day['is_current_quarter'] == 1]
                           ['create_date'] + datetime.timedelta(days=-365))
# 去年同期总金额
before_year_quarter_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(
    before_year_quarter)]['sum_amount'].sum()

"""当前年份"""
# 当年的总金额
year_amount = dw_order_by_day[dw_order_by_day['is_current_year'] == 1]['sum_amount'].sum()
# 去年同期的日期维度
before_year_year = list(dw_order_by_day[dw_order_by_day['is_current_year'] == 1]
                        ['create_date'] + datetime.timedelta(days=-365))
# 去年同期总金额
before_year_year_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(
    before_year_year)]['sum_amount'].sum()
"""求取各阶段的总金额、订单数的同期对比数据"""
# 做符号简称,横向提取数据方便
amount_dict = {'today_diff': [today_amount / before_year_today_amount - 1,
                              today_order / before_year_today_order - 1,
                              (today_amount / today_order) / (before_year_today_amount /
                                                              before_year_today_order) - 1],
               'yesterday_diff': [yesterday_amount / before_year_yesterday_amount - 1,
                                  yesterday_order / before_year_yesterday_order - 1,
                                  (yesterday_amount / yesterday_order) / (before_year_yesterday_amount /
                                                                          before_year_yesterday_order) - 1],
               'month_diff': [month_amount / before_year_month_amount - 1,
                              month_order / before_year_month_order - 1,
                              (month_amount / month_order) / (before_year_month_amount /
                                                              before_year_month_order) - 1],
               'quarter_diff': [quarter_amount / before_year_quarter_amount - 1,
                                quarter_order / before_year_quarter_order - 1,
                                (quarter_amount / quarter_order) / (before_year_quarter_amount /
                                                                    before_year_quarter_order) - 1],
               'year_diff': [year_amount / before_year_year_amount - 1,
                             year_order / before_year_year_order - 1,
                             (year_amount / year_order) / (before_year_year_amount /
                                                           before_year_year_order) - 1],
               'flag': ['amount', 'order', 'avg']}

dw_amount_diff = pd.DataFrame(amount_dict)
dw_amount_diff
"""存储新的dw_amount_diff表 """
dw_amount_diff.to_sql('dw_amount_diff_jrh', con=adventure_dw, if_exists='replace', index=False)


2.1.3 聚合生成update_sum_data表
将ods_sales_orders订单明细表和ods_customer连接,按照订单日期/产品名/产品子类/产品类别/所在区域/所在省份/所在城市分组聚合,再与日期维度表连接,得到sum_customer_order 关键代码如下(部分):


"""进行数据的聚合"""
sales_customer_order=pd.merge(ods_sales_orders,ods_customer,on='customer_key',how='left')
# 提取订单主键/订单日期/客户编号/产品名/产品子类/产品类别/产品单价/所在区域/所在省份/所在城市
sales_customer_order=sales_customer_order.groupby(['create_date','english_product_name','cpzl_zw',"cplb_zw",
                                                   'chinese_territory','chinese_province','chinese_city'],as_index=False).
agg({'sales_order_key':pd.Series.nunique,'customer_key':pd.Series.nunique,
    'unit_price':'sum'}).rename(columns={'sales_order_key':'order_num','customer_key':'customer_num','unit_price':'sum_amount',
                                         'english_product_name':'product_name'})
                            
# 转化订单日期为字符型格式
sales_customer_order['create_date'] = sales_customer_order['create_date'].apply(lambda x: x.strftime('%Y-%m-%d'))
# 获取当日日期维度
dw_customer_order = pd.merge(sales_customer_order, dim_date_df, on='create_date', how='inner')
dw_customer_order.head()
"""step5:进行数据的存储"""
dw_customer_order.to_sql('dw_customer_order_jrh', con=adventure_dw,
                         if_exists='replace', index=False)


2.2 在MySQL追加索引

在mysql追加索引优化查询速度,类似于让搜索结果不需要遍历整个表,而是根据前缀索引一步步搜寻。


create index index_date on dw_order_by_day_hjf(create_date(8));
select * from dw_order_by_day_hjf where create_date='2019-02-11';
EXPLAIN select * from dw_order_by_day_hjf where create_date='2019-02-11';


2.3 在Linux上做定时部署

在服务器上部署代码,让其每日自动更新,实现方法分三步:

1.定时执行命令:import schedule

2.能够跳出命令窗口输入 python xxx.py 文件使其运行:import os 的os.system()

3.将这个文件挂在后台,等时间一到,自动执行:利用Linux系统自带的&挂后台


import schedule
import time
import datetime
import os
import requests
def job1():
    """
    dw_order_by_day 每日环比表
    """
    print('Job1:每天6:00执行一次')
    print('Job1-startTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    os.system(
        "/home/anaconda3/bin/python3 /home/frog005/adventure/dw_order_by_day_jrh.py >> /home/frog005/adventure_jrh/jrh_logs/dw_order_by_day_schedule.log 2>&1 &")
    time.sleep(20)
    print('Job1-endTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    print('------------------------------------------------------------------------')

def job2():
    """
    dw_order_diff 同比数据表
    """
    print('Job2:每天6:20执行一次')
    print('Job2-startTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    os.system(
        "/home/anaconda3/bin/python3 /home/frog005/adventure_jrh/dw_order_diff_jrh.py >> /home/frog005/adventure_jrh/jrh_logs/dw_order_diff_schedule.log 2>&1 &")
    time.sleep(20)
    print('Job3-endTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    print('------------------------------------------------------------------------')

def job3():
    """
    update_sum_data 生成时间地区产品聚合表
    """
    print('Job3:每天6:40执行一次')
    print('Job3-startTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    os.system(
        "/home/anaconda3/bin/python3 /home/frog005/adventure_jrh/update_sum_data_jrh.py >> /home/frog005/adventure_jrh/jrh_logs/update_sum_data_schedule.log 2>&1 &")
    time.sleep(20)
    print('Job4-endTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    print('------------------------------------------------------------------------')

if __name__ == '__main__':
    schedule.every().day.at('06:00').do(job1)
    schedule.every().day.at('06:20').do(job2)
    schedule.every().day.at('06:40').do(job3)

    while True:
        schedule.run_pending()
        time.sleep(10)
        print("wait", datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))


将schedule程序挂在后台执行,并输出日志:


nohup python3 schedule_job_jrh.py > schedule_job_jrh.log 2>&1 &


查看该schedule程序运行情况(验证是否顺利挂在后台)


ps aux| grep schedule_job_jrh.py


三、可视化看板搭建

power bi可视化链接

3.1 整体销售情况


3.2 最近21天销售趋势图


python生成数据看板的数据要求_mysql_03


3.3 区域销售详情表


python生成数据看板的数据要求_python生成数据看板的数据要求_04