日萌社
人工智能AI:Keras PyTorch MXNet TensorFlow PaddlePaddle 深度学习实战(不定时更新)
用户画像 总文章
================ 用户画像表 结构图================
用户画像表 ADM层
每天汇总出的用户画像表仅包含当天的数据,因此还需要和之前汇总好的用户画像表进行新的合并操作后,才算真正的最新数据的用户画像表
用户画像宽表 ADM层 adm.itcast_adm_personas(时间分区)
用户画像宽表 ADM层 = 用户基本属性表 GDM层 gdm.itcast_gdm_user_basic(时间分区)(第 1 张表)
+ 客户消费模型表 GDM层 gdm.itcast_gdm_user_consume_order(时间分区)(第 7 张表 = 2 + 3 + 4 + 5 + 6)
+ 客户喜好消费的商品分类模型表 GDM层 gdm.itcast_gdm_user_buy_category(时间分区)(第 12 张表 = 8 + 9 + 10 + 11)
+ 用户访问模型表 GDM层 gdm.itcast_gdm_user_visit(时间分区)(第 15 张表 = 13 + 14)
---------------------------------------------
用户标签表
可根据 客户消费模型表 + 客户喜好消费的商品分类模型表 + 用户访问模型表 分析出 该人的购买喜好、购买习惯、购买实力,从而推荐相似的商品给该用户
================ 用户画像表 ================
用户画像 宽表:根据 用户基本属性表、客户消费订单表、客户购买类目表、用户访问信息表 生成 用户画像宽表
-- ADM layer: business-level roll-up of the GDM-layer metrics (highly aggregated).
create database if not exists adm;

-- User persona wide table. One row is produced per row of the user basic-attribute
-- table, widened with the consumption, category and visit model columns.
-- Second version (date-partitioned): add  partitioned by (dt string)  after the
-- closing parenthesis of the column list.
create table if not exists adm.itcast_adm_personas(
    -- ---- basic attributes ----
    user_id string, -- user ID
    user_name string, -- user login name
    user_sex string, -- user gender
    user_birthday string, -- user birthday
    user_age bigint, -- user age
    constellation string, -- zodiac sign
    province string, -- province
    city string, -- city
    city_level string, -- city tier
    hex_mail string, -- e-mail
    op_mail string, -- e-mail provider
    hex_phone string, -- mobile phone number
    fore_phone string, -- first 3 digits of the phone number
    op_phone string, -- mobile carrier
    add_time timestamp, -- registration time
    login_ip string, -- login IP address
    login_source string, -- login source
    request_user string, -- inviter
    total_mark bigint, -- membership points
    used_mark bigint, -- points already used
    level_name string, -- membership level name
    blacklist bigint, -- user blacklist
    is_married bigint, -- marital status
    education string, -- education
    monthly_money double, -- income
    profession string, -- occupation
    sex_model bigint, -- gender model
    is_pregnant_woman bigint, -- whether pregnant
    is_have_children bigint, -- whether the user has children
    children_sex_rate double, -- child gender probability
    children_age_rate double, -- child age probability
    is_have_car bigint, -- whether the user owns a car
    potential_car_user_rate double, -- probability of being a potential car buyer
    phone_brand string, -- phone brand in use
    phone_brand_level string, -- tier of the phone brand in use
    phone_cnt bigint, -- number of different phones used
    change_phone_rate bigint, -- phone replacement frequency
    majia_flag string, -- sock-puppet (alternate account) flag
    majie_account_cnt bigint, -- number of sock-puppet accounts
    loyal_model bigint, -- user loyalty
    shopping_type_model bigint, -- user shopping type
    figure_model bigint, -- body shape
    stature_model bigint, -- height
    -- ---- consumption / order model ----
    first_order_time timestamp, -- time of the first purchase
    last_order_time timestamp, -- time of the most recent purchase
    first_order_ago bigint, -- time elapsed since the first order
    last_order_ago bigint, -- time elapsed since the last order
    month1_hg_order_cnt bigint, -- purchase count, last 30 days (excluding returns and rejections)
    month1_hg_order_amt double, -- purchase amount, last 30 days (excluding returns and rejections)
    month2_hg_order_cnt bigint, -- purchase count, last 60 days (excluding returns and rejections)
    month2_hg_order_amt double, -- purchase amount, last 60 days (excluding returns and rejections)
    month3_hg_order_cnt bigint, -- purchase count, last 90 days (excluding returns and rejections)
    month3_hg_order_amt double, -- purchase amount, last 90 days (excluding returns and rejections)
    month1_order_cnt bigint, -- purchase count, last 30 days (including returns and rejections)
    month1_order_amt double, -- purchase amount, last 30 days (including returns and rejections)
    month2_order_cnt bigint, -- purchase count, last 60 days (including returns and rejections)
    month2_order_amt double, -- purchase amount, last 60 days (including returns and rejections)
    month3_order_cnt bigint, -- purchase count, last 90 days (including returns and rejections)
    month3_order_amt double, -- purchase amount, last 90 days (including returns and rejections)
    max_order_amt double, -- largest purchase amount
    min_order_amt double, -- smallest purchase amount
    total_order_cnt bigint, -- cumulative purchase count (excluding returns and rejections)
    total_order_amt double, -- cumulative purchase amount (excluding returns and rejections)
    user_avg_amt double, -- average order value (including returns and rejections), i.e. the average transaction amount per customer
    month3_user_avg_amt double, -- average order value, last 90 days
    common_address string, -- most-used shipping address
    common_paytype string, -- most-used payment method
    month1_cart_cnt bigint, -- shopping-cart count, last 30 days
    month1_cart_goods_cnt bigint, -- number of items put in the cart, last 30 days
    month1_cart_submit_cnt bigint, -- number of cart items submitted, last 30 days
    month1_cart_rate double, -- cart success rate, last 30 days
    month1_cart_cancle_cnt double, -- number of cart items abandoned, last 30 days
    return_cnt bigint, -- number of returned items
    return_amt double, -- amount of returned items
    reject_cnt bigint, -- number of rejected (refused) items
    reject_amt double, -- amount of rejected (refused) items
    last_return_time timestamp, -- time of the most recent return
    school_order_cnt bigint, -- total orders placed at school
    company_order_cnt bigint, -- total orders placed at work
    home_order_cnt bigint, -- total orders placed at home
    forenoon_order_cnt bigint, -- total orders placed in the forenoon
    afternoon_order_cnt bigint, -- total orders placed in the afternoon
    night_order_cnt bigint, -- total orders placed in the evening
    morning_order_cnt bigint, -- total orders placed in the small hours
    -- ---- preferred purchase category model ----
    first_category_id bigint, -- level-1 category ID
    first_category_name string, -- level-1 category name
    second_category_id bigint, -- level-2 category ID
    second_catery_name string, -- level-2 category name
    third_category_id bigint, -- level-3 category ID
    third_category_name string, -- level-3 category name
    month1_category_cnt bigint, -- category purchase count, last 30 days
    month1_category_amt string, -- category purchase amount, last 30 days
    month3_category_cnt bigint, -- category purchase count, last 90 days
    month3_category_amt string, -- category purchase amount, last 90 days
    month6_category_cnt bigint, -- category purchase count, last 180 days
    month6_category_amt string, -- category purchase amount, last 180 days
    total_category_cnt bigint, -- cumulative category purchase count
    total_category_amt string, -- cumulative category purchase amount
    month1_cart_category_cnt bigint, -- cart category count, last 30 days
    month3_cart_category_cnt bigint, -- cart category count, last 90 days
    month6_cart_category_cnt bigint, -- cart category count, last 180 days
    total_cart_category_cnt bigint, -- cumulative cart category count
    last_category_time timestamp, -- time of the last category purchase within the last 90 days
    last_category_ago bigint, -- days since the last category purchase within the last 90 days
    -- ---- visit model ----
    latest_pc_visit_date string, -- date of the most recent PC visit within the last 90 days
    latest_app_visit_date string, -- date of the most recent APP visit within the last 90 days
    latest_pc_visit_session string, -- session of the most recent PC visit within the last 90 days
    latest_pc_cookies string, -- cookies of the most recent PC visit within the last 90 days
    latest_pc_pv string, -- PV of the most recent PC visit within the last 90 days
    latest_pc_browser_name string, -- browser used for the most recent PC visit within the last 90 days
    latest_pc_visit_os string, -- operating system used for the most recent PC visit within the last 90 days
    latest_app_name string, -- app name of the most recent APP visit within the last 90 days
    latest_app_visit_os string, -- operating system used for the most recent APP visit within the last 90 days
    latest_visit_ip string, -- IP of the most recent visit within the last 90 days (APP or PC)
    latest_city string, -- city of the most recent visit within the last 90 days (APP or PC)
    latest_province string, -- province of the most recent visit within the last 90 days (APP or PC)
    first_pc_visit_date string, -- date of the first PC visit within the last 90 days
    first_app_visit_date string, -- date of the first APP visit within the last 90 days
    first_pc_visit_session string, -- session of the first PC visit within the last 90 days
    first_pc_cookies string, -- cookies of the first PC visit within the last 90 days
    first_pc_pv string, -- PV of the first PC visit within the last 90 days
    first_pc_browser_name string, -- browser used for the first PC visit within the last 90 days
    first_pc_visit_os string, -- operating system used for the first PC visit within the last 90 days
    first_app_name string, -- app name of the first APP visit within the last 90 days
    first_app_visit_os string, -- operating system used for the first APP visit within the last 90 days
    first_visit_ip string, -- IP of the first visit within the last 90 days (APP or PC)
    first_city string, -- city of the first visit within the last 90 days (APP or PC)
    first_province string, -- province of the first visit within the last 90 days (APP or PC)
    day7_app_cnt bigint, -- APP visit count, last 7 days
    day15_app_cnt bigint, -- APP visit count, last 15 days
    month1_app_cnt bigint, -- APP visit count, last 30 days
    month2_app_cnt bigint, -- APP visit count, last 60 days
    month3_app_cnt bigint, -- APP visit count, last 90 days
    day7_pc_cnt bigint, -- PC visit count, last 7 days
    day15_pc_cnt bigint, -- PC visit count, last 15 days
    month1_pc_cnt bigint, -- PC visit count, last 30 days
    month2_pc_cnt bigint, -- PC visit count, last 60 days
    month3_pc_cnt bigint, -- PC visit count, last 90 days
    month1_pc_days bigint, -- number of days with PC visits, last 30 days
    month1_pc_pv bigint, -- PC PV, last 30 days
    month1_pc_avg_pv bigint, -- average PC PV, last 30 days
    month1_pc_diff_ip_cnt bigint, -- number of distinct IPs on PC, last 30 days
    month1_pc_diff_cookie_cnt bigint, -- number of distinct cookies on PC, last 30 days
    month1_pc_common_ip string, -- most-used IP on PC, last 30 days
    month1_pc_common_cookie string, -- most-used cookie on PC, last 30 days
    month1_pc_common_browser_name string, -- most-used browser on PC, last 30 days
    month1_pc_common_os string, -- most-used operating system on PC, last 30 days
    month1_hour025_cnt bigint, -- PC visits between hours 0-5, last 30 days
    month1_hour627_cnt bigint, -- PC visits between hours 6-7, last 30 days
    month1_hour829_cnt bigint, -- PC visits between hours 8-9, last 30 days
    month1_hour10212_cnt bigint, -- PC visits between hours 10-12, last 30 days
    month1_hour13214_cnt bigint, -- PC visits between hours 13-14, last 30 days
    month1_hour15217_cnt bigint, -- PC visits between hours 15-17, last 30 days
    month1_hour18219_cnt bigint, -- PC visits between hours 18-19, last 30 days
    month1_hour20221_cnt bigint, -- PC visits between hours 20-21, last 30 days
    month1_hour22223_cnt bigint -- PC visits between hours 22-23, last 30 days
);
-----加载数据:根据 用户基本属性表、客户消费订单表、客户购买类目表、用户访问信息表 生成 用户画像宽表
-- Load step: rebuild the persona wide table by LEFT JOINing the consumption,
-- category and visit model tables onto the user basic-attribute table by user_id.
-- The select list is positional: its order must match the column order of
-- adm.itcast_adm_personas.
-- Second version (date-partitioned target):
--   insert overwrite table adm.itcast_adm_personas partition(dt=${partdt})
insert overwrite table adm.itcast_adm_personas
select
    a.user_id,
    a.user_name,
    a.user_sex,
    a.user_birthday,
    a.user_age,
    a.constellation,
    a.province,
    a.city,
    a.city_level,
    a.hex_mail,
    a.op_mail,
    a.hex_phone,
    a.fore_phone,
    a.op_phone,
    a.add_time,
    a.login_ip,
    a.login_source,
    a.request_user,
    a.total_mark,
    a.used_mark,
    a.level_name,
    a.blacklist,
    a.is_married,
    a.education,
    a.monthly_money,
    a.profession,
    a.sex_model,
    a.is_pregnant_woman,
    a.is_have_children,
    a.children_sex_rate,
    a.children_age_rate,
    a.is_have_car,
    a.potential_car_user_rate,
    a.phone_brand,
    a.phone_brand_level,
    a.phone_cnt,
    a.change_phone_rate,
    a.majia_flag,
    a.majie_account_cnt,
    a.loyal_model,
    a.shopping_type_model,
    a.figure_model,
    a.stature_model,
    b.first_order_time,
    b.last_order_time,
    b.first_order_ago,
    b.last_order_ago,
    b.month1_hg_order_cnt,
    b.month1_hg_order_amt,
    b.month2_hg_order_cnt,
    b.month2_hg_order_amt,
    b.month3_hg_order_cnt,
    b.month3_hg_order_amt,
    b.month1_order_cnt,
    b.month1_order_amt,
    b.month2_order_cnt,
    b.month2_order_amt,
    b.month3_order_cnt,
    b.month3_order_amt,
    b.max_order_amt,
    b.min_order_amt,
    b.total_order_cnt,
    b.total_order_amt,
    b.user_avg_amt,
    b.month3_user_avg_amt,
    b.common_address,
    b.common_paytype,
    b.month1_cart_cnt,
    b.month1_cart_goods_cnt,
    b.month1_cart_submit_cnt,
    b.month1_cart_rate,
    b.month1_cart_cancle_cnt,
    b.return_cnt,
    b.return_amt,
    b.reject_cnt,
    b.reject_amt,
    b.last_return_time,
    b.school_order_cnt,
    b.company_order_cnt,
    b.home_order_cnt,
    b.forenoon_order_cnt,
    b.afternoon_order_cnt,
    b.night_order_cnt,
    b.morning_order_cnt,
    c.first_category_id,
    c.first_category_name,
    c.second_category_id,
    c.second_catery_name,
    c.third_category_id,
    c.third_category_name,
    c.month1_category_cnt,
    c.month1_category_amt,
    c.month3_category_cnt,
    c.month3_category_amt,
    c.month6_category_cnt,
    c.month6_category_amt,
    c.total_category_cnt,
    c.total_category_amt,
    -- These four feed the target *_cart_category_cnt columns. The previous
    -- version repeated the purchase counts (c.month1_category_cnt ...) here.
    -- NOTE(review): assumes gdm.itcast_gdm_user_buy_category exposes the
    -- *_cart_category_cnt columns - confirm against its DDL.
    c.month1_cart_category_cnt,
    c.month3_cart_category_cnt,
    c.month6_cart_category_cnt,
    c.total_cart_category_cnt,
    c.last_category_time,
    c.last_category_ago,
    d.latest_pc_visit_date,
    d.latest_app_visit_date,
    d.latest_pc_visit_session,
    d.latest_pc_cookies,
    d.latest_pc_pv,
    d.latest_pc_browser_name,
    d.latest_pc_visit_os,
    d.latest_app_name,
    d.latest_app_visit_os,
    d.latest_visit_ip,
    d.latest_city,
    d.latest_province,
    d.first_pc_visit_date,
    d.first_app_visit_date,
    d.first_pc_visit_session,
    d.first_pc_cookies,
    d.first_pc_pv,
    d.first_pc_browser_name,
    d.first_pc_visit_os,
    d.first_app_name,
    d.first_app_visit_os,
    d.first_visit_ip,
    d.first_city,
    d.first_province,
    d.day7_app_cnt,
    d.day15_app_cnt,
    d.month1_app_cnt,
    d.month2_app_cnt,
    d.month3_app_cnt,
    d.day7_pc_cnt,
    d.day15_pc_cnt,
    d.month1_pc_cnt,
    d.month2_pc_cnt,
    d.month3_pc_cnt,
    d.month1_pc_days,
    d.month1_pc_pv,
    d.month1_pc_avg_pv,
    d.month1_pc_diff_ip_cnt,
    d.month1_pc_diff_cookie_cnt,
    d.month1_pc_common_ip,
    d.month1_pc_common_cookie,
    d.month1_pc_common_browser_name,
    d.month1_pc_common_os,
    d.month1_hour025_cnt,
    d.month1_hour627_cnt,
    d.month1_hour829_cnt,
    d.month1_hour10212_cnt,
    d.month1_hour13214_cnt,
    d.month1_hour15217_cnt,
    d.month1_hour18219_cnt,
    d.month1_hour20221_cnt,
    d.month1_hour22223_cnt
-- a: user basic-attribute table (driving table, every user is kept)
from gdm.itcast_gdm_user_basic a
-- b: customer consumption / order model table
left join gdm.itcast_gdm_user_consume_order b on a.user_id=b.user_id
-- c: customer purchase-category model table
left join gdm.itcast_gdm_user_buy_category c on a.user_id=c.user_id
-- d: user visit model table
left join gdm.itcast_gdm_user_visit d on a.user_id=d.user_id;
第二版本:
-- Second-version FROM clause (all GDM tables partitioned by dt).
-- A WHERE cannot appear between a table and its ON clause, so the partition
-- filters of the joined tables live in ON (this keeps the joins as LEFT joins),
-- and the filter on the driving table goes in the single trailing WHERE.
from gdm.itcast_gdm_user_basic a
left join gdm.itcast_gdm_user_consume_order b on a.user_id=b.user_id and b.dt=${partdt}
left join gdm.itcast_gdm_user_buy_category c on a.user_id=c.user_id and c.dt=${partdt}
left join gdm.itcast_gdm_user_visit d on a.user_id=d.user_id and d.dt=${partdt}
where a.dt=${partdt};
================== 每天的 用户画像表 进行汇总 ==================
beeline -u jdbc:hive2://node1:10000 -n root
1.每天用户画像表:
adm.itcast_adm_personas_日期时间 即为 adm.itcast_adm_personas_${partdt}
--------------------------------------------
2.完整用户画像表:
adm.itcast_adm_personas_complete 必须为 ORC表,因为只有这样才能实现 更新和删除功能
如:create table adm.itcast_adm_personas_complete(。。。) clustered by(user_id) into 10 buckets stored as orc TBLPROPERTIES('transactional'='true');
注意:adm.itcast_adm_personas_complete 多出了一个字段“rn bigint”,因为使用到row_number() over(distribute by user_id) rn,所以需要存储分组中多出的一个rn序号值,
但是把 adm.itcast_adm_personas_complete 数据 insert select 到 adm.itcast_adm_personas_hbase HIVE-HBASE映射表时,并不把“rn”值也拷贝过去
--------------------------------------------
3.第一次 合并前天的用户画像表(adm.itcast_adm_personas_前天)数据 和 昨天的用户画像表(adm.itcast_adm_personas_昨天)数据 到
完整用户画像表(adm.itcast_adm_personas_complete)中
1.创建 adm.itcast_adm_personas_temp1 临时表1:
用于临时存储前天的用户画像表(adm.itcast_adm_personas_前天)数据,需要对“adm.itcast_adm_personas_前天”中相同用户的多条数据进行分组并取出同一个用户中的
其中一条数据即可,因为同一个用户一天中会购买多个不同商品,因此同一个用户分组下有多条对应不同商品的数据
-- Keep one row per user_id from the ${yesterday} daily persona table.
-- select * carries the generated rn column along into temp1.
-- The window has no sort key, so which duplicate row survives is not specified.
-- NOTE(review): the surrounding notes describe temp1 as the day-before-yesterday
-- snapshot, but this statement reads ${yesterday} - confirm which is intended.
INSERT INTO adm.itcast_adm_personas_temp1
SELECT *
FROM (
    SELECT *, row_number() OVER (DISTRIBUTE BY user_id) rn
    FROM adm.itcast_adm_personas_${yesterday}
) t
WHERE t.rn = 1;
2.创建 adm.itcast_adm_personas_temp2 临时表2:
用于临时存储昨天的用户画像表(adm.itcast_adm_personas_昨天)数据,需要对“adm.itcast_adm_personas_昨天”中相同用户的多条数据进行分组并取出同一个用户中的
其中一条数据即可,因为同一个用户一天中会购买多个不同商品,因此同一个用户分组下有多条对应不同商品的数据
-- Keep one row per user_id from the ${beforeTwoDays} daily persona table.
-- select * carries the generated rn column along into temp2.
-- The window has no sort key, so which duplicate row survives is not specified.
-- NOTE(review): the surrounding notes associate temp2 with the yesterday table,
-- but this statement reads ${beforeTwoDays} - confirm which is intended.
INSERT INTO adm.itcast_adm_personas_temp2
SELECT *
FROM (
    SELECT *, row_number() OVER (DISTRIBUTE BY user_id) rn
    FROM adm.itcast_adm_personas_${beforeTwoDays}
) t
WHERE t.rn = 1;
3.创建 adm.itcast_adm_personas_complete_temp 临时表:
用于合并 adm.itcast_adm_personas_temp1 临时表1 和 adm.itcast_adm_personas_temp2 临时表2 中的 相同用户的数据
到 adm.itcast_adm_personas_complete_temp 临时表中,然后再把 临时表1 和 临时表2 中的 不同用户的数据
再拷贝到 adm.itcast_adm_personas_complete_temp 临时表中,此时前天和昨天的数据都合并到一张临时表中
-- Copy into complete_temp every temp1 row whose user_id is not already present
-- there, then do the same for temp2. The order matters: a user_id inserted from
-- temp1 is skipped when temp2 is processed.
INSERT INTO adm.itcast_adm_personas_complete_temp
SELECT *
FROM adm.itcast_adm_personas_temp1 temp1
WHERE temp1.user_id NOT IN (
    SELECT completeTemp.user_id
    FROM adm.itcast_adm_personas_complete_temp completeTemp
);
INSERT INTO adm.itcast_adm_personas_complete_temp
SELECT *
FROM adm.itcast_adm_personas_temp2 temp2
WHERE temp2.user_id NOT IN (
    SELECT completeTemp.user_id
    FROM adm.itcast_adm_personas_complete_temp completeTemp
);
4.将 adm.itcast_adm_personas_complete_temp 临时表 insert select 到 adm.itcast_adm_personas_complete完整用户画像表 即可
-- Append everything staged in complete_temp to the complete persona table.
INSERT INTO adm.itcast_adm_personas_complete
SELECT *
FROM adm.itcast_adm_personas_complete_temp;
--------------------------------------------
4.第二次和第二次以后的每次合并 完整用户画像表adm.itcast_adm_personas_complete 和 昨天的用户画像表(adm.itcast_adm_personas_昨天)数据
1.创建 adm.itcast_adm_personas_temp1 临时表1:
用于临时存储昨天的用户画像表(adm.itcast_adm_personas_昨天)数据,需要对“adm.itcast_adm_personas_昨天”中相同用户的多条数据进行分组并取出同一个用户中的
其中一条数据即可,因为同一个用户一天中会购买多个不同商品,因此同一个用户分组下有多条对应不同商品的数据
-- Keep one row per user_id from the ${yesterday} daily persona table.
-- select * carries the generated rn column along into temp1.
-- The window has no sort key, so which duplicate row survives is not specified.
INSERT INTO adm.itcast_adm_personas_temp1
SELECT *
FROM (
    SELECT *, row_number() OVER (DISTRIBUTE BY user_id) rn
    FROM adm.itcast_adm_personas_${yesterday}
) t
WHERE t.rn = 1;
2.创建 adm.itcast_adm_personas_complete_temp:
用于把 完整用户画像表adm.itcast_adm_personas_complete 和 adm.itcast_adm_personas_temp1 临时表中 相同用户数据进行合并后
存储到 itcast_adm_personas_complete_temp,临时表1 中有部分用户数据 没有被合并到 itcast_adm_personas_complete_temp 中的话,
代表这部分用户数据 是第一次购买的
3.从 完整用户画像表adm.itcast_adm_personas_complete中 删除 已被合并的用户的旧数据
-- Remove from the complete table every user that has a row staged in complete_temp.
DELETE FROM adm.itcast_adm_personas_complete
WHERE user_id IN (
    SELECT user_id
    FROM adm.itcast_adm_personas_complete_temp
);
4.把 adm.itcast_adm_personas_temp1 临时表1中 没有被合并的第一次购买的 用户数据 也拷贝到 adm.itcast_adm_personas_complete_temp表中
-- Stage the temp1 rows whose user_id is not yet present in complete_temp.
INSERT INTO adm.itcast_adm_personas_complete_temp
SELECT *
FROM adm.itcast_adm_personas_temp1 temp1
WHERE temp1.user_id NOT IN (
    SELECT completeTemp.user_id
    FROM adm.itcast_adm_personas_complete_temp completeTemp
);
5.将 adm.itcast_adm_personas_complete_temp 临时表 insert select 到 adm.itcast_adm_personas_complete完整用户画像表 即可
-- Append everything staged in complete_temp to the complete persona table.
INSERT INTO adm.itcast_adm_personas_complete
SELECT *
FROM adm.itcast_adm_personas_complete_temp;
--------------------------------------------
5.例子分析
-- Worked example of the merge flow on three small tables.
-- a1 is a bucketed, transactional ORC table (the DELETE below runs against it).
-- a2 and temp1 are plain tables.
CREATE TABLE IF NOT EXISTS rimengshe.a1 (
    user_id string,
    user_name string,
    num double
)
CLUSTERED BY (user_id) INTO 10 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');

CREATE TABLE IF NOT EXISTS rimengshe.a2 (
    user_id string,
    user_name string,
    num double
);

-- temp1 is always recreated from scratch.
DROP TABLE IF EXISTS rimengshe.temp1;
CREATE TABLE IF NOT EXISTS rimengshe.temp1 (
    user_id string,
    user_name string,
    num double
);

USE rimengshe;
SHOW TABLES;

-- Seed a1 with users 1-3.
INSERT INTO rimengshe.a1 (user_id, user_name, num) VALUES ('1','guzhipeng',10.6);
INSERT INTO rimengshe.a1 (user_id, user_name, num) VALUES ('2','guzhipeng',5.5);
INSERT INTO rimengshe.a1 (user_id, user_name, num) VALUES ('3','guzhipeng',4.1);
SELECT * FROM rimengshe.a1;

-- Seed a2 with users 1-5.
INSERT INTO rimengshe.a2 (user_id, user_name, num) VALUES ('1','guzhipeng',10.6);
INSERT INTO rimengshe.a2 (user_id, user_name, num) VALUES ('2','guzhipeng',5.5);
INSERT INTO rimengshe.a2 (user_id, user_name, num) VALUES ('3','guzhipeng',4.1);
INSERT INTO rimengshe.a2 (user_id, user_name, num) VALUES ('4','guzhipeng',3.7);
INSERT INTO rimengshe.a2 (user_id, user_name, num) VALUES ('5','guzhipeng',1.0);
SELECT * FROM rimengshe.a2;

-- Merge the users present in both tables: num becomes a1.num + a2.num.
INSERT INTO rimengshe.temp1
SELECT
    A1.user_id,
    A1.user_name,
    A1.num+A2.num
FROM rimengshe.a1 A1
JOIN rimengshe.a2 A2
    ON A1.user_id=A2.user_id;
SELECT * FROM rimengshe.temp1;

-- Drop the pre-merge rows of those users from a1 ...
DELETE FROM rimengshe.a1
WHERE user_id IN (
    SELECT user_id
    FROM rimengshe.temp1
);
SELECT * FROM rimengshe.a1;

-- ... and write the merged rows back.
INSERT INTO rimengshe.a1
SELECT *
FROM rimengshe.temp1;
SELECT * FROM rimengshe.a1;

-- Finally add the a2 users that a1 does not have yet.
INSERT INTO rimengshe.a1
SELECT *
FROM rimengshe.a2 A2
WHERE A2.user_id NOT IN (
    SELECT A1.user_id
    FROM rimengshe.a1 A1
);
SELECT * FROM rimengshe.a1;
================== 建立hive/hbase关联表 ==================
create table hive_test(
user_id STRING, # user_id/id 代表的是 hbase中的 rowkey
user_name STRING,
......
)STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f1:user_name,......列簇名:列名") # 指定hbase表中的列簇名:列名
TBLPROPERTIES ("hbase.table.name" = "hbase表", # hive表中指定所映射的hbase表
"hbase.mapred.output.outputtable" = "hbase表");
在hive1.2.1 跟 hbase 0.98整合时,需要添加:"hbase.mapred.output.outputtable" = "hbase表"
如果不添加会报错:Must specify table name
建立hive/hbase关联表:hive与hbase整合过程
-- Hive table backed by HBase through HBaseStorageHandler.
-- The first column (user_id) maps to the HBase row key (:key). The remaining
-- columns map, in declaration order, to qualifiers in the column families
-- basicInfo, order, category and visit listed in hbase.columns.mapping.
-- NOTE(review): the mapping string spans several lines. Some Hive versions treat
-- whitespace between mapping entries as part of the column name - confirm on the
-- target Hive version before relying on it.
-- NOTE(review): the HBase table name carries a fixed 20170101 suffix - confirm
-- that any reader of the HBase table uses the same name.
CREATE TABLE IF NOT EXISTS adm.itcast_adm_personas_hbase (
    user_id STRING,
    user_name STRING,
    user_sex STRING,
    user_birthday STRING,
    user_age BIGINT,
    constellation STRING,
    province STRING,
    city STRING,
    city_level STRING,
    hex_mail STRING,
    op_mail STRING,
    hex_phone STRING,
    fore_phone STRING,
    op_phone STRING,
    add_time TIMESTAMP,
    login_ip STRING,
    login_source STRING,
    request_user STRING,
    total_mark BIGINT,
    used_mark BIGINT,
    level_name STRING,
    blacklist BIGINT,
    is_married BIGINT,
    education STRING,
    monthly_money DOUBLE,
    profession STRING,
    sex_model BIGINT,
    is_pregnant_woman BIGINT,
    is_have_children BIGINT,
    children_sex_rate DOUBLE,
    children_age_rate DOUBLE,
    is_have_car BIGINT,
    potential_car_user_rate DOUBLE,
    phone_brand STRING,
    phone_brand_level STRING,
    phone_cnt BIGINT,
    change_phone_rate BIGINT,
    majia_flag STRING,
    majie_account_cnt BIGINT,
    loyal_model BIGINT,
    shopping_type_model BIGINT,
    figure_model BIGINT,
    stature_model BIGINT,
    first_order_time TIMESTAMP,
    last_order_time TIMESTAMP,
    first_order_ago BIGINT,
    last_order_ago BIGINT,
    month1_hg_order_cnt BIGINT,
    month1_hg_order_amt DOUBLE,
    month2_hg_order_cnt BIGINT,
    month2_hg_order_amt DOUBLE,
    month3_hg_order_cnt BIGINT,
    month3_hg_order_amt DOUBLE,
    month1_order_cnt BIGINT,
    month1_order_amt DOUBLE,
    month2_order_cnt BIGINT,
    month2_order_amt DOUBLE,
    month3_order_cnt BIGINT,
    month3_order_amt DOUBLE,
    max_order_amt DOUBLE,
    min_order_amt DOUBLE,
    total_order_cnt BIGINT,
    total_order_amt DOUBLE,
    user_avg_amt DOUBLE,
    month3_user_avg_amt DOUBLE,
    common_address STRING,
    common_paytype STRING,
    month1_cart_cnt BIGINT,
    month1_cart_goods_cnt BIGINT,
    month1_cart_submit_cnt BIGINT,
    month1_cart_rate DOUBLE,
    month1_cart_cancle_cnt DOUBLE,
    return_cnt BIGINT,
    return_amt DOUBLE,
    reject_cnt BIGINT,
    reject_amt DOUBLE,
    last_return_time TIMESTAMP,
    school_order_cnt BIGINT,
    company_order_cnt BIGINT,
    home_order_cnt BIGINT,
    forenoon_order_cnt BIGINT,
    afternoon_order_cnt BIGINT,
    night_order_cnt BIGINT,
    morning_order_cnt BIGINT,
    first_category_id BIGINT,
    first_category_name STRING,
    second_category_id BIGINT,
    second_catery_name STRING,
    third_category_id BIGINT,
    third_category_name STRING,
    month1_category_cnt BIGINT,
    month1_category_amt STRING,
    month3_category_cnt BIGINT,
    month3_category_amt STRING,
    month6_category_cnt BIGINT,
    month6_category_amt STRING,
    total_category_cnt BIGINT,
    total_category_amt STRING,
    month1_cart_category_cnt BIGINT,
    month3_cart_category_cnt BIGINT,
    month6_cart_category_cnt BIGINT,
    total_cart_category_cnt BIGINT,
    last_category_time TIMESTAMP,
    last_category_ago BIGINT,
    latest_pc_visit_date STRING,
    latest_app_visit_date STRING,
    latest_pc_visit_session STRING,
    latest_pc_cookies STRING,
    latest_pc_pv STRING,
    latest_pc_browser_name STRING,
    latest_pc_visit_os STRING,
    latest_app_name STRING,
    latest_app_visit_os STRING,
    latest_visit_ip STRING,
    latest_city STRING,
    latest_province STRING,
    first_pc_visit_date STRING,
    first_app_visit_date STRING,
    first_pc_visit_session STRING,
    first_pc_cookies STRING,
    first_pc_pv STRING,
    first_pc_browser_name STRING,
    first_pc_visit_os STRING,
    first_app_name STRING,
    first_app_visit_os STRING,
    first_visit_ip STRING,
    first_city STRING,
    first_province STRING,
    day7_app_cnt BIGINT,
    day15_app_cnt BIGINT,
    month1_app_cnt BIGINT,
    month2_app_cnt BIGINT,
    month3_app_cnt BIGINT,
    day7_pc_cnt BIGINT,
    day15_pc_cnt BIGINT,
    month1_pc_cnt BIGINT,
    month2_pc_cnt BIGINT,
    month3_pc_cnt BIGINT,
    month1_pc_days BIGINT,
    month1_pc_pv BIGINT,
    month1_pc_avg_pv BIGINT,
    month1_pc_diff_ip_cnt BIGINT,
    month1_pc_diff_cookie_cnt BIGINT,
    month1_pc_common_ip STRING,
    month1_pc_common_cookie STRING,
    month1_pc_common_browser_name STRING,
    month1_pc_common_os STRING,
    month1_hour025_cnt BIGINT,
    month1_hour627_cnt BIGINT,
    month1_hour829_cnt BIGINT,
    month1_hour10212_cnt BIGINT,
    month1_hour13214_cnt BIGINT,
    month1_hour15217_cnt BIGINT,
    month1_hour18219_cnt BIGINT,
    month1_hour20221_cnt BIGINT,
    month1_hour22223_cnt BIGINT
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,
basicInfo:user_name,
basicInfo:user_sex,
basicInfo:user_birthday,
basicInfo:user_age,
basicInfo:constellation,
basicInfo:province,
basicInfo:city,
basicInfo:city_level,
basicInfo:hex_mail,
basicInfo:op_mail,
basicInfo:hex_phone,
basicInfo:fore_phone,
basicInfo:op_phone,
basicInfo:add_time,
basicInfo:login_ip,
basicInfo:login_source,
basicInfo:request_user,
basicInfo:total_mark,
basicInfo:used_mark,
basicInfo:level_name,
basicInfo:blacklist,
basicInfo:is_married,
basicInfo:education,
basicInfo:monthly_money,
basicInfo:profession,
basicInfo:sex_model,
basicInfo:is_pregnant_woman,
basicInfo:is_have_children,
basicInfo:children_sex_rate,
basicInfo:children_age_rate,
basicInfo:is_have_car,
basicInfo:potential_car_user_rate,
basicInfo:phone_brand,
basicInfo:phone_brand_level,
basicInfo:phone_cnt,
basicInfo:change_phone_rate,
basicInfo:majia_flag,
basicInfo:majie_account_cnt,
basicInfo:loyal_model,
basicInfo:shopping_type_model,
basicInfo:figure_model,
basicInfo:stature_model,
order:first_order_time,
order:last_order_time,
order:first_order_ago,
order:last_order_ago,
order:month1_hg_order_cnt,
order:month1_hg_order_amt,
order:month2_hg_order_cnt,
order:month2_hg_order_amt,
order:month3_hg_order_cnt,
order:month3_hg_order_amt,
order:month1_order_cnt,
order:month1_order_amt,
order:month2_order_cnt,
order:month2_order_amt,
order:month3_order_cnt,
order:month3_order_amt,
order:max_order_amt,
order:min_order_amt,
order:total_order_cnt,
order:total_order_amt,
order:user_avg_amt,
order:month3_user_avg_amt,
order:common_address,
order:common_paytype,
order:month1_cart_cnt,
order:month1_cart_goods_cnt,
order:month1_cart_submit_cnt,
order:month1_cart_rate,
order:month1_cart_cancle_cnt,
order:return_cnt,
order:return_amt,
order:reject_cnt,
order:reject_amt,
order:last_return_time,
order:school_order_cnt,
order:company_order_cnt,
order:home_order_cnt,
order:forenoon_order_cnt,
order:afternoon_order_cnt,
order:night_order_cnt,
order:morning_order_cnt,
category:first_category_id,
category:first_category_name,
category:second_category_id,
category:second_catery_name,
category:third_category_id,
category:third_category_name,
category:month1_category_cnt,
category:month1_category_amt,
category:month3_category_cnt,
category:month3_category_amt,
category:month6_category_cnt,
category:month6_category_amt,
category:total_category_cnt,
category:total_category_amt,
category:month1_cart_category_cnt,
category:month3_cart_category_cnt,
category:month6_cart_category_cnt,
category:total_cart_category_cnt,
category:last_category_time,
category:last_category_ago,
visit:latest_pc_visit_date,
visit:latest_app_visit_date,
visit:latest_pc_visit_session,
visit:latest_pc_cookies,
visit:latest_pc_pv,
visit:latest_pc_browser_name,
visit:latest_pc_visit_os,
visit:latest_app_name,
visit:latest_app_visit_os,
visit:latest_visit_ip,
visit:latest_city,
visit:latest_province,
visit:first_pc_visit_date,
visit:first_app_visit_date,
visit:first_pc_visit_session,
visit:first_pc_cookies,
visit:first_pc_pv,
visit:first_pc_browser_name,
visit:first_pc_visit_os,
visit:first_app_name,
visit:first_app_visit_os,
visit:first_visit_ip,
visit:first_city,
visit:first_province,
visit:day7_app_cnt,
visit:day15_app_cnt,
visit:month1_app_cnt,
visit:month2_app_cnt,
visit:month3_app_cnt,
visit:day7_pc_cnt,
visit:day15_pc_cnt,
visit:month1_pc_cnt,
visit:month2_pc_cnt,
visit:month3_pc_cnt,
visit:month1_pc_days,
visit:month1_pc_pv,
visit:month1_pc_avg_pv,
visit:month1_pc_diff_ip_cnt,
visit:month1_pc_diff_cookie_cnt,
visit:month1_pc_common_ip,
visit:month1_pc_common_cookie,
visit:month1_pc_common_browser_name,
visit:month1_pc_common_os,
visit:month1_hour025_cnt,
visit:month1_hour627_cnt,
visit:month1_hour829_cnt,
visit:month1_hour10212_cnt,
visit:month1_hour13214_cnt,
visit:month1_hour15217_cnt,
visit:month1_hour18219_cnt,
visit:month1_hour20221_cnt,
visit:month1_hour22223_cnt"
)
TBLPROPERTIES (
    "hbase.table.name" = "itcast_adm_personas_hbase_20170101",
    "hbase.mapred.output.outputtable" = "itcast_adm_personas_hbase_20170101"
);
-- Copy the persona wide table into the Hive/HBase mapped table. Once loaded, the
-- same rows are visible from the HBase side of the mapping.
-- The source is another Hive table (adm.itcast_adm_personas). Per the original
-- note, the YARN cluster must be running for this load.
-- select * is positional: both tables must declare their columns in the same order.
insert overwrite table adm.itcast_adm_personas_hbase select * from adm.itcast_adm_personas;
=============== phoenix建立映射表 ==============
-- Phoenix view exposing the persona columns stored in HBase.
-- user_id is the primary key. Every other column is addressed as
-- column family + qualifier (basicInfo, order, category, visit) and is read as
-- VARCHAR(100).
-- NOTE(review): the Hive mapping table in this document writes to the HBase table
-- itcast_adm_personas_hbase_20170101, while this view is named
-- itcast_adm_personas_hbase - confirm which HBase table the view should sit on.
CREATE VIEW "itcast_adm_personas_hbase" (
    "user_id" VARCHAR(100) PRIMARY KEY,
    "basicInfo"."user_name" VARCHAR(100),
    "basicInfo"."user_sex" VARCHAR(100),
    "basicInfo"."user_birthday" VARCHAR(100),
    "basicInfo"."user_age" VARCHAR(100),
    "basicInfo"."constellation" VARCHAR(100),
    "basicInfo"."province" VARCHAR(100),
    "basicInfo"."city" VARCHAR(100),
    "basicInfo"."city_level" VARCHAR(100),
    "basicInfo"."hex_mail" VARCHAR(100),
    "basicInfo"."op_mail" VARCHAR(100),
    "basicInfo"."hex_phone" VARCHAR(100),
    "basicInfo"."fore_phone" VARCHAR(100),
    "basicInfo"."op_phone" VARCHAR(100),
    "basicInfo"."add_time" VARCHAR(100),
    "basicInfo"."login_ip" VARCHAR(100),
    "basicInfo"."login_source" VARCHAR(100),
    "basicInfo"."request_user" VARCHAR(100),
    "basicInfo"."total_mark" VARCHAR(100),
    "basicInfo"."used_mark" VARCHAR(100),
    "basicInfo"."level_name" VARCHAR(100),
    "basicInfo"."blacklist" VARCHAR(100),
    "basicInfo"."is_married" VARCHAR(100),
    "basicInfo"."education" VARCHAR(100),
    "basicInfo"."monthly_money" VARCHAR(100),
    "basicInfo"."profession" VARCHAR(100),
    "basicInfo"."sex_model" VARCHAR(100),
    "basicInfo"."is_pregnant_woman" VARCHAR(100),
    "basicInfo"."is_have_children" VARCHAR(100),
    "basicInfo"."children_sex_rate" VARCHAR(100),
    "basicInfo"."children_age_rate" VARCHAR(100),
    "basicInfo"."is_have_car" VARCHAR(100),
    "basicInfo"."potential_car_user_rate" VARCHAR(100),
    "basicInfo"."phone_brand" VARCHAR(100),
    "basicInfo"."phone_brand_level" VARCHAR(100),
    "basicInfo"."phone_cnt" VARCHAR(100),
    "basicInfo"."change_phone_rate" VARCHAR(100),
    "basicInfo"."majia_flag" VARCHAR(100),
    "basicInfo"."majie_account_cnt" VARCHAR(100),
    "basicInfo"."loyal_model" VARCHAR(100),
    "basicInfo"."shopping_type_model" VARCHAR(100),
    "basicInfo"."figure_model" VARCHAR(100),
    "basicInfo"."stature_model" VARCHAR(100),
    "order"."first_order_time" VARCHAR(100),
    "order"."last_order_time" VARCHAR(100),
    "order"."first_order_ago" VARCHAR(100),
    "order"."last_order_ago" VARCHAR(100),
    "order"."month1_hg_order_cnt" VARCHAR(100),
    "order"."month1_hg_order_amt" VARCHAR(100),
    "order"."month2_hg_order_cnt" VARCHAR(100),
    "order"."month2_hg_order_amt" VARCHAR(100),
    "order"."month3_hg_order_cnt" VARCHAR(100),
    "order"."month3_hg_order_amt" VARCHAR(100),
    "order"."month1_order_cnt" VARCHAR(100),
    "order"."month1_order_amt" VARCHAR(100),
    "order"."month2_order_cnt" VARCHAR(100),
    "order"."month2_order_amt" VARCHAR(100),
    "order"."month3_order_cnt" VARCHAR(100),
    "order"."month3_order_amt" VARCHAR(100),
    "order"."max_order_amt" VARCHAR(100),
    "order"."min_order_amt" VARCHAR(100),
    "order"."total_order_cnt" VARCHAR(100),
    "order"."total_order_amt" VARCHAR(100),
    "order"."user_avg_amt" VARCHAR(100),
    "order"."month3_user_avg_amt" VARCHAR(100),
    "order"."common_address" VARCHAR(100),
    "order"."common_paytype" VARCHAR(100),
    "order"."month1_cart_cnt" VARCHAR(100),
    "order"."month1_cart_goods_cnt" VARCHAR(100),
    "order"."month1_cart_submit_cnt" VARCHAR(100),
    "order"."month1_cart_rate" VARCHAR(100),
    "order"."month1_cart_cancle_cnt" VARCHAR(100),
    "order"."return_cnt" VARCHAR(100),
    "order"."return_amt" VARCHAR(100),
    "order"."reject_cnt" VARCHAR(100),
    "order"."reject_amt" VARCHAR(100),
    "order"."last_return_time" VARCHAR(100),
    "order"."school_order_cnt" VARCHAR(100),
    "order"."company_order_cnt" VARCHAR(100),
    "order"."home_order_cnt" VARCHAR(100),
    "order"."forenoon_order_cnt" VARCHAR(100),
    "order"."afternoon_order_cnt" VARCHAR(100),
    "order"."night_order_cnt" VARCHAR(100),
    "order"."morning_order_cnt" VARCHAR(100),
    "category"."first_category_id" VARCHAR(100),
    "category"."first_category_name" VARCHAR(100),
    "category"."second_category_id" VARCHAR(100),
    "category"."second_catery_name" VARCHAR(100),
    "category"."third_category_id" VARCHAR(100),
    "category"."third_category_name" VARCHAR(100),
    "category"."month1_category_cnt" VARCHAR(100),
    "category"."month1_category_amt" VARCHAR(100),
    "category"."month3_category_cnt" VARCHAR(100),
    "category"."month3_category_amt" VARCHAR(100),
    "category"."month6_category_cnt" VARCHAR(100),
    "category"."month6_category_amt" VARCHAR(100),
    "category"."total_category_cnt" VARCHAR(100),
    "category"."total_category_amt" VARCHAR(100),
    "category"."month1_cart_category_cnt" VARCHAR(100),
    "category"."month3_cart_category_cnt" VARCHAR(100),
    "category"."month6_cart_category_cnt" VARCHAR(100),
    "category"."total_cart_category_cnt" VARCHAR(100),
    "category"."last_category_time" VARCHAR(100),
    "category"."last_category_ago" VARCHAR(100),
    "visit"."latest_pc_visit_date" VARCHAR(100),
    "visit"."latest_app_visit_date" VARCHAR(100),
    "visit"."latest_pc_visit_session" VARCHAR(100),
    "visit"."latest_pc_cookies" VARCHAR(100),
    "visit"."latest_pc_pv" VARCHAR(100),
    "visit"."latest_pc_browser_name" VARCHAR(100),
    "visit"."latest_pc_visit_os" VARCHAR(100),
    "visit"."latest_app_name" VARCHAR(100),
    "visit"."latest_app_visit_os" VARCHAR(100),
    "visit"."latest_visit_ip" VARCHAR(100),
    "visit"."latest_city" VARCHAR(100),
    "visit"."latest_province" VARCHAR(100),
    "visit"."first_pc_visit_date" VARCHAR(100),
    "visit"."first_app_visit_date" VARCHAR(100),
    "visit"."first_pc_visit_session" VARCHAR(100),
    "visit"."first_pc_cookies" VARCHAR(100),
    "visit"."first_pc_pv" VARCHAR(100),
    "visit"."first_pc_browser_name" VARCHAR(100),
    "visit"."first_pc_visit_os" VARCHAR(100),
    "visit"."first_app_name" VARCHAR(100),
    "visit"."first_app_visit_os" VARCHAR(100),
    "visit"."first_visit_ip" VARCHAR(100),
    "visit"."first_city" VARCHAR(100),
    "visit"."first_province" VARCHAR(100),
    "visit"."day7_app_cnt" VARCHAR(100),
    "visit"."day15_app_cnt" VARCHAR(100),
    "visit"."month1_app_cnt" VARCHAR(100),
    "visit"."month2_app_cnt" VARCHAR(100),
    "visit"."month3_app_cnt" VARCHAR(100),
    "visit"."day7_pc_cnt" VARCHAR(100),
    "visit"."day15_pc_cnt" VARCHAR(100),
    "visit"."month1_pc_cnt" VARCHAR(100),
    "visit"."month2_pc_cnt" VARCHAR(100),
    "visit"."month3_pc_cnt" VARCHAR(100),
    "visit"."month1_pc_days" VARCHAR(100),
    "visit"."month1_pc_pv" VARCHAR(100),
    "visit"."month1_pc_avg_pv" VARCHAR(100),
    "visit"."month1_pc_diff_ip_cnt" VARCHAR(100),
    "visit"."month1_pc_diff_cookie_cnt" VARCHAR(100),
    "visit"."month1_pc_common_ip" VARCHAR(100),
    "visit"."month1_pc_common_cookie" VARCHAR(100),
    "visit"."month1_pc_common_browser_name" VARCHAR(100),
    "visit"."month1_pc_common_os" VARCHAR(100),
    "visit"."month1_hour025_cnt" VARCHAR(100),
    "visit"."month1_hour627_cnt" VARCHAR(100),
    "visit"."month1_hour829_cnt" VARCHAR(100),
    "visit"."month1_hour10212_cnt" VARCHAR(100),
    "visit"."month1_hour13214_cnt" VARCHAR(100),
    "visit"."month1_hour15217_cnt" VARCHAR(100),
    "visit"."month1_hour18219_cnt" VARCHAR(100),
    "visit"."month1_hour20221_cnt" VARCHAR(100),
    "visit"."month1_hour22223_cnt" VARCHAR(100)
);
=============== 通过 sh 脚本 创建 phoenix中的映射表 ==============
#!/bin/sh
# Generates the Phoenix DDL that maps the HBase table "itcast_adm_personas_hbase"
# to a Phoenix view, then executes it with psql.py.
#
# Usage: <script> [YYYYMMDD]
#   $1  optional run date; defaults to yesterday.

# Default run date: yesterday, formatted as YYYYMMDD.
yesterday=$(date -d '-1 day' "+%Y%m%d")
# An explicit date argument overrides the default. The argument is quoted so
# that an empty or whitespace-containing value cannot break the test.
if [ -n "$1" ]; then
    yesterday=$1
fi
# NOTE(review): $yesterday is not referenced by the rest of this script; it is
# kept so the script accepts the same argument as before.

table=itcast_adm_personas_hbase

# Write the DDL file. The redirect creates/truncates the file, so no separate
# `touch` is needed. The heredoc delimiter is unquoted on purpose so that
# $table is expanded; the double quotes around identifiers are emitted
# literally (Phoenix needs them to keep identifiers case-sensitive).
# IF NOT EXISTS makes the script safe to run more than once.
# Column families: basicInfo, order, category, visit. Every column is mapped
# as VARCHAR(100).
cat > "$table.sql" <<EOF
CREATE VIEW IF NOT EXISTS "$table"(
    "user_id" varchar(100) primary key,
    "basicInfo"."user_name" VARCHAR(100),
    "basicInfo"."user_sex" VARCHAR(100),
    "basicInfo"."user_birthday" VARCHAR(100),
    "basicInfo"."user_age" VARCHAR(100),
    "basicInfo"."constellation" VARCHAR(100),
    "basicInfo"."province" VARCHAR(100),
    "basicInfo"."city" VARCHAR(100),
    "basicInfo"."city_level" VARCHAR(100),
    "basicInfo"."hex_mail" VARCHAR(100),
    "basicInfo"."op_mail" VARCHAR(100),
    "basicInfo"."hex_phone" VARCHAR(100),
    "basicInfo"."fore_phone" VARCHAR(100),
    "basicInfo"."op_phone" VARCHAR(100),
    "basicInfo"."add_time" VARCHAR(100),
    "basicInfo"."login_ip" VARCHAR(100),
    "basicInfo"."login_source" VARCHAR(100),
    "basicInfo"."request_user" VARCHAR(100),
    "basicInfo"."total_mark" VARCHAR(100),
    "basicInfo"."used_mark" VARCHAR(100),
    "basicInfo"."level_name" VARCHAR(100),
    "basicInfo"."blacklist" VARCHAR(100),
    "basicInfo"."is_married" VARCHAR(100),
    "basicInfo"."education" VARCHAR(100),
    "basicInfo"."monthly_money" VARCHAR(100),
    "basicInfo"."profession" VARCHAR(100),
    "basicInfo"."sex_model" VARCHAR(100),
    "basicInfo"."is_pregnant_woman" VARCHAR(100),
    "basicInfo"."is_have_children" VARCHAR(100),
    "basicInfo"."children_sex_rate" VARCHAR(100),
    "basicInfo"."children_age_rate" VARCHAR(100),
    "basicInfo"."is_have_car" VARCHAR(100),
    "basicInfo"."potential_car_user_rate" VARCHAR(100),
    "basicInfo"."phone_brand" VARCHAR(100),
    "basicInfo"."phone_brand_level" VARCHAR(100),
    "basicInfo"."phone_cnt" VARCHAR(100),
    "basicInfo"."change_phone_rate" VARCHAR(100),
    "basicInfo"."majia_flag" VARCHAR(100),
    "basicInfo"."majie_account_cnt" VARCHAR(100),
    "basicInfo"."loyal_model" VARCHAR(100),
    "basicInfo"."shopping_type_model" VARCHAR(100),
    "basicInfo"."figure_model" VARCHAR(100),
    "basicInfo"."stature_model" VARCHAR(100),
    "order"."first_order_time" VARCHAR(100),
    "order"."last_order_time" VARCHAR(100),
    "order"."first_order_ago" VARCHAR(100),
    "order"."last_order_ago" VARCHAR(100),
    "order"."month1_hg_order_cnt" VARCHAR(100),
    "order"."month1_hg_order_amt" VARCHAR(100),
    "order"."month2_hg_order_cnt" VARCHAR(100),
    "order"."month2_hg_order_amt" VARCHAR(100),
    "order"."month3_hg_order_cnt" VARCHAR(100),
    "order"."month3_hg_order_amt" VARCHAR(100),
    "order"."month1_order_cnt" VARCHAR(100),
    "order"."month1_order_amt" VARCHAR(100),
    "order"."month2_order_cnt" VARCHAR(100),
    "order"."month2_order_amt" VARCHAR(100),
    "order"."month3_order_cnt" VARCHAR(100),
    "order"."month3_order_amt" VARCHAR(100),
    "order"."max_order_amt" VARCHAR(100),
    "order"."min_order_amt" VARCHAR(100),
    "order"."total_order_cnt" VARCHAR(100),
    "order"."total_order_amt" VARCHAR(100),
    "order"."user_avg_amt" VARCHAR(100),
    "order"."month3_user_avg_amt" VARCHAR(100),
    "order"."common_address" VARCHAR(100),
    "order"."common_paytype" VARCHAR(100),
    "order"."month1_cart_cnt" VARCHAR(100),
    "order"."month1_cart_goods_cnt" VARCHAR(100),
    "order"."month1_cart_submit_cnt" VARCHAR(100),
    "order"."month1_cart_rate" VARCHAR(100),
    "order"."month1_cart_cancle_cnt" VARCHAR(100),
    "order"."return_cnt" VARCHAR(100),
    "order"."return_amt" VARCHAR(100),
    "order"."reject_cnt" VARCHAR(100),
    "order"."reject_amt" VARCHAR(100),
    "order"."last_return_time" VARCHAR(100),
    "order"."school_order_cnt" VARCHAR(100),
    "order"."company_order_cnt" VARCHAR(100),
    "order"."home_order_cnt" VARCHAR(100),
    "order"."forenoon_order_cnt" VARCHAR(100),
    "order"."afternoon_order_cnt" VARCHAR(100),
    "order"."night_order_cnt" VARCHAR(100),
    "order"."morning_order_cnt" VARCHAR(100),
    "category"."first_category_id" VARCHAR(100),
    "category"."first_category_name" VARCHAR(100),
    "category"."second_category_id" VARCHAR(100),
    "category"."second_catery_name" VARCHAR(100),
    "category"."third_category_id" VARCHAR(100),
    "category"."third_category_name" VARCHAR(100),
    "category"."month1_category_cnt" VARCHAR(100),
    "category"."month1_category_amt" VARCHAR(100),
    "category"."month3_category_cnt" VARCHAR(100),
    "category"."month3_category_amt" VARCHAR(100),
    "category"."month6_category_cnt" VARCHAR(100),
    "category"."month6_category_amt" VARCHAR(100),
    "category"."total_category_cnt" VARCHAR(100),
    "category"."total_category_amt" VARCHAR(100),
    "category"."month1_cart_category_cnt" VARCHAR(100),
    "category"."month3_cart_category_cnt" VARCHAR(100),
    "category"."month6_cart_category_cnt" VARCHAR(100),
    "category"."total_cart_category_cnt" VARCHAR(100),
    "category"."last_category_time" VARCHAR(100),
    "category"."last_category_ago" VARCHAR(100),
    "visit"."latest_pc_visit_date" VARCHAR(100),
    "visit"."latest_app_visit_date" VARCHAR(100),
    "visit"."latest_pc_visit_session" VARCHAR(100),
    "visit"."latest_pc_cookies" VARCHAR(100),
    "visit"."latest_pc_pv" VARCHAR(100),
    "visit"."latest_pc_browser_name" VARCHAR(100),
    "visit"."latest_pc_visit_os" VARCHAR(100),
    "visit"."latest_app_name" VARCHAR(100),
    "visit"."latest_app_visit_os" VARCHAR(100),
    "visit"."latest_visit_ip" VARCHAR(100),
    "visit"."latest_city" VARCHAR(100),
    "visit"."latest_province" VARCHAR(100),
    "visit"."first_pc_visit_date" VARCHAR(100),
    "visit"."first_app_visit_date" VARCHAR(100),
    "visit"."first_pc_visit_session" VARCHAR(100),
    "visit"."first_pc_cookies" VARCHAR(100),
    "visit"."first_pc_pv" VARCHAR(100),
    "visit"."first_pc_browser_name" VARCHAR(100),
    "visit"."first_pc_visit_os" VARCHAR(100),
    "visit"."first_app_name" VARCHAR(100),
    "visit"."first_app_visit_os" VARCHAR(100),
    "visit"."first_visit_ip" VARCHAR(100),
    "visit"."first_city" VARCHAR(100),
    "visit"."first_province" VARCHAR(100),
    "visit"."day7_app_cnt" VARCHAR(100),
    "visit"."day15_app_cnt" VARCHAR(100),
    "visit"."month1_app_cnt" VARCHAR(100),
    "visit"."month2_app_cnt" VARCHAR(100),
    "visit"."month3_app_cnt" VARCHAR(100),
    "visit"."day7_pc_cnt" VARCHAR(100),
    "visit"."day15_pc_cnt" VARCHAR(100),
    "visit"."month1_pc_cnt" VARCHAR(100),
    "visit"."month2_pc_cnt" VARCHAR(100),
    "visit"."month3_pc_cnt" VARCHAR(100),
    "visit"."month1_pc_days" VARCHAR(100),
    "visit"."month1_pc_pv" VARCHAR(100),
    "visit"."month1_pc_avg_pv" VARCHAR(100),
    "visit"."month1_pc_diff_ip_cnt" VARCHAR(100),
    "visit"."month1_pc_diff_cookie_cnt" VARCHAR(100),
    "visit"."month1_pc_common_ip" VARCHAR(100),
    "visit"."month1_pc_common_cookie" VARCHAR(100),
    "visit"."month1_pc_common_browser_name" VARCHAR(100),
    "visit"."month1_pc_common_os" VARCHAR(100),
    "visit"."month1_hour025_cnt" VARCHAR(100),
    "visit"."month1_hour627_cnt" VARCHAR(100),
    "visit"."month1_hour829_cnt" VARCHAR(100),
    "visit"."month1_hour10212_cnt" VARCHAR(100),
    "visit"."month1_hour13214_cnt" VARCHAR(100),
    "visit"."month1_hour15217_cnt" VARCHAR(100),
    "visit"."month1_hour18219_cnt" VARCHAR(100),
    "visit"."month1_hour20221_cnt" VARCHAR(100),
    "visit"."month1_hour22223_cnt" VARCHAR(100)
);
EOF

# Run the generated DDL against the ZooKeeper quorum used by Phoenix.
/root/phoenix/bin/psql.py node1,node2,node3:2181 "$table.sql"
========= 把 mysql中的 用户表/订单表 数据 导入到 Hive中 =========
---------------- MySQL----------------
-- Create the MySQL database `adm` (utf8 / utf8_general_ci) when it is missing
-- and make it the current database.
CREATE DATABASE IF NOT EXISTS adm DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
USE adm;

-- Source table that is later imported into Hive with sqoop.
-- IF NOT EXISTS keeps the script re-runnable, consistent with the
-- CREATE DATABASE IF NOT EXISTS statement above.
CREATE TABLE IF NOT EXISTS `userPortraitComplete` (
    -- Surrogate key generated by MySQL.
    `id` bigint(20) NOT NULL AUTO_INCREMENT,
    -- Insert/update date, stored as a 10-character string.
    `insert_update_date` varchar(10) NOT NULL DEFAULT '' COMMENT '插入或更新时间',
    `user_id` varchar(32) NOT NULL DEFAULT '' COMMENT '用户编号',
    `username` varchar(100) NOT NULL DEFAULT '' COMMENT '用户名',
    -- Lifetime totals: sum of all order amounts and number of orders.
    `consume_prices` double NOT NULL DEFAULT 0 COMMENT '至今消费金额(所有订单金额总和)',
    `consume_numbers` int(11) NOT NULL DEFAULT 0 COMMENT '至今消费次数(订单数)',
    -- Purchased product: name, identifier and quantity.
    `product_name` varchar(255) NOT NULL DEFAULT '' COMMENT '商品名',
    `product_id` varchar(50) NOT NULL DEFAULT '' COMMENT '商品编号',
    `product_numbers` int(11) NOT NULL DEFAULT 0 COMMENT '购买数量',
    -- Contact and location.
    `mobile` varchar(50) NOT NULL DEFAULT '' COMMENT '手机号',
    `province` varchar(100) NOT NULL DEFAULT '' COMMENT '省份',
    `city` varchar(100) NOT NULL DEFAULT '' COMMENT '城市',
    PRIMARY KEY (`id`),
    -- Secondary indexes: per-user amount, per-user order count, and load date.
    KEY `userID_consumePrices` (`user_id`, `consume_prices`),
    KEY `userID_consumeNumbers` (`user_id`, `consume_numbers`),
    KEY `insertUpdateDate` (`insert_update_date`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='完整用户画像';

-- mysql client command: execute the statements stored in the given file.
-- NOTE(review): presumably this file holds the rows for userPortraitComplete;
-- confirm its contents before re-running, as a second run may load them again.
source /root/userPortraitComplete.sql;
先在 Hive 中创建表 adm.rimengshe_adm_personas_complete_temp,再把 MySQL 表 userPortraitComplete 的数据导入到 Hive 表 adm.rimengshe_adm_personas_complete_temp
# Copy MySQL table adm.userPortraitComplete into the Hive table
# adm.rimengshe_adm_personas_complete_temp, using a single map task.
# NOTE(review): the database password is passed in clear text on the
# command line.
sqoop import \
    --connect jdbc:mysql://192.168.20.41:3306/adm \
    --username root \
    --password admin \
    --table userPortraitComplete \
    --hive-import \
    --hive-table adm.rimengshe_adm_personas_complete_temp \
    --num-mappers 1
----------------Hive----------------
-- Inspect the full contents of the sqoop target table and of the wide table.
SELECT * FROM adm.rimengshe_adm_personas_complete_temp;
SELECT * FROM adm.rimengshe_adm_personas_complete;
-- Make sure the adm database exists before creating tables in it.
CREATE DATABASE IF NOT EXISTS adm;

-- Rebuild the table that receives the sqoop import of the MySQL table
-- userPortraitComplete; the columns follow that table's column order.
DROP TABLE IF EXISTS adm.rimengshe_adm_personas_complete_temp;
CREATE TABLE IF NOT EXISTS adm.rimengshe_adm_personas_complete_temp (
    id BIGINT,
    insert_update_date STRING,
    user_id STRING,
    username STRING,
    consume_prices DOUBLE,
    consume_numbers BIGINT,
    product_name STRING,
    product_id STRING,
    product_numbers BIGINT,
    mobile STRING,
    province STRING,
    city STRING
);
-- Rebuild the (unpartitioned) user-portrait wide table from scratch.
-- Columns are listed in four groups that correspond to the column families
-- of the Phoenix view: basicInfo, order, category, visit.
DROP TABLE IF EXISTS adm.rimengshe_adm_personas_complete;
CREATE TABLE IF NOT EXISTS adm.rimengshe_adm_personas_complete (
    -- basic user attributes
    user_id STRING,
    user_name STRING,
    user_sex STRING,
    user_birthday STRING,
    user_age BIGINT,
    constellation STRING,
    province STRING,
    city STRING,
    city_level STRING,
    hex_mail STRING,
    op_mail STRING,
    hex_phone STRING,
    fore_phone STRING,
    op_phone STRING,
    add_time TIMESTAMP,
    login_ip STRING,
    login_source STRING,
    request_user STRING,
    total_mark BIGINT,
    used_mark BIGINT,
    level_name STRING,
    blacklist BIGINT,
    is_married BIGINT,
    education STRING,
    monthly_money DOUBLE,
    profession STRING,
    sex_model BIGINT,
    is_pregnant_woman BIGINT,
    is_have_children BIGINT,
    children_sex_rate DOUBLE,
    children_age_rate DOUBLE,
    is_have_car BIGINT,
    potential_car_user_rate DOUBLE,
    phone_brand STRING,
    phone_brand_level STRING,
    phone_cnt BIGINT,
    change_phone_rate BIGINT,
    majia_flag STRING,
    majie_account_cnt BIGINT,
    loyal_model BIGINT,
    shopping_type_model BIGINT,
    figure_model BIGINT,
    stature_model BIGINT,
    -- order / consumption metrics
    first_order_time TIMESTAMP,
    last_order_time TIMESTAMP,
    first_order_ago BIGINT,
    last_order_ago BIGINT,
    month1_hg_order_cnt BIGINT,
    month1_hg_order_amt DOUBLE,
    month2_hg_order_cnt BIGINT,
    month2_hg_order_amt DOUBLE,
    month3_hg_order_cnt BIGINT,
    month3_hg_order_amt DOUBLE,
    month1_order_cnt BIGINT,
    month1_order_amt DOUBLE,
    month2_order_cnt BIGINT,
    month2_order_amt DOUBLE,
    month3_order_cnt BIGINT,
    month3_order_amt DOUBLE,
    max_order_amt DOUBLE,
    min_order_amt DOUBLE,
    total_order_cnt BIGINT,
    total_order_amt DOUBLE,
    user_avg_amt DOUBLE,
    month3_user_avg_amt DOUBLE,
    common_address STRING,
    common_paytype STRING,
    month1_cart_cnt BIGINT,
    month1_cart_goods_cnt BIGINT,
    month1_cart_submit_cnt BIGINT,
    month1_cart_rate DOUBLE,
    month1_cart_cancle_cnt DOUBLE,
    return_cnt BIGINT,
    return_amt DOUBLE,
    reject_cnt BIGINT,
    reject_amt DOUBLE,
    last_return_time TIMESTAMP,
    school_order_cnt BIGINT,
    company_order_cnt BIGINT,
    home_order_cnt BIGINT,
    forenoon_order_cnt BIGINT,
    afternoon_order_cnt BIGINT,
    night_order_cnt BIGINT,
    morning_order_cnt BIGINT,
    -- purchased-category metrics
    first_category_id BIGINT,
    first_category_name STRING,
    second_category_id BIGINT,
    second_catery_name STRING,
    third_category_id BIGINT,
    third_category_name STRING,
    month1_category_cnt BIGINT,
    month1_category_amt STRING,
    month3_category_cnt BIGINT,
    month3_category_amt STRING,
    month6_category_cnt BIGINT,
    month6_category_amt STRING,
    total_category_cnt BIGINT,
    total_category_amt STRING,
    month1_cart_category_cnt BIGINT,
    month3_cart_category_cnt BIGINT,
    month6_cart_category_cnt BIGINT,
    total_cart_category_cnt BIGINT,
    last_category_time TIMESTAMP,
    last_category_ago BIGINT,
    -- visit metrics: latest visit
    latest_pc_visit_date STRING,
    latest_app_visit_date STRING,
    latest_pc_visit_session STRING,
    latest_pc_cookies STRING,
    latest_pc_pv STRING,
    latest_pc_browser_name STRING,
    latest_pc_visit_os STRING,
    latest_app_name STRING,
    latest_app_visit_os STRING,
    latest_visit_ip STRING,
    latest_city STRING,
    latest_province STRING,
    -- visit metrics: first visit
    first_pc_visit_date STRING,
    first_app_visit_date STRING,
    first_pc_visit_session STRING,
    first_pc_cookies STRING,
    first_pc_pv STRING,
    first_pc_browser_name STRING,
    first_pc_visit_os STRING,
    first_app_name STRING,
    first_app_visit_os STRING,
    first_visit_ip STRING,
    first_city STRING,
    first_province STRING,
    -- visit metrics: counts over recent windows
    day7_app_cnt BIGINT,
    day15_app_cnt BIGINT,
    month1_app_cnt BIGINT,
    month2_app_cnt BIGINT,
    month3_app_cnt BIGINT,
    day7_pc_cnt BIGINT,
    day15_pc_cnt BIGINT,
    month1_pc_cnt BIGINT,
    month2_pc_cnt BIGINT,
    month3_pc_cnt BIGINT,
    month1_pc_days BIGINT,
    month1_pc_pv BIGINT,
    month1_pc_avg_pv BIGINT,
    month1_pc_diff_ip_cnt BIGINT,
    month1_pc_diff_cookie_cnt BIGINT,
    month1_pc_common_ip STRING,
    month1_pc_common_cookie STRING,
    month1_pc_common_browser_name STRING,
    month1_pc_common_os STRING,
    -- visit metrics: per-hour-bucket counters
    month1_hour025_cnt BIGINT,
    month1_hour627_cnt BIGINT,
    month1_hour829_cnt BIGINT,
    month1_hour10212_cnt BIGINT,
    month1_hour13214_cnt BIGINT,
    month1_hour15217_cnt BIGINT,
    month1_hour18219_cnt BIGINT,
    month1_hour20221_cnt BIGINT,
    month1_hour22223_cnt BIGINT
);
-- Append the rows of the sqoop-imported table to the wide table.
-- Only the seven listed target columns are supplied; source and target
-- columns are paired by position (username -> user_name,
-- consume_prices -> total_order_amt, consume_numbers -> total_order_cnt,
-- mobile -> hex_phone).
INSERT INTO adm.rimengshe_adm_personas_complete (
    user_id,
    user_name,
    total_order_amt,
    total_order_cnt,
    hex_phone,
    province,
    city
)
SELECT
    user_id,
    username,
    consume_prices,
    consume_numbers,
    mobile,
    province,
    city
FROM adm.rimengshe_adm_personas_complete_temp;