1.准备HBase数据
在HBase shell中创建表
# HBase shell: create table 'mingxing' with two column families
# (base_info and extra_info), each keeping a single version per cell.
create 'mingxing', {NAME => 'base_info', VERSIONS => 1}, {NAME => 'extra_info', VERSIONS => 1}
2.插入数据
# HBase shell: sample rows for 'mingxing'. Rows intentionally populate
# different subsets of the two column families.

# rk001: both families populated
put 'mingxing', 'rk001', 'base_info:name', 'huangbo'
put 'mingxing', 'rk001', 'base_info:age', '33'
put 'mingxing', 'rk001', 'extra_info:math', '44'
put 'mingxing', 'rk001', 'extra_info:province', 'beijing'

# rk002: base_info only
put 'mingxing', 'rk002', 'base_info:name', 'xuzheng'
put 'mingxing', 'rk002', 'base_info:age', '44'

# rk003: base_info only, with an additional 'gender' qualifier
put 'mingxing', 'rk003', 'base_info:name', 'wangbaoqiang'
put 'mingxing', 'rk003', 'base_info:age', '55'
put 'mingxing', 'rk003', 'base_info:gender', 'male'

# rk004: extra_info only
put 'mingxing', 'rk004', 'extra_info:math', '33'
put 'mingxing', 'rk004', 'extra_info:province', 'tianjin'
put 'mingxing', 'rk004', 'extra_info:children', '3'

# rk005: only base_info:name
put 'mingxing', 'rk005', 'base_info:name', 'liutao'

# rk006: 'name' stored under extra_info (not base_info)
put 'mingxing', 'rk006', 'extra_info:name', 'liujialing'
3.添加ZooKeeper的访问地址
在Hive中设置HBase所使用的ZooKeeper集群地址
-- Hive session setting: ZooKeeper quorum (host:port list) used to reach HBase.
SET hbase.zookeeper.quorum=hadoop01:2181,hadoop02:2181,hadoop03:2181;
4.设置hbase在zk中的根目录
-- Hive session setting: root znode under which HBase registers in ZooKeeper.
SET zookeeper.znode.parent=/hbase;
5.添加jar包
将jar包添加到hive的classpath下
-- Register the Hive-HBase storage handler jar for the current Hive session.
add jar /home/hadoop/apps/apache-hive-2.3.2-bin/lib/hive-hbase-handler-2.3.2.jar;
-- Verify the jar was registered. The original "list jar|jars;" was usage
-- meta-syntax (either keyword is accepted), not a runnable command.
list jars;
6.建表
- serdeproperties：指定SerDe解析时使用的属性
  - hbase.columns.mapping：指定Hive列与HBase列的对应关系
    - `base_info:`：解析HBase的整个base_info列族
    - `:key`：获取行键（rowkey）的值
- tblproperties：指定表的属性
  - hbase.table.name：指定需要解析的HBase表名
6.1整个表的解析
-- Hive external table over the HBase table 'mingxing'.
-- Column mapping (positional): rowkey <- :key, base_info <- whole base_info
-- family, extra_info <- whole extra_info family; each family is exposed as a
-- map of qualifier -> value.
-- NOTE(review): ROW FORMAT DELIMITED is probably unnecessary when a storage
-- handler is used - confirm before removing.
CREATE EXTERNAL TABLE mingxing (
    rowkey     string,
    base_info  map<string, string>,
    extra_info map<string, string>
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,base_info:,extra_info:")
TBLPROPERTIES ("hbase.table.name" = "mingxing");
检查数据
-- Ad-hoc check of the mapping; '*' is kept so the printed header matches the
-- table-prefixed column names shown in the sample output.
SELECT *
FROM mingxing;
结果如下
mingxing.rowkey mingxing.base_info mingxing.extra_info
rk001 {"age":"33","name":"huangbo"} {"math":"44","province":"beijing"}
rk002 {"age":"44","name":"xuzheng"} {}
rk003 {"age":"55","gender":"male","name":"wangbaoqiang"} {}
rk004 {} {"children":"3","math":"33","province":"tianjin"}
rk005 {"name":"liutao"} {}
rk006 {} {"name":"liujialing"}
6.2解析部分字段
-- Hive external table exposing selected cells of the HBase table 'mingxing'.
-- Column mapping (positional): rowkey <- :key, name <- base_info:name,
-- age <- base_info:age, province <- extra_info:province.
-- NOTE(review): ROW FORMAT DELIMITED is probably unnecessary when a storage
-- handler is used - confirm before removing.
CREATE EXTERNAL TABLE mingxing01 (
    rowkey   string,
    name     string,
    age      string,
    province string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,base_info:name,base_info:age,extra_info:province")
TBLPROPERTIES ("hbase.table.name" = "mingxing");
检查数据
-- Ad-hoc check of the mapping; '*' is kept so the printed header matches the
-- table-prefixed column names shown in the sample output.
SELECT *
FROM mingxing01;
结果如下（rk006只有extra_info:name这一列，不在映射的列中，因此不会出现在结果里）
mingxing01.rowkey mingxing01.name mingxing01.age mingxing01.province
rk001 huangbo 33 beijing
rk002 xuzheng 44 NULL
rk003 wangbaoqiang 55 NULL
rk004 NULL NULL tianjin
rk005 liutao NULL NULL