将手机上网日志文件批量导入到HBase中,操作步骤如下:
1、将日志文件(请下载附件)上传到HDFS中,使用Hadoop的文件系统命令上传:hadoop fs -put input /(即把本地的input上传到HDFS根目录,得到/input,供第3步读取)
2、通过Java代码创建HBase表(表名wlan_log,列族cf):
1. package
2.
3. import
4.
5. import
6. import
7. import
8. import
9. import
10. import
11. import
12. import
13. import
14. import
15. import
16. import
17.
18. public class
19.
20. public static void main(String[] args) throws
21. "wlan_log";
22. "cf";
23.
24. HbaseDemo.create(tableName, columnFamily);
25.
26. // HbaseDemo.put(tableName, "row1", columnFamily, "cl1", "data");
27. // HbaseDemo.get(tableName, "row1");
28. // HbaseDemo.scan(tableName);
29. // HbaseDemo.delete(tableName);
30. }
31.
32. // hbase操作必备
33. private static
34. Configuration conf = HBaseConfiguration.create();
35. "hbase.rootdir", "hdfs://hadoop1:9000/hbase");
36. // 使用eclipse时必须添加这个,否则无法定位
37. "hbase.zookeeper.quorum", "hadoop1");
38. return
39. }
40.
41. // 创建一张表
42. public static void
43. throws
44. new
45. if
46. "table exists!");
47. else
48. new
49. new
50. admin.createTable(tableDesc);
51. "create table success!");
52. }
53. }
54.
55. // 添加一条记录
56. public static void
57. throws
58. new
59. new
60. p1.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes
61. .toBytes(data));
62. table.put(p1);
63. "put'" + row + "'," + columnFamily + ":"
64. "','" + data + "'");
65. }
66.
67. // 读取一条记录
68. public static void get(String tableName, String row) throws
69. new
70. new
71. Result result = table.get(get);
72. "Get: "
73. }
74.
75. // 显示所有数据
76. public static void scan(String tableName) throws
77. new
78. new
79. ResultScanner scanner = table.getScanner(scan);
80. for
81. "Scan: "
82. }
83. }
84.
85. // 删除表
86. public static void delete(String tableName) throws
87. new
88. if
89. try
90. admin.disableTable(tableName);
91. admin.deleteTable(tableName);
92. catch
93. e.printStackTrace();
94. "Delete " + tableName + " 失败");
95. }
96. }
97. "Delete " + tableName + " 成功");
98. }
99.
100. }
3、通过MapReduce作业将HDFS上的日志文件批量导入HBase表wlan_log中:
1. import
2. import
3.
4. import
5. import
6. import
7. import
8. import
9. import
10. import
11. import
12. import
13. import
14. import
15. import
16. import
17.
18. public class
19.
20. public static void main(String[] args) throws
21. final Configuration configuration = new
22. // 设置zookeeper
23. "hbase.zookeeper.quorum", "hadoop1");
24.
25. // 设置hbase表名称
26. "wlan_log");
27.
28. // 将该值改大,防止hbase超时退出
29. "dfs.socket.timeout", "180000");
30.
31. final Job job = new Job(configuration, "HBaseBatchImport");
32.
33. class);
34. class);
35. // 设置map的输出,不设置reduce的输出类型
36. class);
37. class);
38.
39. class);
40. // 不再设置输出路径,而是设置输出格式类型
41. class);
42.
43. "hdfs://hadoop1:9000/input");
44.
45. true);
46. }
47.
48. static class BatchImportMapper extends
49. Mapper<LongWritable, Text, LongWritable, Text> {
50. new SimpleDateFormat("yyyyMMddHHmmss");
51. new
52.
53. protected void
54. throws
55. final String[] splited = value.toString().split("\t");
56. try
57. final Date date = new Date(Long.parseLong(splited[0].trim()));
58. final
59. 1] + ":"
60. "\t"
61. context.write(key, v2);
62. catch
63. final Counter counter = context.getCounter("BatchImport",
64. "ErrorFormat");
65. counter.increment(1L);
66. "出错了" + splited[0] + " "
67. }
68. };
69. }
70.
71. static class BatchImportReducer extends
72. TableReducer<LongWritable, Text, NullWritable> {
73. protected void
74. java.lang.Iterable<Text> values, Context context)
75. throws
76. for
77. final String[] splited = text.toString().split("\t");
78.
79. final Put put = new Put(Bytes.toBytes(splited[0]));
80. "cf"), Bytes.toBytes("date"), Bytes
81. 1]));
82. // 省略其他字段,调用put.add(....)即可
83. context.write(NullWritable.get(), put);
84. }
85. };
86. }
87.
88. }
4、查看导入结果: