本代码基于以下版本
JDK版本1.8
Hadoop版本3.1.3
Zookeeper版本3.5.7
HBase版本2.3.5
1.创建一个Maven工程
导入依赖
<!-- HBase client library; version matches the HBase 2.3.5 release listed above. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.3.5</version>
</dependency>
<!-- Hadoop common library; version matches the Hadoop 3.1.3 release listed above. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.1.3</version>
</dependency>
<!-- Hadoop authentication library; kept at the same version as hadoop-common. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>3.1.3</version>
</dependency>
2.在main文件夹下创建资源文件夹,将log4j.properties配置文件放入资源文件夹,log4j.properties配置信息如下
# Root logger: INFO level, attached to the "stdout" appender only.
# NOTE(review): the "logfile" appender configured below is not listed here,
# so as written nothing is routed to log/hd.log - confirm whether
# "INFO, stdout, logfile" was intended.
log4j.rootLogger=INFO, stdout
# Console appender: timestamp, level, logger name, message.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# File appender writing to log/hd.log with the same line layout as the console.
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=log/hd.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
3.编写java代码(表结构创建和删除、单条和批量插入数据)
注意点:删除表操作前需要先将表禁用,删除命名空间前需要将命名空间下的表删除后才可以执行删除命名空间操作
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
public class Java2HBase {
//加载配置信息
private static Configuration config = null;
public static void init(String...items) {
config = HBaseConfiguration.create();
for (String item : items) {
String[] ps = item.split("=");
config.set(ps[0],ps[1]);
}
}
//该类用于集中释放资源,方便调用
private static void close(AutoCloseable...closes){
for (AutoCloseable close : closes) {
try {
close.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
//创建与HBase的连接对象
private static Connection con() throws IOException {
return ConnectionFactory.createConnection(config);
}
//创建管理员,执行执行管理员任务
private static Admin admin(Connection con) throws IOException {
return con.getAdmin();
}
//判断命名空间是否存在,存在为true,不存在为false
private static boolean nameSpaceExists(String nameSpace,String[] nss){
for (String ns : nss) {
if (nameSpace.equals(ns)) {
return true;
}
}
return false;
}
//向HBase创建命名空间
public static void createNameSpace(String nameSpace){
Connection con = null;
Admin admin = null;
try {
con =con();
admin=admin(con);
//判断namespace命名空间是否存在,存在抛异常,不存在则创建
if (nameSpaceExists(nameSpace,admin.listNamespaces())){
throw new IOException("nameSpace [ "+nameSpace+" ] created in failure for existence");
}
admin.createNamespace(NamespaceDescriptor.create(nameSpace).build());
System.out.println("nameSpace [ "+nameSpace+" ] created in success");
} catch (IOException e) {
e.printStackTrace();
System.out.println("nameSpace [ "+nameSpace+" ] created in failure");
}finally {
//释放资源
close(admin,con);
}
}
//创建表,String columnFamily,String...columnFamilies 写法,保证列簇至少有一个
public static void createTable(String tableName,String columnFamily,String...columnFamilies){
Connection con = null;
Admin admin = null;
try {
con = con();
admin = admin(con);
TableName tn = TableName.valueOf(tableName);
//验证表是否已存在,存在则抛异常
if (admin.tableExists(tn)){
throw new IOException("table [ "+tableName+" ] created in failure for existence");
}
//根据表名创建表描述构造器
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tn);
//创建列簇集合
List<ColumnFamilyDescriptor> list = new ArrayList<>();
list.add(ColumnFamilyDescriptorBuilder.of(columnFamily));
for (String family : columnFamilies) {
list.add(ColumnFamilyDescriptorBuilder.of(family));
}
//向表描述器中添加列簇
builder.setColumnFamilies(list);
//创建表
admin.createTable(builder.build());
System.out.println("table [ "+tableName+" ] created successfully");
} catch (IOException e) {
e.printStackTrace();
}finally {
//释放资源
close(admin,con);
}
}
public static void dropTable(String tableName){
Connection con = null;
Admin admin = null;
try {
con = con();
admin = admin(con);
TableName tn = TableName.valueOf(tableName);
//验证表是否存在,不存在则抛异常
if (!admin.tableExists(tn)){
throw new IOException("table [ "+tableName+" ] dropped in failure for absence");
}
//验证表是否被禁用,未被禁用则禁用该表
if (admin.isTableEnabled(tn)){
admin.disableTable(tn);
System.out.println("table [ "+tableName+" ] [enabled] and now is disabled");
}
//删除表
admin.deleteTable(tn);
System.out.println("table [ " +tableName+" ] dropped successfully");
} catch (IOException e) {
e.printStackTrace();
}finally {
//释放资源
close(admin,con);
}
//删除命名空间:需要先将该命名空间下的表禁用后删除,才可以删除命名空间
public static void dropNameSpace(String nameSpace){
Connection con = null;
Admin admin = null;
try {
con = con();
admin = admin(con);
if (!nameSpaceExists(nameSpace,admin.listNamespaces())) {
throw new IOException("nameSpace [ "+nameSpace+" ] dropped in failure for absence");
}
//创建命名空间内表的匹配规则
Pattern compile = Pattern.compile(nameSpace+".*?");
//得到命名空间下所有表的集合
List<TableDescriptor> tns = admin.listTableDescriptors(compile);
//遍历所有表,如果启用状态则禁用,禁用后删除表
for (TableDescriptor tn : tns) {
/*TableName tableName = tn.getTableName();
if (admin.isTableEnabled(tableName)) {
admin.disableTable(tableName);
}
admin.deleteTable(tableName);*/
//调用上面dropTableName方法
String tableName = tn.getTableName().toString();
dropTable(tableName);
}
//验证表是否存在
private static boolean tableExists(Connection con,TableName tableName){
Admin admin = null;
try {
admin = admin(con);
return admin.tableExists(tableName);
} catch (IOException e) {
e.printStackTrace();
return false;
}finally {
close(admin);
}
}
//单条插入
public static void put(String tableName,String rowKey,String family,String column,String value){
String msg = "put [ rowKey "+rowKey+" => "+family+" => "+column+" => value("+value+") ] into table [ "+tableName+" ]";
TableName tn = TableName.valueOf(tableName);
Connection con = null;
Table table = null;
try {
con = con();
if (!tableExists(con,tn)){
throw new IOException("table [ "+tableName+" ] not exist error");
}
table = con.getTable(tn);
//构造带有行键的Put对象
Put put = new Put(Bytes.toBytes(rowKey));
put.addColumn(Bytes.toBytes(family),Bytes.toBytes(column),Bytes.toBytes(value));
table.put(put);
System.err.println(msg+"in success");
} catch (IOException e) {
e.printStackTrace();
System.err.println(msg+"in failure");
}finally {
close(table,con);
}
}
/**
* 将file路径指向的文件数据映射到hbase
* 文件名即表名,为了防止命名冲突:tableName_timestamp
* 文件的首行为表结构; :key,cf:col,...
* @param file 指定批量插入的数据文件路径
* @param regexSep 指定分隔符
*/
public static void putBatch(String file,String regexSep){
File data = new File(file);
Connection con = null;
BufferedReader br = null;
BufferedMutator mutator =null;
try {
//输入文件验证
if (!data.exists() || !data.isFile()) {
throw new IOException(file + " not exist or file error");
}
String[] ns = data.getName().split("_|\\.");
String tableName = ns[0]+":"+ns[1];
TableName tn = TableName.valueOf(tableName);
con = con();
//验证hbase表是否存在
if (!tableExists(con,tn)){
throw new IOException("hbase table [ "+tableName+" ] not exist error");
}
//通过文件首行解析hbase表结构
br = new BufferedReader(new FileReader(data));
String line = null;
if(null == (line = br.readLine())){
throw new IOException("file [ "+file+" ] empty error");
}
String[] ps = line.split(regexSep);
//创建批量插入异常侦听
DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
BufferedMutator.ExceptionListener listener = (e,_mutator)->{
System.err.println("put data into table [ "+tableName+" ] error "
+e.getNumExceptions()+" rows,retry put at "+dtf.format(LocalDateTime.now()));
int count = 0;
for (int i = 0; i < e.getNumExceptions(); i++) {
Row row = e.getRow(i);
try {
_mutator.mutate((Put)row);
count++;
} catch (IOException ex) {
ex.printStackTrace();
System.err.println("retry put "+row+" error, please check it");
}
}
System.err.println("retry data into table [ "+tableName+" ] from error total"
+e.getNumExceptions()+" rows, finish "+count+" rows, at "+dtf.format(LocalDateTime.now()));
};
//1.设置缓存大小 2.绑定侦听器
BufferedMutatorParams bmp = new BufferedMutatorParams(tn)
.writeBufferSize(8*1024*1024)
.listener(listener);
mutator = con.getBufferedMutator(bmp);
int count = 0,CAPACITY = 1000;
Put put = null;
List<Put> list = new ArrayList<>(CAPACITY);
while (null != (line = br.readLine())){
String[] arr = line.split(regexSep);
put = new Put(Bytes.toBytes(arr[0]));
for (int i = 1; i < ps.length; i++) {
String[] ts = ps[i].split(":");
put.addColumn(Bytes.toBytes(ts[0]),Bytes.toBytes(ts[1]),Bytes.toBytes(arr[i]));
}
list.add(put);
//达到设定行数时向表中添加
if (list.size()==CAPACITY){
count += list.size();
mutator.mutate(list);
list.clear();
}
}
//出循环后再次溢写
count +=list.size();
mutator.mutate(list);
list.clear();
System.err.println("batch put into [ "+tableName+" , "+count+" rows ] from [ "+file+" ] in success");
} catch (IOException e) {
e.printStackTrace();
System.err.println("batch put from [ "+file+" ] in failure");
}finally {
close(br,mutator,con);
}
}
}
//测试代码,hostname为虚拟机主机名,需要在windows下配置主机别名映射,未配置情况下可使用ip地址代替
public static void main(String[] args) {
init("hbase.zookeeper.quorum=hostname");
//createNameSpace("dsj");
//createTable("dsj:test","cf1","cf2","cf3");
//dropTable("dsj:test");
putBatch("C:\\Users\\Administrator\\Desktop\\dsj_test_1624591726565",",");
}
}
4.测试数据编造代码
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.Random;
/**
 * Generates a comma-separated test data file for the HBase batch import.
 *
 * <p>The file is named {@code dsj_test_<currentTimeMillis>}. Its first line is
 * the table layout ({@code :key,cf:col,...}) and it is followed by 1000 data rows.
 */
public class App
{
    /** Directory prefix used when no output directory is given on the command line. */
    private static final String DEFAULT_DIR = "C:\\Users\\Administrator\\Desktop\\";
    /** File name prefix; the creation timestamp is appended to it. */
    private static final String FILE_PREFIX = "dsj_test_";
    /** Header line: row key followed by family:qualifier pairs. */
    private static final String HEADER = ":key,cf1:name,cf2:age,cf2:pos,cf2:salary,cf3:rst";
    /** Number of data rows written after the header. */
    private static final int ROW_COUNT = 1000;

    /**
     * Writes the test data file.
     *
     * @param args optional; {@code args[0]} is the output directory. Without
     *             arguments the file is written to the default desktop path.
     * @throws IOException if the file cannot be created or written
     */
    public static void main( String[] args ) throws IOException {
        String fileName = FILE_PREFIX + System.currentTimeMillis();
        File target = (args != null && args.length > 0)
                ? new File(args[0], fileName)
                : new File(DEFAULT_DIR + fileName);
        Random random = new Random();
        // try-with-resources closes (and flushes) the writer even if a write fails.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(target))) {
            bw.write(HEADER);
            bw.newLine();
            for (int i = 0; i < ROW_COUNT; i++) {
                bw.write(row(i, 18 + random.nextInt(20), 1 + random.nextInt(3)));
                bw.newLine();
            }
        }
    }

    /**
     * Builds one data row. Plain concatenation is used instead of MessageFormat,
     * which inserts locale-specific grouping separators into numbers (1000 would
     * become "1,000") and so would corrupt the comma-separated layout.
     */
    private static String row(int index, int age, int salary) {
        return "zbstu" + index + ",henry" + index + "," + age
                + ",market clerk," + salary + ",how are you";
    }
}
5.批量插入文件中数据代码(自动根据文件名创建命名空间,根据首行创建表)
/**
 * Loads a delimited text file into HBase, creating the namespace and the table
 * when they do not exist yet.
 *
 * <p>The file name is split on {@code '_'} and {@code '.'}; its first part is the
 * namespace and its second part the table (for example {@code dsj_test_1624591726565}
 * maps to {@code dsj:test}). The first line describes the table layout as
 * {@code :key,cf:col,...}; a missing table is created with the column families named
 * there. Each following line holds the row key followed by one value per declared
 * column. Empty lines are skipped.
 *
 * @param file     data file to import into HBase (relative or absolute path)
 * @param regexSep regular expression used to split each line into fields
 */
public static void putBatch(String file,String regexSep){
    final int capacity = 1000;
    File data = new File(file);
    Connection con = null;
    Admin admin = null;
    BufferedReader br = null;
    BufferedMutator mutator = null;
    try {
        if (!data.exists() || !data.isFile()) {
            throw new IOException(file + " not exist or file error");
        }
        String[] ns = data.getName().split("_|\\.");
        if (ns.length < 2) {
            throw new IOException("file name [ " + data.getName() + " ] must look like namespace_table_timestamp");
        }
        String nameSpace = ns[0];
        String tableName = ns[0] + ":" + ns[1];
        TableName tn = TableName.valueOf(tableName);
        con = con();
        admin = admin(con);
        // Create the namespace on first use.
        if (!nameSpaceExists(nameSpace, admin.listNamespaces())) {
            admin.createNamespace(NamespaceDescriptor.create(nameSpace).build());
            System.err.println(nameSpace+" has been created successfully");
        }

        // The first line of the file declares the column layout.
        br = new BufferedReader(new FileReader(data));
        String line = br.readLine();
        if (line == null) {
            throw new IOException("file [ "+file+" ] empty error");
        }
        String[] ps = line.split(regexSep);
        if (ps.length < 2) {
            throw new IOException("file [ " + file + " ] header declares no columns");
        }
        // Parse family/qualifier once instead of once per data row; index 0 is the row key.
        byte[][] families = new byte[ps.length][];
        byte[][] qualifiers = new byte[ps.length][];
        List<String> familyNames = new ArrayList<>();
        for (int i = 1; i < ps.length; i++) {
            String[] ts = ps[i].split(":");
            if (ts.length < 2) {
                throw new IOException("header column [ " + ps[i] + " ] must look like family:qualifier");
            }
            families[i] = Bytes.toBytes(ts[0]);
            qualifiers[i] = Bytes.toBytes(ts[1]);
            // Several columns may share a family; remember each family name once.
            if (!familyNames.contains(ts[0])) {
                familyNames.add(ts[0]);
            }
        }

        // Create the table from the header when it does not exist yet.
        if (!tableExists(con, tn)) {
            List<ColumnFamilyDescriptor> lists = new ArrayList<>();
            for (String familyName : familyNames) {
                lists.add(ColumnFamilyDescriptorBuilder.of(Bytes.toBytes(familyName)));
            }
            TableDescriptorBuilder bu = TableDescriptorBuilder.newBuilder(tn);
            bu.setColumnFamilies(lists);
            admin.createTable(bu.build());
        }

        // Listener invoked when buffered writes fail: log, then re-submit each failed row once.
        DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
        BufferedMutator.ExceptionListener listener = (e,_mutator)->{
            System.err.println("put data into table [ "+tableName+" ] error "
                    +e.getNumExceptions()+" rows,retry put at "+dtf.format(LocalDateTime.now()));
            int retried = 0;
            for (int i = 0; i < e.getNumExceptions(); i++) {
                Row row = e.getRow(i);
                try {
                    _mutator.mutate((Put)row);
                    retried++;
                } catch (IOException ex) {
                    ex.printStackTrace();
                    System.err.println("retry put "+row+" error, please check it");
                }
            }
            System.err.println("retry data into table [ "+tableName+" ] from error total"
                    +e.getNumExceptions()+" rows, finish "+retried+" rows, at "+dtf.format(LocalDateTime.now()));
        };
        // 8 MiB client-side write buffer, with the retry listener attached.
        BufferedMutatorParams bmp = new BufferedMutatorParams(tn)
                .writeBufferSize(8*1024*1024)
                .listener(listener);
        mutator = con.getBufferedMutator(bmp);

        int count = 0;
        int lineNo = 1;
        List<Put> list = new ArrayList<>(capacity);
        while (null != (line = br.readLine())) {
            lineNo++;
            if (line.isEmpty()) {
                continue;
            }
            // Limit -1 keeps trailing empty fields so rows ending in empty values stay aligned.
            String[] arr = line.split(regexSep, -1);
            if (arr.length < ps.length || arr[0].isEmpty()) {
                throw new IOException("file [ " + file + " ] line " + lineNo
                        + " has an empty key or fewer fields than the header");
            }
            Put put = new Put(Bytes.toBytes(arr[0]));
            for (int i = 1; i < ps.length; i++) {
                put.addColumn(families[i], qualifiers[i], Bytes.toBytes(arr[i]));
            }
            list.add(put);
            // Hand a full batch over to the mutator.
            if (list.size() == capacity) {
                count += list.size();
                mutator.mutate(list);
                list.clear();
            }
        }
        // Hand over the last, partially filled batch.
        if (!list.isEmpty()) {
            count += list.size();
            mutator.mutate(list);
            list.clear();
        }
        // Push buffered rows to the servers before reporting success.
        mutator.flush();
        System.err.println("batch put into [ "+tableName+" , "+count+" rows ] from [ "+file+" ] in success");
    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("batch put from [ "+file+" ] in failure");
    } finally {
        close(br,mutator,admin,con);
    }
}