一、实验目的
- 理解HBase在Hadoop体系结构中的角色;
- 熟练使用HBase操作常用的Shell命令;
- 熟悉HBase操作常用的Java API。
二、实验平台
- 操作系统:Linux(建议CentOS);
- Hadoop版本:2.6.1;
- JDK版本:1.7或以上版本;
- Java IDE:IDEA。
三、实验内容
(1)编程实现以下指定功能,并用Hadoop提供的HBase Shell命令完成相同任务:
①列出HBase所有的表的相关信息,例如表名;
hbase(main):001:0> list
TABLE
student
test1
2 row(s)
Took 0.2874 seconds
=> ["student", "test1"]
②在终端打印出指定的表的所有记录数据;
hbase(main):010:0> scan 'test1'
ROW COLUMN+CELL
001 column=f1:test, timestamp=2021-10-17T14:50:59.163, value=test
1 row(s)
Took 0.0303 seconds
③向已经创建好的表添加和删除指定的列族或列;
# 增加列族
hbase(main):005:0> alter 'test1', 'addColumnFamily'
Updating all regions with the new schema...
1/1 regions updated.
Done.
Took 1.9763
# 增加一列数据
hbase(main):006:0> put 'test1', '001', 'addColumnFamily:newCol', 2
Took 0.0072
# 删除指定列族指定列数据
hbase(main):007:0> delete 'test1', '001', 'addColumnFamily:newCol'
Took 0.0032 seconds
# 删除列族
hbase(main):008:0> alter 'test1', 'delete' => 'addColumnFamily'
Updating all regions with the new schema...
1/1 regions updated.
Done.
Took 1.9997
④清空指定的表的所有记录数据;
hbase(main):001:0> truncate 'test1'
Truncating 'test1' table (it may take a while):
Disabling table...
Truncating table...
Took 2.7971
hbase(main):002:0> scan 'test1'
ROW COLUMN+CELL
0 row(s)
Took 0.0382 seconds
⑤统计表的行数。
hbase(main):003:0> count 'test1'
0 row(s)
Took 0.0273 seconds
=> 0
以上内容的Java代码实现:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
import org.apache.hadoop.hbase.client.coprocessor.LongColumnInterpreter;
import org.apache.hadoop.hbase.util.Bytes;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
/**
 * Console helpers that mirror a handful of HBase shell commands (list, scan,
 * alter, put, delete, truncate-like clearing, count) through the HBase Java
 * client API.
 *
 * <p>All state is held in static fields; call {@link #getConnect()} before any
 * other method and {@link #closeConnect()} when finished.
 */
public class hbase1 {
    public static Configuration conf;
    public static Admin admin;
    public static Connection connection;

    // Single shared reader for standard input. Closing a Scanner also closes
    // System.in, so creating and closing one per read makes every read after
    // the first fail; the reader is therefore kept open for the JVM lifetime.
    private static final Scanner STDIN = new Scanner(System.in);

    /**
     * Creates the configuration, opens the cluster connection and obtains the
     * {@link Admin} handle.
     *
     * @throws IOException if the connection or the admin handle cannot be created
     */
    public static void getConnect() throws IOException {
        conf = HBaseConfiguration.create();
        // Configuration keys are case-sensitive: the property is "hbase.rootdir".
        conf.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        // Let failures propagate: continuing with a null admin/connection only
        // defers the error to a NullPointerException in a later call.
        connection = ConnectionFactory.createConnection(conf);
        admin = connection.getAdmin();
    }

    /**
     * Closes the admin handle and the connection, whichever of them are open.
     *
     * @throws IOException if closing either resource fails
     */
    public static void closeConnect() throws IOException {
        try {
            if (admin != null) {
                admin.close();
            }
        } finally {
            // Always attempt to release the connection, even if admin.close() threw.
            if (connection != null) {
                connection.close();
            }
        }
    }

    /**
     * Reports whether a table with the given name exists.
     *
     * @param tableName name of the table to look up
     * @return {@code true} if the table exists
     * @throws IOException if the lookup fails
     */
    public static boolean isTableExist(String tableName) throws IOException {
        return admin.tableExists(TableName.valueOf(tableName));
    }

    /**
     * Reads one line from standard input.
     *
     * @return the line that was read, without its line terminator
     */
    public static String getString() {
        return STDIN.nextLine();
    }

    /**
     * Opens the named table.
     *
     * @param tableName name of the table to open
     * @return the opened table, or {@code null} (after printing a message) when
     *         the table does not exist; the caller must close a non-null result
     * @throws IOException if the existence check or the open fails
     */
    public static Table stringGetTable(String tableName) throws IOException {
        if (!isTableExist(tableName)) {
            System.out.println("此表不存在!");
            return null;
        }
        return connection.getTable(TableName.valueOf(tableName));
    }

    /**
     * (1) Prints the descriptor of every table known to the cluster.
     *
     * @throws IOException if the descriptors cannot be listed
     */
    public void listAllTablesDetail() throws IOException {
        for (TableDescriptor descriptor : admin.listTableDescriptors()) {
            System.out.println(descriptor.toString());
        }
    }

    /**
     * (2) Prompts for a table name and prints every cell of that table.
     *
     * @throws IOException if the table cannot be opened or scanned
     */
    public static void viewOneTable() throws IOException {
        System.out.println("输入需要查看的表名: ");
        String tableName = getString();
        Table table = stringGetTable(tableName);
        if (table == null) {
            return;
        }
        // try-with-resources releases both the scanner and the table even when
        // the scan fails part-way through.
        try (Table opened = table; ResultScanner rows = opened.getScanner(new Scan())) {
            for (Result result : rows) {
                for (Cell cell : result.rawCells()) {
                    System.out.print("行键:" + Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.print("\t\t列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.print("\t\t列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println("\t\t值:" + Bytes.toString(CellUtil.cloneValue(cell)));
                    System.out.println("\t\t时间戳:" + cell.getTimestamp());
                }
            }
        }
    }

    /**
     * (3) Interactive menu for adding/removing a column family or a single
     * column value on an existing table.
     *
     * @throws IOException if any HBase operation fails
     */
    public static void menu() throws IOException {
        System.out.println("输入需要操作的表名: ");
        String tableName = getString();
        Table table = stringGetTable(tableName);
        if (table == null) {
            return;
        }
        try {
            System.out.println("1. 指定表中创建列族");
            System.out.println("2. 指定表和列族插入一列数据");
            System.out.println("3. 指定表中删除列族");
            System.out.println("4. 指定表中删除列族中一列");
            String op = getString();
            switch (op) {
                case "1": {
                    System.out.println("输入列族名");
                    String columnFamily = getString();
                    addColmunFamily(table, columnFamily);
                    break;
                }
                case "2": {
                    System.out.println("输入行键名 列族名 列名 以及要插入的值(以换行分隔)");
                    String rowKey = getString();
                    String columnFamily = getString();
                    String column = getString();
                    String value = getString();
                    addDataToTable(table, rowKey, columnFamily, column, value);
                    break;
                }
                case "3": {
                    System.out.println("输入列族名");
                    String columnFamily = getString();
                    deleteColmunFamily(table, columnFamily);
                    break;
                }
                case "4": {
                    System.out.println("输入行键名 列族名 列名(以换行分隔)");
                    String rowKey = getString();
                    String columnFamily = getString();
                    String column = getString();
                    deleteColmunData(table, rowKey, columnFamily, column);
                    break;
                }
                default:
                    // Report an unrecognised choice instead of silently doing nothing.
                    System.out.println("无效的选项: " + op);
                    break;
            }
        } finally {
            // Option 4 has already closed the table inside deleteColmunData;
            // this second close() is kept from the original flow.
            table.close();
        }
    }

    /**
     * Adds a column family with default settings to the table's schema.
     *
     * @param table        table whose schema is altered
     * @param columnFamily name of the family to add
     * @throws IOException if the schema change fails
     */
    public static void addColmunFamily(Table table, String columnFamily) throws IOException {
        ColumnFamilyDescriptor descriptor = ColumnFamilyDescriptorBuilder.of(columnFamily);
        admin.addColumnFamily(table.getName(), descriptor);
    }

    /**
     * Writes a single cell.
     *
     * @param table        target table (left open)
     * @param rowKey       row key
     * @param columnFamily column family name
     * @param column       column qualifier
     * @param value        value to store
     * @throws IOException if the write fails
     */
    public static void addDataToTable(Table table, String rowKey, String columnFamily, String column, String value) throws IOException {
        // Bytes.toBytes is used rather than the platform-default String.getBytes()
        // so that writes agree with the delete helpers below.
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }

    /**
     * Removes a column family from the table's schema.
     *
     * @param table        table whose schema is altered
     * @param columnFamily name of the family to remove
     * @throws IOException if the schema change fails
     */
    public static void deleteColmunFamily(Table table, String columnFamily) throws IOException {
        admin.deleteColumnFamily(table.getName(), columnFamily.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Deletes one column of one row and then closes the table.
     *
     * @param table        target table; closed before this method returns normally
     * @param rowKey       row key
     * @param columnFamily column family name
     * @param column       column qualifier
     * @throws IOException if the delete or the close fails
     */
    public static void deleteColmunData(Table table, String rowKey, String columnFamily, String column) throws IOException {
        Delete delete = new Delete(Bytes.toBytes(rowKey));
        delete.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
        table.delete(delete);
        table.close();
    }

    /**
     * Deletes a whole column family of one row and then closes the table.
     *
     * @param table        target table; closed before this method returns normally
     * @param rowKey       row key
     * @param columnFamily column family whose cells are removed from the row
     * @throws IOException if the delete or the close fails
     */
    public static void deleteRowFromTable(Table table, String rowKey, String columnFamily) throws IOException {
        Delete delete = new Delete(Bytes.toBytes(rowKey));
        delete.addFamily(Bytes.toBytes(columnFamily));
        table.delete(delete);
        table.close();
    }

    /**
     * (4) Prompts for a table name and deletes every row of that table.
     *
     * @throws IOException if the table cannot be opened, scanned or modified
     */
    public static void emptyTable() throws IOException {
        System.out.println("输入需要清空的表名: ");
        String tableName = getString();
        Table table = stringGetTable(tableName);
        if (table == null) {
            return;
        }
        try (Table opened = table; ResultScanner rows = opened.getScanner(new Scan())) {
            for (Result row : rows) {
                // One row-level delete per result; deleting once per cell would
                // repeat the same whole-row delete for every column of the row.
                opened.delete(new Delete(row.getRow()));
            }
        }
    }

    /**
     * (5) Counts the rows of a table through the aggregation client.
     *
     * <p>NOTE(review): the aggregation client presumably needs the aggregate
     * coprocessor enabled on the table — confirm against the cluster setup.
     *
     * @param table table to count (left open)
     * @return number of rows
     * @throws ArithmeticException if the row count does not fit in an {@code int}
     * @throws Throwable           whatever the aggregation call reports
     */
    public static int rowLength(Table table) throws Throwable {
        try (AggregationClient aggregation = new AggregationClient(conf)) {
            long rows = aggregation.rowCount(table.getName(), new LongColumnInterpreter(), new Scan());
            // Fail loudly rather than silently truncating a count above Integer.MAX_VALUE.
            return Math.toIntExact(rows);
        }
    }
}
(2)现有以下关系型数据库中的表和数据,要求将其转换为适合于HBase存储的表并插入数据。
学生表(Student)
学号(S_No) | 姓名(S_Name) | 性别(S_Sex) | 年龄(S_Age) |
2015001 | Zhangsan | male | 23 |
2015002 | Mary | female | 22 |
2015003 | Lisi | male | 24 |
课程表(Course)
课程号(C_No) | 课程名(C_Name) | 学分(C_Credit) |
123001 | Math | 2.0 |
123002 | Computer Science | 5.0 |
123003 | English | 3.0 |
选课表(SC)
学号(SC_Sno) | 课程号(SC_Cno) | 成绩(SC_Score) |
2015001 | 123001 | 86 |
2015001 | 123003 | 69 |
2015002 | 123002 | 77 |
2015002 | 123003 | 99 |
2015002 | 123002 | 77 |
2015003 | 123001 | 98 |
2015003 | 123002 | 95 |
同时,请编程实现以下功能:
①createTable(String tableName, String[] fields)
创建表,参数tableName为表的名称,字符串数组fields为存储记录各个字段名称的数组。要求当HBase已经存在名为tableName的表的时候,先删除原有的表,然后再创建新的表。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Task (1): (re)creates an HBase table with the given column families.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Builds the configuration and opens the connection and admin handle.
     *
     * @throws IllegalStateException if the connection cannot be established
     */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Configuration keys are case-sensitive: the property is "hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail here with the real cause instead of a NullPointerException
            // on the first use of admin/connection.
            throw new IllegalStateException("Unable to connect to HBase", e);
        }
    }

    /**
     * Releases the admin handle and the connection; close failures are only
     * printed so that both resources are always attempted.
     */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a table, dropping any existing table of the same name first.
     *
     * @param myTableName name of the table to create
     * @param fields      names of the column families to create
     * @throws IOException if any admin operation fails
     */
    public static void createTable(String myTableName, String[] fields) throws IOException {
        TableName tableName = TableName.valueOf(myTableName);
        if (admin.tableExists(tableName)) {
            System.out.println("table exist");
            // Only an enabled table may be disabled; skip the call when it is
            // already disabled so the delete can still proceed.
            if (admin.isTableEnabled(tableName)) {
                admin.disableTable(tableName);
            }
            admin.deleteTable(tableName);
        }
        TableDescriptorBuilder tableDescriptor = TableDescriptorBuilder.newBuilder(tableName);
        for (String field : fields) {
            ColumnFamilyDescriptor family =
                    ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(field)).build();
            tableDescriptor.setColumnFamily(family);
        }
        admin.createTable(tableDescriptor.build());
        System.out.println("table created");
    }

    public static void main(String[] args) throws IOException {
        init();
        try {
            createTable("table2", new String[] {"score"});
        } finally {
            // Release the cluster resources even when table creation fails.
            close();
        }
    }
}
②addRecord(String tableName, String row, String[] fields, String[] values)
向表tableName、行row(用S_Name表示)和字符串数组fields指定的单元格中添加对应的数据values。其中,fields中每个元素如果对应的列族下还有相应的列限定符的话,用“columnFamily:column”表示。例如,同时向“Math”、“Computer Science”、“English”三列添加成绩时,字符串数组fields为{"Score:Math", "Score:Computer Science", "Score:English"},数组values存储这三门课的成绩。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
/**
 * Task (2): writes several cells of one row in a single operation.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    // Explicit charset for every String -> byte[] conversion; the no-argument
    // String.getBytes() depends on the platform default.
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Builds the configuration and opens the connection and admin handle.
     *
     * @throws IllegalStateException if the connection cannot be established
     */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Configuration keys are case-sensitive: the property is "hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail here with the real cause instead of a NullPointerException
            // on the first use of admin/connection.
            throw new IllegalStateException("Unable to connect to HBase", e);
        }
    }

    /**
     * Releases the admin handle and the connection; close failures are only
     * printed so that both resources are always attempted.
     */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Stores {@code values[i]} in the cell named by {@code fields[i]} of the
     * given row.
     *
     * @param tableName table to write to
     * @param row       row key
     * @param fields    cell names, either {@code "family:qualifier"} or just
     *                  {@code "family"} (written with an empty qualifier)
     * @param values    values to store; must supply one value per field
     * @throws IllegalArgumentException if there are fewer values than fields
     * @throws IOException              if the table cannot be opened or written
     */
    public static void addRecord(String tableName, String row, String[] fields, String[] values) throws IOException {
        if (values.length < fields.length) {
            throw new IllegalArgumentException(
                    "expected " + fields.length + " values but got " + values.length);
        }
        if (fields.length == 0) {
            // Nothing to write; a Put without columns would be rejected.
            return;
        }
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            // All cells belong to the same row, so one Put (one round trip)
            // carries every column instead of one Put per field.
            Put put = new Put(row.getBytes(UTF_8));
            for (int i = 0; i < fields.length; i++) {
                // Limit 2 keeps any further ':' inside the qualifier.
                String[] parts = fields[i].split(":", 2);
                byte[] family = parts[0].getBytes(UTF_8);
                byte[] qualifier = parts.length > 1 ? parts[1].getBytes(UTF_8) : new byte[0];
                put.addColumn(family, qualifier, values[i].getBytes(UTF_8));
            }
            table.put(put);
        }
    }

    public static void main(String[] args) throws IOException {
        init();
        try {
            addRecord("Student", "zhansan", new String[] {"Score:Math"}, new String[] {"98"});
        } finally {
            // Release the cluster resources even when the write fails.
            close();
        }
    }
}
③scanColumn(String tableName, String column)
浏览表tableName某一列的数据,如果某一行记录中该列数据不存在,则返回null。要求当参数column为某一列族名称时,如果底下有若干个列限定符,则要列出每个列限定符代表的列的数据;当参数column为某一列具体名称(例如“Score:Math”)时,只需要列出该列的数据。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Task (3): prints the data of one column family or one column for every row.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Builds the configuration and opens the connection and admin handle.
     *
     * @throws IllegalStateException if the connection cannot be established
     */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Configuration keys are case-sensitive: the property is "hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail here with the real cause instead of a NullPointerException
            // on the first use of admin/connection.
            throw new IllegalStateException("Unable to connect to HBase", e);
        }
    }

    /**
     * Releases the admin handle and the connection; close failures are only
     * printed so that both resources are always attempted.
     */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints, for every row of the table, the cells selected by {@code column}.
     *
     * <p>When {@code column} is a family name, every qualifier under that
     * family is printed; when it is {@code "family:qualifier"}, only that
     * column is printed. A row that has no matching cell is reported with the
     * value {@code null}.
     *
     * @param tableName table to scan
     * @param column    column family name, or {@code "family:qualifier"}
     * @throws IOException if the table cannot be opened or scanned
     */
    public static void scanColumn(String tableName, String column) throws IOException {
        // Limit 2 keeps any further ':' inside the qualifier.
        String[] parts = column.split(":", 2);
        byte[] family = Bytes.toBytes(parts[0]);
        // null means "every qualifier of the family".
        byte[] qualifier = parts.length > 1 ? Bytes.toBytes(parts[1]) : null;

        // The scan is deliberately not narrowed to the column: rows lacking it
        // must still be visited so they can be reported as null.
        try (Table table = connection.getTable(TableName.valueOf(tableName));
             ResultScanner rows = table.getScanner(new Scan())) {
            for (Result result : rows) {
                boolean found = false;
                for (Cell cell : result.rawCells()) {
                    if (!CellUtil.matchingFamily(cell, family)) {
                        continue;
                    }
                    if (qualifier != null && !CellUtil.matchingQualifier(cell, qualifier)) {
                        continue;
                    }
                    found = true;
                    System.out.print("行键:" + Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.print("\t列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.print("\t列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println("\t值:" + Bytes.toString(CellUtil.cloneValue(cell)));
                    System.out.println("\t时间戳:" + cell.getTimestamp());
                }
                if (!found) {
                    System.out.println("行键:" + Bytes.toString(result.getRow()) + "\t值:null");
                }
            }
        }
    }

    public static void main(String[] args) throws IOException {
        init();
        try {
            scanColumn("Student", "S_Name");
        } finally {
            // Release the cluster resources even when the scan fails.
            close();
        }
    }
}
④modifyData(String tableName, String row, String column)
修改表tableName,行row(可以用学生姓名S_Name表示),列column指定的单元格的数据。
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
/**
 * Task (4): overwrites the value of one cell with a value read from standard
 * input.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;
    // Retained only because it is a public field; modifyData no longer reads
    // or writes it.
    public static long ts;

    /**
     * Builds the configuration and opens the connection and admin handle.
     *
     * @throws IllegalStateException if the connection cannot be established
     */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Configuration keys are case-sensitive: the property is "hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail here with the real cause instead of a NullPointerException
            // on the first use of admin/connection.
            throw new IllegalStateException("Unable to connect to HBase", e);
        }
    }

    /**
     * Releases the admin handle and the connection; close failures are only
     * printed so that both resources are always attempted.
     */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Three-argument form matching the task statement: {@code column} is
     * either {@code "family:qualifier"} or a bare family name (empty qualifier).
     *
     * @param tableName table to modify
     * @param row       row key
     * @param column    column family name, or {@code "family:qualifier"}
     * @throws IOException if the table cannot be opened or written
     */
    public static void modifyData(String tableName, String row, String column) throws IOException {
        // Limit 2 keeps any further ':' inside the qualifier.
        String[] parts = column.split(":", 2);
        modifyData(tableName, row, parts[0], parts.length > 1 ? parts[1] : "");
    }

    /**
     * Reads one token from standard input and stores it as the new value of
     * the given cell.
     *
     * @param tableName    table to modify
     * @param row          row key
     * @param columnFamily column family name
     * @param column       column qualifier
     * @throws IOException if the table cannot be opened or written
     */
    public static void modifyData(String tableName, String row, String columnFamily, String column) throws IOException {
        // Not closed on purpose: closing the Scanner would close System.in.
        Scanner scanner = new Scanner(System.in);
        String value = scanner.next();
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            Put put = new Put(row.getBytes(StandardCharsets.UTF_8));
            // No explicit timestamp: the write becomes the newest version of the
            // cell. Re-using a looked-up timestamp (which defaulted to 0 when the
            // lookup found nothing) stored the value behind the existing version,
            // so the modification was not visible to readers.
            put.addColumn(columnFamily.getBytes(StandardCharsets.UTF_8),
                    column.getBytes(StandardCharsets.UTF_8),
                    value.getBytes(StandardCharsets.UTF_8));
            table.put(put);
        }
    }

    public static void main(String[] args) throws IOException {
        init();
        try {
            modifyData("Student", "row_1", "S_Name");
        } finally {
            // Release the cluster resources even when the write fails.
            close();
        }
    }
}
⑤deleteRow(String tableName, String row)
删除表tableName中row指定的行的记录。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Task (5): deletes one whole row from a table.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Builds the configuration and opens the connection and admin handle.
     *
     * @throws IllegalStateException if the connection cannot be established
     */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Configuration keys are case-sensitive: the property is "hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail here with the real cause instead of a NullPointerException
            // on the first use of admin/connection.
            throw new IllegalStateException("Unable to connect to HBase", e);
        }
    }

    /**
     * Releases the admin handle and the connection; close failures are only
     * printed so that both resources are always attempted.
     */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every cell of the given row.
     *
     * @param tableName table to delete from
     * @param row       key of the row to remove
     * @throws IOException if the table cannot be opened or the delete fails
     */
    public static void deleteRow(String tableName, String row) throws IOException {
        // try-with-resources closes the table even when delete() throws.
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            table.delete(new Delete(Bytes.toBytes(row)));
        }
    }

    public static void main(String[] args) throws IOException {
        init();
        try {
            deleteRow("student", "score");
        } finally {
            // Release the cluster resources even when the delete fails.
            close();
        }
    }
}