I. Accessing HBase through the programming API: full-table scans and namespace- and table-level CRUD
----------------------------------------------------------------------
/**
 * Test deleting data
 * @throws Exception
 */
@Test
public void tsDelData() throws Exception {
    // delete the "name" and "id" columns of row "row0001"
    // (tb: the Table handle opened elsewhere in the test class)
    Delete d = new Delete(Bytes.toBytes("row0001"));
    // addColumn deletes only the latest version; use addColumns to delete all versions
    d.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"));
    d.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("id"));
    tb.delete(d);
    System.out.println("delete over");
}
/**
 * Test scanning the table (here restricted to the row-key range
 * [row5000, row8000); omit the start/stop rows for a true full-table scan)
 */
@Test
public void tsScanTable() throws Exception {
    Scan scan = new Scan();
    // start key is inclusive, stop key is exclusive
    scan.setStartRow(Bytes.toBytes("row5000"));
    scan.setStopRow(Bytes.toBytes("row8000"));
    ResultScanner scanner = tb.getScanner(scan);
    Result result = null;
    while ((result = scanner.next()) != null) {
        System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"))));
    }
    scanner.close();
}
@Test
public void tsScanTable2() throws Exception {
    Scan scan = new Scan();
    scan.setStartRow(Bytes.toBytes("row5000"));
    scan.setStopRow(Bytes.toBytes("row5010"));
    ResultScanner scanner = tb.getScanner(scan);
    Result result = null;
    while ((result = scanner.next()) != null) {
        // all qualifier => value pairs of family "f1" for this row
        Map<byte[], byte[]> map = result.getFamilyMap(Bytes.toBytes("f1"));
        for (byte[] qualifier : map.keySet()) {
            System.out.println(Bytes.toString(qualifier));
            System.out.println(Bytes.toString(map.get(qualifier)));
        }
    }
    scanner.close();
}
@Test
public void tsScanTable3() throws Exception {
    Scan scan = new Scan();
    scan.setStartRow(Bytes.toBytes("row5000"));
    scan.setStopRow(Bytes.toBytes("row5010"));
    ResultScanner scanner = tb.getScanner(scan);
    Result result = null;
    while ((result = scanner.next()) != null) {
        // the complete map of one row: family => ( qualifier => ( timestamp => value ) )
        NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> map = result.getMap();
        for (byte[] bs1 : map.keySet()) {
            // key => column family name, e.g. "f1"
            String f1 = Bytes.toString(bs1);
            // value => qualifier => ( timestamp => value )
            NavigableMap<byte[], NavigableMap<Long, byte[]>> map2 = map.get(bs1);
            for (byte[] bs2 : map2.keySet()) {
                String column = Bytes.toString(bs2);
                System.out.println(f1 + "_" + column);
                NavigableMap<Long, byte[]> map3 = map2.get(bs2);
                for (Long l3 : map3.keySet()) {
                    System.out.println("time_stamp:" + l3);
                    System.out.println(Bytes.toString(map3.get(l3)));
                }
            }
        }
    }
    scanner.close();
}
/**
 * Operate on namespaces through the API
 * @throws Exception
 */
@Test
public void tsNamespace() throws Exception {
    Admin admin = conn.getAdmin();
    NamespaceDescriptor des = NamespaceDescriptor.create("ns2").build();
    // create
    admin.createNamespace(des);
    // list (after create)
    NamespaceDescriptor[] nds = admin.listNamespaceDescriptors();
    for (NamespaceDescriptor nd : nds) {
        System.out.println(nd.getName());
    }
    // delete
    //admin.deleteNamespace("ns2");
    // list (after delete)
    NamespaceDescriptor[] nds1 = admin.listNamespaceDescriptors();
    for (NamespaceDescriptor nd : nds1) {
        System.out.println(nd.getName());
    }
}
/**
 * Operate on tables through the API
 */
@Test
public void tsTable() throws Exception {
    Admin admin = conn.getAdmin();
    TableName name = TableName.valueOf("ns2:t2");
    HTableDescriptor hb = new HTableDescriptor(name);
    hb.addFamily(new HColumnDescriptor("f1"));
    // create
    admin.createTable(hb);
    // describe
    HTableDescriptor hds = admin.getTableDescriptor(name);
    System.out.println(hds.getNameAsString());
    // delete (a table must be disabled before it can be dropped)
    admin.disableTable(name);
    admin.deleteTable(name);
}
II. Manually splitting tables and regions
-------------------------------------------------------------------
1. Set the region split threshold (default 10 GB): when a region's store files grow to this size, the region is split in two
<property>
<name>hbase.hregion.max.filesize</name>
<value>10737418240</value>
<source>hbase-default.xml</source>
</property>
2. Bisecting split: split each region of the table at its midpoint
$hbase> split 'ns1:t1'
3. Custom region split (split the region named below, the one starting at row 'row5185', into two regions at row key 'row8888')
$hbase> split 'ns1:t1,row5185,1536914987895.eb6152402e9550c3d7594b377cf64127.', 'row8888'
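The same splits can be triggered from the Java API via Admin#split. A minimal sketch in the style of the tests in section I, assuming the same Connection field conn; the table and split key are illustrative:
/**
 * Trigger region splits through the Admin API
 */
@Test
public void tsSplit() throws Exception {
    Admin admin = conn.getAdmin();
    // split every region of ns1:t1 at its midpoint (same as: split 'ns1:t1')
    admin.split(TableName.valueOf("ns1:t1"));
    // split at an explicit row key (same as the 'row8888' argument in the shell)
    admin.split(TableName.valueOf("ns1:t1"), Bytes.toBytes("row8888"));
}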
III. Split storms and pre-splitting
--------------------------------------------------------------
1. Split storms
-- The default split threshold is hbase.hregion.max.filesize = 10G: when a region's store files grow to this size, the region is split in two.
-- When many regions reach 10G at the same time, they all split at once. This is a split storm, and it can seriously hurt the performance of the HBase cluster.
2. Ways to avoid split storms (see the Java sketch after this list)
a. Manual splitting
-- Raise the threshold to 100G, then split manually before regions actually reach 100G (in practice, when the cluster starts to feel slow and laggy), followed by a manual move.
b. Pre-splitting
-- Split the table when it is created [the table is already split the moment creation finishes; on put, each row is routed to the matching region by its rowkey]
-- $hbase> create 'ns1:t1', 'f1', SPLITS => ['row3000','row6000']
-- Note: SPLITS must be uppercase
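From Java, a table can be pre-split by passing split keys to Admin#createTable. A minimal sketch, assuming the same conn as above; the table name ns1:t6 and the split keys are illustrative:
/**
 * Pre-split a table at creation time
 */
@Test
public void tsPreSplit() throws Exception {
    Admin admin = conn.getAdmin();
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("ns1:t6"));
    htd.addFamily(new HColumnDescriptor("f1"));
    // same effect as: create 'ns1:t6', 'f1', SPLITS => ['row3000','row6000']
    byte[][] splitKeys = {Bytes.toBytes("row3000"), Bytes.toBytes("row6000")};
    admin.createTable(htd, splitKeys);
}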
IV. Manually moving and merging regions
----------------------------------------------------------------------
// Manual region move: move + the region's encoded name + the destination region server's 'host,port,startcode'
$hbase> move '1ee30271f70c5780bfe24d9641730d80' , 's300,16020,1536905384175'
// Manual region merge: merge_region + region A's encoded name + region B's encoded name
$hbase> merge_region '540cb0301b61c3b1de7e471a7145198a','98876d34c7862971e2514548a6921c11'
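These two shell commands map onto Admin#move and Admin#mergeRegions in the Java API. A minimal sketch reusing the encoded region names and server name from the shell examples above (illustrative values; conn as before):
/**
 * Move and merge regions through the Admin API
 */
@Test
public void tsMoveAndMerge() throws Exception {
    Admin admin = conn.getAdmin();
    // move a region, identified by its encoded name, to 'host,port,startcode'
    admin.move(Bytes.toBytes("1ee30271f70c5780bfe24d9641730d80"),
            Bytes.toBytes("s300,16020,1536905384175"));
    // merge two regions identified by their encoded names;
    // 'false' means do not force a merge of non-adjacent regions
    admin.mergeRegions(Bytes.toBytes("540cb0301b61c3b1de7e471a7145198a"),
            Bytes.toBytes("98876d34c7862971e2514548a6921c11"), false);
}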
V. Configuring HBase for Hadoop HA (enables automatic failover; fixes the issue where the HBase master sometimes fails to start because it cannot find HDFS)
--------------------------------------------------------------------
1. Add the Hadoop configuration directory to the HBASE_CLASSPATH environment variable and distribute the change to all nodes
[/soft/hbase/conf/hbase-env.sh]
export HBASE_CLASSPATH=$HBASE_CLASSPATH:/soft/hadoop/etc/hadoop
2. In the hbase/conf directory, create a symbolic link to Hadoop's hdfs-site.xml; do this on every node
$>ln -s /soft/hadoop/etc/hadoop/hdfs-site.xml /soft/hbase/conf/hdfs-site.xml
3. Change the hbase.rootdir value in hbase-site.xml to the HA nameservice URI and distribute the file to all nodes
[/soft/hbase/conf/hbase-site.xml]
<property>
<name>hbase.rootdir</name>
<value>hdfs://mycluster/hbase</value>
</property>
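To sanity-check the client side of this change: HBaseConfiguration.create() loads hbase-default.xml and hbase-site.xml from the classpath, so the resolved root directory can be printed directly. A minimal sketch:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class CheckRootdir {
    public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // should print hdfs://mycluster/hbase once the new config is distributed
        System.out.println(conf.get("hbase.rootdir"));
    }
}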
VI. Versions, timestamps, TTL, and KEEP_DELETED_CELLS
---------------------------------------------------------
1. Specify the number of versions at table creation (version retention is set per column family; every column in the family keeps that many versions)
$hbase> create 'ns1:t2', {NAME => 'f1', VERSIONS => 3},{NAME => 'f2'}
2. Get, specifying the number of versions
$hbase> get 'ns1:t2', 'row1', {COLUMN => 'f1', VERSIONS => 4}   // likewise uppercase: keys that refer to table schema must be uppercase; command keywords and your own values may be lowercase
3. Get, specifying versions and a timestamp
$hbase> get 'ns1:t2', 'row1', {COLUMN => 'f1', TIMESTAMP => 1536982858288, VERSIONS => 4}
$hbase> get 'ns1:t2', 'row1', {COLUMN => 'f1', TIMERANGE => [1536982858280,1536982858289], VERSIONS => 4}   // timestamp range query over the half-open interval [start, end)
4. Delete data, specifying a timestamp
$hbase> delete 'ns1:t1', 'row1', 'f1:name', 1536982861068
5. Specify a TTL [time to live] of 7 s at table creation -- all data in the table lives for only 7 seconds
$hbase> create 'ns1:t4',{NAME=>'f1', TTL=> 7, VERSIONS=>3}
6. Specify KEEP_DELETED_CELLS at table creation [whether deleted cells are still kept in HBase after a delete, except when the TTL removes them]
$hbase> create 'ns1:t4',{NAME=>'f1', TTL=> 7, VERSIONS=>3 , KEEP_DELETED_CELLS => true}
7. How TTL and KEEP_DELETED_CELLS interact (a raw scan that reveals deleted cells is sketched below)
1. Both can be set at table creation; TTL takes precedence over KEEP_DELETED_CELLS.
2. When a TTL is set, every cell is purged from HBase once it is older than the TTL; in effect the whole table's data expires.
3. With KEEP_DELETED_CELLS = true, deleted cells remain in HBase even after a flush and can still be found with a raw scan.
4. With KEEP_DELETED_CELLS = false (the default), after a flush the deleted data is gone from HBase, and even a raw scan cannot find it.
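The raw scan mentioned in points 3 and 4 can be issued from Java with Scan#setRaw, which makes the scanner return delete markers and deleted cells that have not yet been collected. A minimal sketch against the ns1:t4 table from item 6 (conn as in the other tests):
/**
 * Raw scan: also returns delete markers and not-yet-purged deleted cells
 * (shell equivalent: scan 'ns1:t4', {RAW => true, VERSIONS => 10})
 */
@Test
public void tsRawScan() throws Exception {
    Table table = conn.getTable(TableName.valueOf("ns1:t4"));
    Scan scan = new Scan();
    scan.setRaw(true);        // include delete markers and deleted cells
    scan.setMaxVersions();    // return every retained version
    ResultScanner scanner = table.getScanner(scan);
    for (Result r : scanner) {
        for (Cell c : r.rawCells()) {
            System.out.println(c);
        }
    }
    scanner.close();
}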
8. API code walkthrough
/**
 * Get with a specified number of versions
 * @throws Exception
 */
@Test
public void tsVersions() throws Exception {
    TableName tbName = TableName.valueOf("ns1:t2");
    Table table = conn.getTable(tbName);
    Get get = new Get(Bytes.toBytes("row1"));
    // retrieve all retained versions
    get.setMaxVersions();
    Result rs = table.get(get);
    List<Cell> cells = rs.getColumnCells(Bytes.toBytes("f1"), Bytes.toBytes("name"));
    for (Cell c : cells) {
        String family = Bytes.toString(c.getFamily());
        String column = Bytes.toString(c.getQualifier());
        String value = Bytes.toString(c.getValue());
        long timeStamp = c.getTimestamp();
        System.out.println(family + "/" + column + "/" + value + "/" + timeStamp);
    }
}
/**
 * Get with a timestamp range
 * @throws Exception
 */
@Test
public void tsTimeStamp() throws Exception {
    TableName tbName = TableName.valueOf("ns1:t2");
    Table table = conn.getTable(tbName);
    Get get = new Get(Bytes.toBytes("row1"));
    //get.setTimeStamp();  // use this for a single exact timestamp instead of a range
    // half-open range [start, end) on family "f1"
    get.setColumnFamilyTimeRange(Bytes.toBytes("f1"), 1536982855705L, 1536982861068L);
    get.setMaxVersions();
    Result rs = table.get(get);
    List<Cell> cells = rs.getColumnCells(Bytes.toBytes("f1"), Bytes.toBytes("name"));
    for (Cell c : cells) {
        String family = Bytes.toString(c.getFamily());
        String column = Bytes.toString(c.getQualifier());
        String value = Bytes.toString(c.getValue());
        long timeStamp = c.getTimestamp();
        System.out.println(family + "/" + column + "/" + value + "/" + timeStamp);
    }
}
/**
 * Create a table whose family keeps deleted cells
 * @throws Exception
 */
@Test
public void tsKeepDeleteCells() throws Exception {
    Admin admin = conn.getAdmin();
    TableName name = TableName.valueOf("ns1:t5");
    HTableDescriptor hb = new HTableDescriptor(name);
    // create the column family and set its properties
    HColumnDescriptor col = new HColumnDescriptor("f1");
    col.setKeepDeletedCells(true);
    col.setMaxVersions(10);
    col.setTimeToLive(20);   // seconds
    // add the family to the table descriptor
    hb.addFamily(col);
    // create the table through the admin
    admin.createTable(hb);
}