引言:过滤器的类型很多,但是可以分为两大类
- 比较过滤器
- 专用过滤器
过滤器的作用是在服务端判断数据是否满足条件,然后只将满足条件的数据返回给客户端;
hbase过滤器的比较运算符:
LESS <
LESS_OR_EQUAL <=
EQUAL =
NOT_EQUAL <>
GREATER_OR_EQUAL >=
GREATER >
NO_OP 排除所有
Hbase过滤器的比较器(指定比较机制):
BinaryComparator 按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[])
BinaryPrefixComparator 跟前面相同,只是比较左端的数据是否相同
NullComparator 判断给定的是否为空
BitComparator 按位比较
RegexStringComparator 提供一个正则的比较器,仅支持 EQUAL 和非EQUAL
SubstringComparator 判断提供的子串是否出现在value中。
1、比较过滤器
1.1、行键过滤器RowFilter
Filter filter1 = new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("row-22")));
scan.setFilter(filter1);
1.2、列族过滤器FamilyFilter
Filter filter1 = new FamilyFilter(CompareFilter.CompareOp.LESS, new BinaryComparator(Bytes.toBytes("colfam3")));
scan.setFilter(filter1);
1.3、列过滤器QualifierFilter
filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("col-2")));
scan.setFilter(filter1);
1.4、值过滤器ValueFilter
Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator(".4") );
scan.setFilter(filter1);
2、专用过滤器
2.1、单列值过滤器SingleColumnValueFilter
返回满足条件的整行
SingleColumnValueFilter filter = new SingleColumnValueFilter(
Bytes.toBytes("colfam1"),
Bytes.toBytes("col-5"),
CompareFilter.CompareOp.NOT_EQUAL,
new SubstringComparator("val-5"));
filter.setFilterIfMissing(true); //如果不设置为true,则那些不包含指定column的行也会返回
scan.setFilter(filter1);
2.2、SingleColumnValueExcludeFilter与SingleColumnValueFilter相反
2.3、前缀过滤器PrefixFilter
针对行键
Filter filter = new PrefixFilter(Bytes.toBytes("row1"));
scan.setFilter(filter1);
2.4、列前缀过滤器ColumnPrefixFilter
Filter filter = new ColumnPrefixFilter(Bytes.toBytes("qual2"));
scan.setFilter(filter1);
2.5、分页过滤器PageFilter
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "spark01:2181,spark02:2181,spark03:2181");
String tableName = "testfilter";
String cfName = "f1";
final byte[] POSTFIX = new byte[] { 0x00 };
HTable table = new HTable(conf, tableName);
Filter filter = new PageFilter(3);
byte[] lastRow = null;
int totalRows = 0;
while (true) {
Scan scan = new Scan();
scan.setFilter(filter);
if(lastRow != null){
//注意这里添加了POSTFIX操作,用来重置扫描边界
byte[] startRow = Bytes.add(lastRow,POSTFIX);
scan.setStartRow(startRow);
}
ResultScanner scanner = table.getScanner(scan);
int localRows = 0;
Result result;
while((result = scanner.next()) != null){
System.out.println(localRows++ + ":" + result);
totalRows ++;
lastRow = result.getRow();
}
scanner.close();
if(localRows == 0) break;
}
System.out.println("total rows:" + totalRows);
}
3、多种过滤条件使用
/**
* 多种过滤条件的使用方法
* @throws Exception
*/
@Test
public void testScan() throws Exception{
HTable table = new HTable(conf, "person_info".getBytes());
Scan scan = new Scan(Bytes.toBytes("person_rk_bj_zhang_000001"), Bytes.toBytes("person_rk_bj_zhang_000002"));
//前缀过滤器----针对行键
Filter filter = new PrefixFilter(Bytes.toBytes("rk"));
//行过滤器 ---针对行键
ByteArrayComparable rowComparator = new BinaryComparator(Bytes.toBytes("person_rk_bj_zhang_000001"));
RowFilter rf = new RowFilter(CompareOp.LESS_OR_EQUAL, rowComparator);
/**
* 假设rowkey格式为:创建日期_发布日期_ID_TITLE
* 目标:查找 发布日期 为 2014-12-21 的数据
*/
rf = new RowFilter(CompareOp.EQUAL , new SubstringComparator("_2014-12-21_"));
//单值过滤器1完整匹配字节数组
new SingleColumnValueFilter("base_info".getBytes(), "name".getBytes(), CompareOp.EQUAL, "zhangsan".getBytes());
//单值过滤器2 匹配正则表达式
ByteArrayComparable comparator = new RegexStringComparator("zhang.");
new SingleColumnValueFilter("info".getBytes(), "NAME".getBytes(), CompareOp.EQUAL, comparator);
//单值过滤器3匹配是否包含子串,大小写不敏感
comparator = new SubstringComparator("wu");
new SingleColumnValueFilter("info".getBytes(), "NAME".getBytes(), CompareOp.EQUAL, comparator);
//键值对元数据过滤-----family过滤----字节数组完整匹配
FamilyFilter ff = new FamilyFilter(
CompareOp.EQUAL ,
new BinaryComparator(Bytes.toBytes("base_info")) //表中不存在inf列族,过滤结果为空
);
//键值对元数据过滤-----family过滤----字节数组前缀匹配
ff = new FamilyFilter(
CompareOp.EQUAL ,
new BinaryPrefixComparator(Bytes.toBytes("inf")) //表中存在以inf打头的列族info,过滤结果为该列族所有行
);
//键值对元数据过滤-----qualifier过滤----字节数组完整匹配
filter = new QualifierFilter(
CompareOp.EQUAL ,
new BinaryComparator(Bytes.toBytes("na")) //表中不存在na列,过滤结果为空
);
filter = new QualifierFilter(
CompareOp.EQUAL ,
new BinaryPrefixComparator(Bytes.toBytes("na")) //表中存在以na打头的列name,过滤结果为所有行的该列数据
);
//基于列名(即Qualifier)前缀过滤数据的ColumnPrefixFilter
filter = new ColumnPrefixFilter("na".getBytes());
//基于列名(即Qualifier)多个前缀过滤数据的MultipleColumnPrefixFilter
byte[][] prefixes = new byte[][] {Bytes.toBytes("na"), Bytes.toBytes("me")};
filter = new MultipleColumnPrefixFilter(prefixes);
//为查询设置过滤条件
scan.setFilter(filter);
scan.addFamily(Bytes.toBytes("base_info"));
//一行
// Result result = table.get(get);
//多行的数据
ResultScanner scanner = table.getScanner(scan);
for(Result r : scanner){
/**
for(KeyValue kv : r.list()){
String family = new String(kv.getFamily());
System.out.println(family);
String qualifier = new String(kv.getQualifier());
System.out.println(qualifier);
System.out.println(new String(kv.getValue()));
}
*/
//直接从result中取到某个特定的value
byte[] value = r.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("name"));
System.out.println(new String(value));
}
table.close();
}