再次吐槽公司的sb环境:不让上网,也不能插U盘。今天有事回家,顺便写一下HBase region split的笔记。
在管理集群时,最容易导致HBase节点发生故障的,恐怕就是region split和compact了:日志里常见split时间太长、文件找不到、split的时候response too slow等问题。所以先看看HBase region split的源码,希望对以后排查问题能有帮助。
一、HBase region split源码分析
1.HBaseAdmin 发起 hbase split
2.HRegionServer 确定分割点 region split point
3.CompactSplitThread和SplitRequest 进行region分割
3.1SplitTransaction st.prepare()初始化两个子region
3.2splitTransaction execute执行分割
3.2.1两个子region DaughterOpener线程 start
3.2.2若region 需要compact,进行compact流程
二 、hbase region split UML图
1.HBaseAdmin 发起 hbase split
public void split(final byte [] tableNameOrRegionName,
final byte [] splitPoint) throws IOException, InterruptedException {
CatalogTracker ct = getCatalogTracker();
try {
Pair<HRegionInfo, ServerName> regionServerPair
= getRegion(tableNameOrRegionName, ct);//获得HRI,若是但region
if (regionServerPair != null) {
if (regionServerPair.getSecond() == null) {
throw new NoServerForRegionException(Bytes.toStringBinary(tableNameOrRegionName));
} else {
//split region 重点分析方法
split(regionServerPair.getSecond(), regionServerPair.getFirst(), splitPoint);
} else {
//table split流程
final String tableName = tableNameString(tableNameOrRegionName, ct);
List<Pair<HRegionInfo, ServerName>> pairs =
for (Pair<HRegionInfo, ServerName> pair: pairs) {
// May not be a server for a particular row
if (pair.getSecond() == null) continue;
HRegionInfo r = pair.getFirst();
// check for parents
if (r.isSplitParent()) continue;
// if a split point given, only split that particular region
if (splitPoint != null && !r.containsRow(splitPoint)) continue;
// call out to region server to do split now
split(pair.getSecond(), pair.getFirst(), splitPoint);
} finally {
2.HRegionServer 确定分割点 region split point
public void splitRegion(HRegionInfo regionInfo, byte[] splitPoint)
throws NotServingRegionException, IOException {
HRegion region = getRegion(regionInfo.getRegionName());//根据HRI获取region
region.flushcache();//flush cache 有几种情况不进行flush
//the cache is empte | the region is closed.| a flush is already in progress | writes are disabled
region.forceSplit(splitPoint);//设置split point
compactSplitThread.requestSplit(region, region.checkSplit());//获取split point,进行split
protected byte[] getSplitPoint() {
byte[] explicitSplitPoint = this.region.getExplicitSplitPoint();
if (explicitSplitPoint != null) {
return explicitSplitPoint;
Map<byte[], Store> stores = region.getStores();
byte[] splitPointFromLargestStore = null;
long largestStoreSize = 0;
for (Store s : stores.values()) {
byte[] splitPoint = s.getSplitPoint();
long storeSize = s.getSize();
if (splitPoint != null && largestStoreSize < storeSize) {//获得最大store
splitPointFromLargestStore = splitPoint;
largestStoreSize = storeSize;
return splitPointFromLargestStore;
3.CompactSplitThread和SplitRequest 进行region分割
public void run() {
if (this.server.isStopping() || this.server.isStopped()) {
LOG.debug("Skipping split because server is stopping=" +
this.server.isStopping() + " or stopped=" + this.server.isStopped());
try {
final long startTime = System.currentTimeMillis();
SplitTransaction st = new SplitTransaction(parent, midKey);
// If prepare does not return true, for some reason -- logged inside in
// the prepare call -- we are not ready to split just now. Just return.
// 3.1SplitTransaction st.prepare()初始化两个子region
if (!st.prepare()) return;
try {
st.execute(this.server, this.server);//3.2splitTransaction execute执行分割
} catch (Exception e) {
3.2splitTransaction execute执行分割
public PairOfSameType<HRegion> execute(final Server server,
final RegionServerServices services)
throws IOException {
PairOfSameType<HRegion> regions = createDaughters(server, services);
//创建split临时目录,改变region zk状态,关闭region,停止所有store服务
//创建daughter目录,将region storefile放入目录中
//创建子region A、B,在zk上注册,并且设置原HRI下线
openDaughters(server, services, regions.getFirst(), regions.getSecond());
transitionZKNode(server, services, regions.getFirst(), regions.getSecond());
return regions;
3.2.0 createDaughters函数的操作
1. public PairOfSameType<HRegion> stepsBeforePONR(final
2. final RegionServerServices services, boolean testing) throws
3. // Set ephemeral SPLITTING znode up in zk. Mocked servers sometimes don't
4. // have zookeeper so don't do zk stuff if server or zookeeper is null
5. if (server != null && server.getZooKeeper() != null) {
6. try
7. //步骤1@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8. createNodeSplitting(server.getZooKeeper(),
9. parent.getRegionInfo(), server.getServerName(), hri_a, hri_b);
10. catch
11. throw new IOException("Failed creating PENDING_SPLIT znode on "
12. this.parent.getRegionNameAsString(), e);
13. }
14. }
15. this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
16. if (server != null && server.getZooKeeper() != null) {
17. // After creating the split node, wait for master to transition it
18. // from PENDING_SPLIT to SPLITTING so that we can move on. We want master
19. // knows about it and won't transition any region which is splitting.
20. //步骤2@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
21. znodeVersion = getZKNode(server, services);
22. }
24. //步骤3@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
25. this.parent.getRegionFileSystem().createSplitsDir();
26. this.journal.add(JournalEntry.CREATE_SPLIT_DIR);
28. byte[], List<StoreFile>> hstoreFilesToSplit = null;
29. null;
30. try{
31. //步骤4@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
32. this.parent.close(false);
33. catch
34. exceptionToThrow = e;
35. }
36. if (exceptionToThrow == null && hstoreFilesToSplit == null) {
37. // The region was closed by a concurrent thread. We can't continue
38. // with the split, instead we must just abandon the split. If we
39. // reopen or split this could cause problems because the region has
40. // probably already been moved to a different server, or is in the
41. // process of moving to a different server.
42. exceptionToThrow = closedByOtherException;
43. }
44. if
45. this.journal.add(JournalEntry.CLOSED_PARENT_REGION);
46. }
47. if (exceptionToThrow != null) {
48. if (exceptionToThrow instanceof IOException) throw
49. throw new
50. }
51. if
52. //步骤5@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
53. this.parent, null);
54. }
55. this.journal.add(JournalEntry.OFFLINED_PARENT);
57. // TODO: If splitStoreFiles were multithreaded would we complete steps in
58. // less elapsed time? St.Ack 20100920
59. //
60. // splitStoreFiles creates daughter region dirs under the parent splits dir
61. // Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
62. // clean this up.
63. //步骤6@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
64. splitStoreFiles(hstoreFilesToSplit);
66. // Log to the journal that we are creating region A, the first daughter
67. // region. We could fail halfway through. If we do, we could have left
68. // stuff in fs that needs cleanup -- a storefile or two. Thats why we
69. // add entry to journal BEFORE rather than AFTER the change.
70. //步骤7@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
71. this.journal.add(JournalEntry.STARTED_REGION_A_CREATION);
72. this.parent.createDaughterRegionFromSplits(this.hri_a);
74. // Ditto
75. this.journal.add(JournalEntry.STARTED_REGION_B_CREATION);
76. this.parent.createDaughterRegionFromSplits(this.hri_b);
77. return new
78. }
1.RegionSplitPolicy.getSplitPoint()获得region split的split point:若已显式指定split point则直接使用,否则取最大store的中间点(midpoint)作为split point
1.createDaughters 创建两个region,获得parent region的写锁
1.在zk上创建一个临时(ephemeral)的SPLITTING znode
(split row在hfile中的不管,其他的都进行引用,把引用文件分别写到region下边)
2.stepsAfterPONR 调用DaughterOpener类run打开两个子region,调用initialize
将子Region添加到rs的online region列表上,并添加到meta表上
3.2.1两个子region DaughterOpener线程 start
final RegionServerServices services, HRegion a, HRegion b)
throws IOException {
boolean stopped = server != null && server.isStopped();
boolean stopping = services != null && services.isStopping();
// TODO: Is this check needed here?
if (stopped || stopping) {
LOG.info("Not opening daughters " +
b.getRegionInfo().getRegionNameAsString() +
" and " +
a.getRegionInfo().getRegionNameAsString() +
" because stopping=" + stopping + ", stopped=" + stopped);
} else {
// Open daughters in parallel.创建两个字region打开操作类
DaughterOpener aOpener = new DaughterOpener(server, a);
DaughterOpener bOpener = new DaughterOpener(server, b);
try {
} catch (InterruptedException e) {
throw new IOException("Interrupted " + e.getMessage());
if (aOpener.getException() != null) {
throw new IOException("Failed " +
aOpener.getName(), aOpener.getException());
if (bOpener.getException() != null) {
throw new IOException("Failed " +
bOpener.getName(), bOpener.getException());
if (services != null) {
try {
// add 2nd daughter first (see HBASE-4335)
services.postOpenDeployTasks(b, server.getCatalogTracker(), true);
// Should add it to OnlineRegions
services.postOpenDeployTasks(a, server.getCatalogTracker(), true);
} catch (KeeperException ke) {
throw new IOException(ke);
调用HRegion 打开方法openHRegion
protected HRegion openHRegion(final CancelableProgressable reporter)
throws IOException {
long seqid = initialize(reporter);
//2.cleanupTempDir 清空老region临时目录
//3.初始化HRegion store,加载hfile
if (this.log != null) {
return this;
3.2.2若region 需要compact,进行compact过程
2.交给coprocessor做处理,选择compact storefile
3.3 isMajorCompaction判断是否需要major compact
3.3.1当ttl大于storefile中最大文件compact time,则不需要
3.3.2 以上反之,需要
3.3.3 最后一次major compaction时间大于majorCompactionTime,需要
3.4 当候选compact文件数小于compact文件最大数(maxFilesToCompact),且需要major compaction或强制major compaction时,则进行major compaction;另外,用户强制发起的major compaction,或候选文件中存在reference文件时,也会进行major compaction
3.5 否则进行minor compact。两者的区别在于:major compact合并所有文件,并且删除旧的(TTL过期和超出version数的)数据;minor compact合并的文件数不大于maxcompactfile配置
public CompactionRequest requestCompaction(int priority) throws IOException {
// don't even select for compaction if writes are disabled
if (!this.region.areWritesEnabled()) {
return null;
CompactionRequest ret = null;
try {
synchronized (filesCompacting) {
// candidates = all storefiles not already in compaction queue
List<StoreFile> candidates = Lists.newArrayList(storefiles);
if (!filesCompacting.isEmpty()) {
// exclude all files older than the newest file we're currently
// compacting. this allows us to preserve contiguity (HBASE-2856)
StoreFile last = filesCompacting.get(filesCompacting.size() - 1);
int idx = candidates.indexOf(last);
Preconditions.checkArgument(idx != -1);
candidates.subList(0, idx + 1).clear();
boolean override = false;
if (region.getCoprocessorHost() != null) {
override = region.getCoprocessorHost().preCompactSelection(
this, candidates);
CompactSelection filesToCompact;
if (override) {
// coprocessor is overriding normal file selection
filesToCompact = new CompactSelection(conf, candidates);
} else {
filesToCompact = compactSelection(candidates, priority);
if (region.getCoprocessorHost() != null) {
// no files to compact
if (filesToCompact.getFilesToCompact().isEmpty()) {
return null;
// basic sanity check: do not try to compact the same StoreFile twice.
if (!Collections.disjoint(filesCompacting, filesToCompact.getFilesToCompact())) {
// TODO: change this from an IAE to LOG.error after sufficient testing
Preconditions.checkArgument(false, "%s overlaps with %s",
filesToCompact, filesCompacting);
Collections.sort(filesCompacting, StoreFile.Comparators.FLUSH_TIME);
// major compaction iff all StoreFiles are included
boolean isMajor = (filesToCompact.getFilesToCompact().size() == this.storefiles.size());
if (isMajor) {
// since we're enqueuing a major, update the compaction wait interval
this.forceMajor = false;
// everything went better than expected. create a compaction request
int pri = getCompactPriority(priority);
ret = new CompactionRequest(region, this, filesToCompact, isMajor, pri);
} finally {
if (ret != null) {
return ret;
public CompactionRequest selectCompaction(Collection<StoreFile> candidateFiles,
final List<StoreFile> filesCompacting, final boolean isUserCompaction,
final boolean mayUseOffPeak, final boolean forceMajor) throws IOException {
// Preliminary compaction subject to filters
ArrayList<StoreFile> candidateSelection = new ArrayList<StoreFile>(candidateFiles);
// Stuck and not compacting enough (estimate). It is not guaranteed that we will be
// able to compact more if stuck and compacting, because ratio policy excludes some
// non-compacting files from consideration during compaction (see getCurrentEligibleFiles).
int futureFiles = filesCompacting.isEmpty() ? 0 : 1;
boolean mayBeStuck = (candidateFiles.size() - filesCompacting.size() + futureFiles)
>= storeConfigInfo.getBlockingFileCount();
candidateSelection = getCurrentEligibleFiles(candidateSelection, filesCompacting);
LOG.debug("Selecting compaction from " + candidateFiles.size() + " store files, " +
filesCompacting.size() + " compacting, " + candidateSelection.size() +
" eligible, " + storeConfigInfo.getBlockingFileCount() + " blocking");
long cfTtl = this.storeConfigInfo.getStoreFileTtl();
if (!forceMajor) {
// If there are expired files, only select them so that compaction deletes them
if (comConf.shouldDeleteExpired() && (cfTtl != Long.MAX_VALUE)) {
ArrayList<StoreFile> expiredSelection = selectExpiredStoreFiles(
candidateSelection, EnvironmentEdgeManager.currentTimeMillis() - cfTtl);
if (expiredSelection != null) {
return new CompactionRequest(expiredSelection);
candidateSelection = skipLargeFiles(candidateSelection);
// Force a major compaction if this is a user-requested major compaction,
// or if we do not have too many files to compact and this was requested
// as a major compaction.
// Or, if there are any references among the candidates.
boolean majorCompaction = (
(forceMajor && isUserCompaction)
|| ((forceMajor || isMajorCompaction(candidateSelection))
&& (candidateSelection.size() < comConf.getMaxFilesToCompact()))
|| StoreUtils.hasReferences(candidateSelection)
if (!majorCompaction) {
// we're doing a minor compaction, let's see what files are applicable
candidateSelection = filterBulk(candidateSelection);
candidateSelection = applyCompactionPolicy(candidateSelection, mayUseOffPeak, mayBeStuck);
candidateSelection = checkMinFilesCriteria(candidateSelection);
candidateSelection = removeExcessFiles(candidateSelection, isUserCompaction, majorCompaction);
CompactionRequest result = new CompactionRequest(candidateSelection);
result.setOffPeak(!candidateSelection.isEmpty() && !majorCompaction && mayUseOffPeak);
return result;
// If daughter of a split, update whole row, not just location.更新meta表 loaction和rowkey
MetaEditor.addDaughter(ct, r.getRegionInfo(),
public void addToOnlineRegions(HRegion region) {
this.onlineRegions.put(region.getRegionInfo().getEncodedName(), region);
/* package */void transitionZKNode(final Server server,
final RegionServerServices services, HRegion a, HRegion b)
throws IOException {
// Tell master about split by updating zk. If we fail, abort.
if (server != null && server.getZooKeeper() != null) {
try {
this.znodeVersion = transitionNodeSplit(server.getZooKeeper(),
parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
server.getServerName(), this.znodeVersion);
int spins = 0;
// Now wait for the master to process the split. We know it's done
// when the znode is deleted. The reason we keep tickling the znode is
// that it's possible for the master to miss an event.
do {
if (spins % 10 == 0) {
LOG.debug("Still waiting on the master to process the split for " +
// When this returns -1 it means the znode doesn't exist
this.znodeVersion = tickleNodeSplit(server.getZooKeeper(),
parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
server.getServerName(), this.znodeVersion);
} while (this.znodeVersion != -1 && !server.isStopped()
&& !services.isStopping());