Hash算法
1. Memcached Hash介绍
我们在前面的文章中已经介绍过了Memcached的内存管理方式,LRU的策略。由于Memcached的数据存储方式基本上是基于双向链表来实现的,而链表实现的最大好处在于可以快速的进行增删改,但其最大的不足在于其数据的获取只能通过遍历链表的方式来进行。而Memcached使用了Hash算法来进行数据的快速读取。
2. Hash算法
Memcached的Hash算法原理上非常简单。我们用下面的图来说明。
这个数据结构跟我们熟知的HashMap是一致的,数据hash到不同的桶中,当Hash发生冲突的时候,采用了链表来记录相同Hash值的数据。使用Hash算法最重要的一点是如何解决Hash冲突,Memcached采用的链表来解决Hash冲突是较为基本的方式。这种方式的缺陷是当数据量增多,Hash冲突增多时,会发生链表过长的情况。Memcached在这种情况下,会采用扩大桶数量的方式来优化。Memcached的Hash算法本身并不复杂,这里也不再花大篇幅来介绍其Hash算法。
3. 源代码分析
首先我们来看看Memcached的Hash算法:
unsigned int hashpower = HASHPOWER_DEFAULT;/* 这里的hash算法采用的还是按位与的方式来定位Bucket,1<<(n)表示hash桶的数量 */#define hashsize(n) ((ub4)1<<(n))/* 这里是Hash的掩码,数据的hash值与掩码取与操作可以定位到唯一的Hash桶 */#define hashmask(n) (hashsize(n)-1)
下面我们来看看Memcached的增删查找操作:
/* hash列表中Item元素的查找 */item *assoc_find(const char *key, const size_t nkey, const uint32_t hv) { item *it; unsigned int oldbucket; /* 这一步是找到hash的桶号 */ if (expanding && /* 在Hash列表进行rehash的时候,是按照桶号顺序进行的,所以如果该桶号>=目前正在处理的桶号时,意味着该数据还是旧Hash表中*/ (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) { it = old_hashtable[oldbucket]; } else { it = primary_hashtable[hv & hashmask(hashpower)]; } item *ret = NULL; int depth = 0; /* 这一步是Hash冲突列表的遍历查找 */ while (it) { /* Item值匹配的标准: 1. key的长度相等 2. key值相等 */ if ((nkey == it->nkey) && (memcmp(key, ITEM_key(it), nkey) == 0)) { ret = it; break; } it = it->h_next; ++depth; } MEMCACHED_ASSOC_FIND(key, nkey, depth); return ret; }/* 该方法是插入操作,该Key值必须是不存在才行 */int assoc_insert(item *it, const uint32_t hv) { unsigned int oldbucket; /* 这一步是找到该数据应存储的桶号 */ if (expanding && (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) { it->h_next = old_hashtable[oldbucket]; old_hashtable[oldbucket] = it; } else { it->h_next = primary_hashtable[hv & hashmask(hashpower)]; primary_hashtable[hv & hashmask(hashpower)] = it; } pthread_mutex_lock(&hash_items_counter_lock); hash_items++; /* 进行rehash的条件判断,满足rehash的条件如下: 1. 目前不是正处在rehash中 2. hash表中的所有数据量>hash表容量的1.5倍 */ if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) { assoc_start_expand(); } pthread_mutex_unlock(&hash_items_counter_lock); MEMCACHED_ASSOC_INSERT(ITEM_key(it), it->nkey, hash_items); return 1; }/* hash表中元素的删除 */void assoc_delete(const char *key, const size_t nkey, const uint32_t hv) { /* 指针的指针,要删除元素的地址指针*/ item **before = _hashitem_before(key, nkey, hv); if (*before) { item *nxt; pthread_mutex_lock(&hash_items_counter_lock); hash_items--; pthread_mutex_unlock(&hash_items_counter_lock); /* The DTrace probe cannot be triggered as the last instruction * due to possible tail-optimization by the compiler */ MEMCACHED_ASSOC_DELETE(key, nkey, hash_items); nxt = (*before)->h_next; (*before)->h_next = 0; /* probably pointless, but whatever. */ *before = nxt; return; } /* Note: we never actually get here. the callers don't delete things they can't find. */ assert(*before != 0); }static item** _hashitem_before (const char *key, const size_t nkey, const uint32_t hv) { item **pos; unsigned int oldbucket; if (expanding && (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket) { pos = &old_hashtable[oldbucket]; } else { pos = &primary_hashtable[hv & hashmask(hashpower)]; } while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) { pos = &(*pos)->h_next; } return pos; }
在看过了Memcached Hash表中数据的增删查,下面来看看Hash表的扩容实现:
/* 该方法只是Hash扩容的初始化方法 */static void assoc_expand(void) { old_hashtable = primary_hashtable; /* 从这里可以看出,Hash扩容的方式是重新申请两倍大小的Hash表*/ primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *)); if (primary_hashtable) { if (settings.verbose > 1) fprintf(stderr, "Hash table expansion starting\n"); hashpower++; expanding = true; expand_bucket = 0; STATS_LOCK(); stats.hash_power_level = hashpower; stats.hash_bytes += hashsize(hashpower) * sizeof(void *); stats.hash_is_expanding = 1; STATS_UNLOCK(); } else { primary_hashtable = old_hashtable; /* Bad news, but we can keep running. */ } }static volatile int do_run_maintenance_thread = 1;#define DEFAULT_HASH_BULK_MOVE 1int hash_bulk_move = DEFAULT_HASH_BULK_MOVE;/* ReHash的线程任务 */static void *assoc_maintenance_thread(void *arg) { mutex_lock(&maintenance_lock); while (do_run_maintenance_thread) { int ii = 0; /* There is only one expansion thread, so no need to global lock. */ /* 这里的hash_bulk_move标记一次rehash的桶的最小个数*/ for (ii = 0; ii < hash_bulk_move && expanding; ++ii) { item *it, *next; int bucket; void *item_lock = NULL; /* bucket = hv & hashmask(hashpower) =>the bucket of hash table * is the lowest N bits of the hv, and the bucket of item_locks is * also the lowest M bits of hv, and N is greater than M. * So we can process expanding with only one item_lock. cool! */ /*这里对整个桶进行加锁*/ if ((item_lock = item_trylock(expand_bucket))) { for (it = old_hashtable[expand_bucket]; NULL != it; it = next) { next = it->h_next; bucket = hash(ITEM_key(it), it->nkey) & hashmask(hashpower); it->h_next = primary_hashtable[bucket]; primary_hashtable[bucket] = it; } /* 已经处理掉的桶置为NULL */ old_hashtable[expand_bucket] = NULL; expand_bucket++; /* rehash完成的标记 */ if (expand_bucket == hashsize(hashpower - 1)) { expanding = false; free(old_hashtable); STATS_LOCK(); stats.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *); stats.hash_is_expanding = 0; STATS_UNLOCK(); if (settings.verbose > 1) fprintf(stderr, "Hash table expansion done\n"); } } else { usleep(10*1000); } if (item_lock) { item_trylock_unlock(item_lock); item_lock = NULL; } } if (!expanding) { /* We are done expanding.. just wait for next invocation */ started_expanding = false; pthread_cond_wait(&maintenance_cond, &maintenance_lock); /* assoc_expand() swaps out the hash table entirely, so we need * all threads to not hold any references related to the hash * table while this happens. * This is instead of a more complex, possibly slower algorithm to * allow dynamic hash table expansion without causing significant * wait times. */ pause_threads(PAUSE_ALL_THREADS); assoc_expand(); pause_threads(RESUME_ALL_THREADS); } } return NULL; }