redis源码分析——set

  • set是什么
  • 数据结构
  • intset
  • 数据结构
  • 插入元素
  • 删除元素
  • 查找元素
  • dict
  • 数据结构
  • 插入元素
  • 删除元素
  • 查找元素
  • rehash


set是什么

set是string类型的无序集合,不能出现重复的数据。

数据结构

/* Generic object header that wraps a stored value. For a set, ptr refers to
 * the structure that actually holds the members (an intset or a dict). */
typedef struct redisObject {
    /* 4-bit tag for the logical type of the object (OBJ_SET for a set). */
    unsigned type:4;
    /* 4-bit tag for the in-memory representation behind ptr
     * (OBJ_ENCODING_INTSET or OBJ_ENCODING_HT for a set). */
    unsigned encoding:4;
    unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
                            * LFU data (least significant 8 bits frequency
                            * and most significant 16 bits access time). */
    /* Reference count of this object. */
    int refcount;
    /* Pointer to the underlying data structure. */
    void *ptr;
} robj;

redis中set的类型是robj,其中type为OBJ_SET,encoding为OBJ_ENCODING_INTSET或OBJ_ENCODING_HT,refcount为该set的引用计数,ptr指向实际存储数据的对象(intset或dict)。

set采用了两种方式存储数据,分别是intset和dict。当set中元素全为整数且元素个数不超过512(配置项set-max-intset-entries的默认值)时使用intset,其他情况下使用dict。

intset

intset内部使用一个数组来存储数据,元素从小到大排列。

数据结构

/* Encoding identifiers for an intset. Each value is the width in bytes of one
 * stored element, so a wider encoding always compares greater than a
 * narrower one. */
#define INTSET_ENC_INT16 (sizeof(int16_t))
#define INTSET_ENC_INT32 (sizeof(int32_t))
#define INTSET_ENC_INT64 (sizeof(int64_t))

/* Set of integers kept in one contiguous buffer. */
typedef struct intset {
    /* Width in bytes of each element stored in contents
     * (one of the INTSET_ENC_* values). */
    uint32_t encoding;
    /* Number of elements currently stored in contents. */
    uint32_t length;
    /* Raw element storage (flexible array member); the bytes are interpreted
     * as integers of the width given by encoding. */
    int8_t contents[];
} intset;

其中encoding取值为上面三个宏定义之一,表示contents数组中存的是16位、32位还是64位的整数。intset创建时encoding默认为INTSET_ENC_INT16,当后续需要添加超出当前编码表示范围的数字时,intset会将encoding升级为INTSET_ENC_INT32或INTSET_ENC_INT64,然后调用realloc扩大contents数组。

插入元素

/* Insert an integer into the intset.
 *
 * Returns the (possibly reallocated) intset. When `success` is non-NULL it is
 * set to 1 if the value was inserted and to 0 if it was already present. */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    /* Encoding (16, 32 or 64 bit) required to hold the new value. */
    uint8_t needed = _intsetValueEncoding(value);
    uint32_t where;

    if (success) *success = 1;

    /* The value does not fit in the current encoding: upgrade the whole set
     * and add the value in the same step. A value that forces an upgrade
     * lies outside the range of the existing elements, so it is either
     * appended (if > 0) or prepended (if < 0). This path always succeeds,
     * so *success keeps the 1 stored above. */
    if (needed > intrev32ifbe(is->encoding)) {
        return intsetUpgradeAndAdd(is,value);
    }

    /* Look the value up; when it is not found the search stores in `where`
     * the position at which it has to be inserted. Nothing to do if the
     * value is already a member. */
    if (intsetSearch(is,value,&where)) {
        if (success) *success = 0;
        return is;
    }

    /* Grow the buffer by one element and, unless inserting at the very end,
     * shift the tail one slot to the right to open a gap. */
    is = intsetResize(is,intrev32ifbe(is->length)+1);
    if (where < intrev32ifbe(is->length)) {
        intsetMoveTail(is,where,where+1);
    }

    /* Store the value and account for the new element. */
    _intsetSet(is,where,value);
    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}

插入元素涉及到二分法查找插入位置及数据拷贝,所以时间复杂度为O(n)。

删除元素

/* Delete an integer from the intset.
 *
 * Returns the (possibly reallocated) intset. When `success` is non-NULL it is
 * set to 1 if the value was found and removed, and to 0 otherwise. */
intset *intsetRemove(intset *is, int64_t value, int *success) {
    /* Encoding (16, 32 or 64 bit) required to hold the value to delete. */
    uint8_t needed = _intsetValueEncoding(value);
    uint32_t where;

    if (success) *success = 0;

    /* A value that needs a wider encoding than the set uses cannot be a
     * member, so the search is skipped in that case. */
    if (needed > intrev32ifbe(is->encoding)) return is;
    if (!intsetSearch(is,value,&where)) return is;

    uint32_t count = intrev32ifbe(is->length);

    /* The value is present, so the deletion will go through. */
    if (success) *success = 1;

    /* Close the gap by moving the tail one slot to the left (not needed when
     * removing the last element), then shrink the buffer and the length. */
    if (where < (count-1)) {
        intsetMoveTail(is,where+1,where);
    }
    is = intsetResize(is,count-1);
    is->length = intrev32ifbe(count-1);
    return is;
}

删除元素也涉及到二分法查找及数据拷贝,所以时间复杂度也是O(n)。

查找元素

/* Membership test: returns 1 if value is stored in the intset, 0 otherwise. */
uint8_t intsetFind(intset *is, int64_t value) {
    uint8_t needed = _intsetValueEncoding(value);

    /* A value needing a wider encoding than the set uses cannot be present. */
    if (needed > intrev32ifbe(is->encoding)) return 0;

    return intsetSearch(is,value,NULL) != 0;
}

查找元素很简单,只有一个二分法查找,所以时间复杂度为O(log n)。

dict

当set中出现非整数元素或元素个数超过512时,set会将intset转为dict形式。

数据结构

/* Hash table container. It holds two tables so that entries can be migrated
 * from ht[0] to ht[1] while a rehash is in progress. */
typedef struct dict {
    /* Table of callbacks for this dictionary (see dictType). */
    dictType *type;
    /* Opaque pointer; presumably handed to the dictType callbacks as their
     * privdata argument -- confirm against the dict macros. */
    void *privdata;
    /* ht[0] is the table in normal use; ht[1] is the rehash target. */
    dictht ht[2];
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

/* A single hash table: an array of buckets, each bucket being a singly
 * linked chain of dictEntry. */
typedef struct dictht {
    /* Bucket array; each slot points to the head of a chain (or is NULL). */
    dictEntry **table;
    /* Number of buckets in table. */
    unsigned long size;
    /* Mask ANDed with a hash value to obtain a bucket index
     * (set to size-1 when the table is created). */
    unsigned long sizemask;
    /* Total number of entries stored across all buckets. */
    unsigned long used;
} dictht;

/* Callbacks that customise a dict for a particular key/value type.
 * NOTE(review): the descriptions below are inferred from the member names and
 * signatures only; the macros that invoke them are not shown here. */
typedef struct dictType {
    /* Presumably computes the 64-bit hash of a key. */
    uint64_t (*hashFunction)(const void *key);
    /* Presumably returns a copy of the key to store in the dict. */
    void *(*keyDup)(void *privdata, const void *key);
    /* Presumably returns a copy of the value to store in the dict. */
    void *(*valDup)(void *privdata, const void *obj);
    /* Presumably returns non-zero when the two keys are equal. */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* Presumably releases a key when its entry is destroyed. */
    void (*keyDestructor)(void *privdata, void *key);
    /* Presumably releases a value when its entry is destroyed. */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* One key/value pair stored in a hash table bucket. */
typedef struct dictEntry {
    /* Key of this entry. */
    void *key;
    /* Value slot: either a pointer or an inline 64-bit unsigned integer,
     * 64-bit signed integer, or double. */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    /* Next entry in the same bucket chain, or NULL at the end of the chain. */
    struct dictEntry *next;
} dictEntry;

dict中包括两个dictht,正常情况下只有ht[0]中有数据,当元素个数达到一定比例时,会进行rehash,此时会将ht[0]中的数据逐步rehash到ht[1]中,当rehash完成后交换ht[0]和ht[1],此时又只有ht[0]有数据,ht[1]为空。

dictht是一个哈希表,其中table是一个dictEntry*的数组,数组中每个元素形成一个链表,size表示数组的长度,used表示哈希表中的元素总数,sizemask是数组长度的掩码。

dict中的rehashidx用于表示rehash的进度,当rehashidx==-1时表示没有进行rehash,当rehashidx>=0时,表示要对table中下标为rehashidx的链表进行rehash。

插入元素

/* Add a key/value pair to the dictionary.
 *
 * Returns DICT_OK when the pair was inserted, or DICT_ERR when dictAddRaw()
 * did not create an entry for the key. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d,key,NULL);

    if (added == NULL) {
        return DICT_ERR;
    }

    /* The entry already carries the key; attach the value to it. */
    dictSetVal(d, added, val);
    return DICT_OK;
}

/* Low-level insert: create the entry for `key`, link it into the table and
 * return it, leaving the value for the caller to set.
 *
 * Returns NULL, without inserting anything, when _dictKeyIndex() reports -1
 * (the key already exists). `existing` is forwarded to _dictKeyIndex()
 * unchanged. */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    /* While a rehash is in progress each operation performs one rehash step,
     * so the migration work is spread across ordinary operations instead of
     * being done all at once. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Bucket index for the new element, or -1 if the element already
     * exists. */
    long slot = _dictKeyIndex(d, key, dictHashKey(d,key), existing);
    if (slot == -1) return NULL;

    /* During a rehash new entries go straight into the new table. */
    dictht *target = &d->ht[0];
    if (dictIsRehashing(d)) target = &d->ht[1];

    /* Allocate the entry and push it at the head of the bucket chain, on the
     * assumption that in a database system recently added entries are more
     * likely to be accessed soon. */
    dictEntry *node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, node, key);
    return node;
}

插入元素的平均时间复杂度为O(1)。

删除元素

/* Remove `key` from the dictionary, freeing the entry.
 * Returns DICT_OK if the key was found and removed, DICT_ERR otherwise. */
int dictDelete(dict *ht, const void *key) {
    if (dictGenericDelete(ht,key,0)) {
        return DICT_OK;
    }
    return DICT_ERR;
}

/* Find `key` and unlink its entry from the bucket chain.
 *
 * When `nofree` is zero the key, the value and the entry itself are released
 * before returning. The returned pointer has then already been passed to
 * zfree(), so callers may only test it against NULL. When `nofree` is
 * non-zero the unlinked entry is returned intact.
 *
 * Returns NULL when the key is not found. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    /* Nothing to search if both tables are empty. */
    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    uint64_t hash = dictHashKey(d, key);

    /* Probe the bucket for this hash in ht[0], then in ht[1]. */
    for (int t = 0; t <= 1; t++) {
        dictht *tbl = &d->ht[t];
        uint64_t slot = hash & tbl->sizemask;
        dictEntry *prev = NULL;

        for (dictEntry *cur = tbl->table[slot]; cur != NULL;
             prev = cur, cur = cur->next)
        {
            if (key==cur->key || dictCompareKeys(d, key, cur->key)) {
                /* Unlink the element from the list. */
                if (prev) {
                    prev->next = cur->next;
                } else {
                    tbl->table[slot] = cur->next;
                }
                if (!nofree) {
                    dictFreeKey(d, cur);
                    dictFreeVal(d, cur);
                    zfree(cur);
                }
                tbl->used--;
                return cur;
            }
        }

        /* ht[1] only holds entries while a rehash is in progress, so it can
         * be skipped otherwise. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}

查找元素

/* Look up `key` and return its entry, or NULL if the key is not present. */
dictEntry *dictFind(dict *d, const void *key)
{
    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */

    if (dictIsRehashing(d)) _dictRehashStep(d);

    uint64_t hash = dictHashKey(d, key);

    /* Probe the bucket for this hash in ht[0], then in ht[1]. */
    for (int t = 0; t <= 1; t++) {
        dictht *tbl = &d->ht[t];
        uint64_t slot = hash & tbl->sizemask;

        for (dictEntry *cur = tbl->table[slot]; cur != NULL; cur = cur->next) {
            if (key==cur->key || dictCompareKeys(d, key, cur->key)) {
                return cur;
            }
        }

        /* ht[1] is only consulted while a rehash is in progress. */
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}

查找的过程与删除类似,平均时间复杂度都是O(1)。

rehash

在往dict中插入元素时,会先判断是否需要rehash:

/* Grow the hash table if it needs it.
 *
 * Returns DICT_OK when no expansion is required, otherwise the result of
 * dictExpand(). */
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress (rehashidx != -1), so an
     * expansion is already under way. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* A freshly initialised dict has size 0: expand it to the initial
     * size. */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Below the 1:1 elements/buckets ratio nothing needs to be done. */
    if (d->ht[0].used < d->ht[0].size) return DICT_OK;

    /* At or above 1:1: double the number of buckets if resizing is allowed
     * (global setting), or, when it should be avoided, only once the ratio
     * exceeds the "safe" threshold. */
    if (dict_can_resize ||
        d->ht[0].used/d->ht[0].size > dict_force_resize_ratio)
    {
        return dictExpand(d, d->ht[0].used*2);
    }
    return DICT_OK;
}

/* Expand or create the hash table.
 *
 * `size` is the requested number of buckets; the table is actually created
 * with _dictNextPower(size) buckets.
 *
 * Returns DICT_OK on success. Returns DICT_ERR when a rehash is already in
 * progress, when `size` is smaller than the number of stored elements, or
 * when the resulting table size equals the current one.
 *
 * On an already populated dict this only installs the new table as ht[1] and
 * sets rehashidx to 0; no entries are moved here. */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    /* Per the original author's note, realsize is a power of two that is
     * greater than or equal to size; realsize-1 is used as the index mask
     * below. */
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}

其中DICT_HT_INITIAL_SIZE为4,dict_force_resize_ratio为5。

dictExpand函数将d->rehashidx设为0,即开始rehash,但并没有实际对table中的数据进行rehash,真正的rehash要等到插入、删除、查找元素时进行,每次操作都会进行单步的rehash。