创建SLAB高速缓存kmem_cache_create() 作者:李万鹏 于北京 borqs


这篇主要讲解专用高速缓存的创建函数kmem_cache_create()的流程。kmem_cache_create()主要是建立cache描述符并填充其成员,设置cpu local slab,决定使用外置slab描述符还是内置slab描述符,但并不分配slab块和对象。最终将cache描述符添加到cache_chain链表上。

首先写了一个简单的测试程序:


#include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/gfp.h> struct slab_test{ int val; }; static int n; struct kmem_cache *test_cachep = NULL; struct slab_test *object1 = NULL, *object2 = NULL; void slab_ctor(void *cachep){ printk("slab_ctor is called! object %d has been inited!\n", n); n++; } static int __init slab_test_init(void){ printk("slab test module init\n"); n = 0; test_cachep = kmem_cache_create("slab_test_cachep", sizeof(struct slab_test), 0, SLAB_HWCACHE_ALIGN, slab_ctor); if(!test_cachep) return -ENOMEM; object1 = kmem_cache_alloc(test_cachep, GFP_KERNEL); if(!object1) return -ENOMEM; else printk("object one has been created!\n"); object2 = kmem_cache_alloc(test_cachep, GFP_KERNEL); if(!object2) return -ENOMEM; else printk("object two has been created!\n"); return 0; } static void __exit slab_test_exit(void){ printk("slab test module exit\n"); kmem_cache_free(test_cachep, object1); kmem_cache_free(test_cachep, object2); if(test_cachep) kmem_cache_destroy(test_cachep); } module_init(slab_test_init); module_exit(slab_test_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("embeddedlwp@"); MODULE_DESCRIPTION("slab test module"); 
dmesg一下看log:
[ 103.617426] slab test module init
[ 103.617427] slab_ctor is called! object 0 has been inited!
[ 103.617429] slab_ctor is called! object 1 has been inited!
[ 103.617430] slab_ctor is called! object 2 has been inited!
 ............
 ............
[ 103.617761] slab_ctor is called! object 506 has been inited!
[ 103.617762] slab_ctor is called! object 507 has been inited!
[ 103.617763] object one has been created!
[ 103.617766] object two has been created!


当我只调用kmem_cache_create()而没有调用kmem_cache_alloc()的时候,对象的构造函数是不会被调用的。对照源码可以看到,kmem_cache_create()并没有分配slab;而是在第一次分配对象时发现没有空闲对象,才调用cache_grow()分配一个slab,然后再从中分配对象。所以这里调用构造函数初始化的object都是刚分配的slab中的对象。执行 cat /proc/slabinfo 看一下,发现最上边出现我们新创建的cache slab_test_cachep,其中active_objs为2,num_objs为508,正好与上面log中构造函数被调用508次(object 0~507)、实际分配了两个对象的情况相符。(注意:下面分析的kmem_cache_create()源码来自较早的2.6内核,其构造函数原型以及多出的dtor参数与上面测试程序所用的接口不同。)


/*
 * kmem_cache_create - build and register a new slab cache descriptor.
 * @name:  cache name; stored by pointer (not copied) in the descriptor and
 *         compared against the names of all caches already on cache_chain.
 * @size:  object size in bytes; rounded up to a multiple of BYTES_PER_WORD
 *         and then to the computed alignment.
 * @align: NOTE(review): in this listing the incoming value is never read;
 *         it is overwritten with the internally computed alignment.
 * @flags: SLAB_* flags; CFLGS_OFF_SLAB may be set or cleared internally.
 * @ctor:  object constructor, stored in the descriptor.
 * @dtor:  object destructor, stored in the descriptor; only allowed when a
 *         constructor is also given.
 *
 * Only the descriptor is filled in here (page order, objects per slab,
 * colouring, management-structure size, per-CPU array cache); nothing in
 * this function allocates slab pages or objects.
 *
 * Returns the new descriptor. Returns NULL if the descriptor could not be
 * allocated or no object fits in a slab; in that case the function panics
 * instead when SLAB_PANIC is set in @flags. Invalid arguments and duplicate
 * cache names end in BUG().
 */
kmem_cache_t *kmem_cache_create (const char *name, size_t size, size_t align,
 unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 void (*dtor)(void*, kmem_cache_t *, unsigned long))
{
 size_t left_over, slab_size, ralign;
 kmem_cache_t *cachep = NULL;
 /* Sanity checks: name must be non-NULL, we must not be in interrupt context (this function can sleep), size must lie within [BYTES_PER_WORD, (1<<MAX_OBJ_ORDER)*PAGE_SIZE], and a destructor requires a constructor. */ if ((!name) ||
 in_interrupt() ||
 (size < BYTES_PER_WORD) ||
 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
 (dtor && !ctor)) {
 /* NOTE(review): name is printed with %s even when the failing check was !name. */
 printk(KERN_ERR "%s: Early error in slab %s\n",
 __FUNCTION__, name);
 BUG();
 }

 /* By default the object size is rounded up to a whole number of words. */ if (size & (BYTES_PER_WORD-1)) {
 size += (BYTES_PER_WORD-1);
 size &= ~(BYTES_PER_WORD-1);
 }

 /*
  * With SLAB_HWCACHE_ALIGN the objects are aligned with respect to the
  * hardware cache line: the alignment starts at cache_line_size() and is
  * halved for as long as the object still fits into half of it, so an
  * object larger than half a line is aligned to a full line while smaller
  * objects get the smallest such fraction of a line that still holds them.
  * This keeps a small object from straddling two cache lines. The padding
  * this introduces enlarges objects artificially, i.e. it trades space for
  * better cache behaviour.
  * Without the flag the alignment is simply BYTES_PER_WORD.
  */
 if (flags & SLAB_HWCACHE_ALIGN) {
 ralign = cache_line_size();
 while (size <= ralign/2)
 ralign /= 2;
 } else {
 ralign = BYTES_PER_WORD;
 }

 align = ralign; /* The descriptor itself is allocated from cache_cache, the cache that holds the descriptors of all other caches. */
 /* Get cache's description obj. */
 cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
 if (!cachep)
goto opps; /* On success, the next statement zeroes the whole descriptor. */
 memset(cachep, 0, sizeof(kmem_cache_t));

 /* Objects of at least 1/8 of a page start out with an off-slab (external) slab descriptor. */
 /* Determine if the slab management is 'on' or 'off' slab. */
 if (size >= (PAGE_SIZE>>3))
 /*
  * Size is large, assume best to place the slab management obj
  * off-slab (should allow better packing of objs).
  */
 flags |= CFLGS_OFF_SLAB;


 size = ALIGN(size, align);

 /* Pick a page order that balances internal fragmentation against the number of pages per slab. */
 if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) {
 /*
  * A VFS-reclaimable slab tends to have most allocations
  * as GFP_NOFS and we really don't want to have to be allocating
  * higher-order pages when we are unable to shrink dcache.
  */
 cachep->gfporder = 0;
 cache_estimate(cachep->gfporder, size, align, flags,
 &left_over, &cachep->num);
 } else {
 /*
  * Calculate size (in pages) of slabs, and the num of objs per
  * slab. This could be made much more intelligent. For now,
  * try to avoid using high page-orders for slabs. When the
  * gfp() funcs are more friendly towards high-order requests,
  * this should be changed.
  */
do {
 unsigned int break_flag = 0;
cal_wastage: /* Compute the internal fragmentation; presumably cache_estimate() yields the objects per slab and the leftover bytes for this order -- confirm. */
 cache_estimate(cachep->gfporder, size, align, flags,
 &left_over, &cachep->num);
 /* break_flag is set only after stepping the order back down below; the re-estimate above is then final. */
 if (break_flag)
 break;
 if (cachep->gfporder >= MAX_GFP_ORDER)
 break;
 /* Not even one object fits at this order: try the next one. */
 if (!cachep->num)
 goto next;
 if (flags & CFLGS_OFF_SLAB &&
 cachep->num > offslab_limit) {
 /* This num of objs will cause problems. */
 cachep->gfporder--;
 break_flag++;
 goto cal_wastage;
 }


 /*
  * Large num of objs is good, but v. large slabs are
  * currently bad for the gfp()s.
  */
 if (cachep->gfporder >= slab_break_gfp_order)
 break;
 /* Balance point: accept once the leftover is at most 1/8 of the slab's size. */

 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
 break; /* Acceptable internal fragmentation. */
next:
 cachep->gfporder++;
 } while (1);
 }


 /* No object fits in a slab: give the descriptor back and fail. */
 if (!cachep->num) {
 printk("kmem_cache_create: couldn't create cache %s.\n", name);
 kmem_cache_free(&cache_cache, cachep);
 cachep = NULL;
 goto opps;
}
 /* Size of the slab management area (slab descriptor plus one kmem_bufctl_t per object), rounded up to align. */
 slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t)
 + sizeof(struct slab), align);


 /* If the leftover space can hold the whole management area, switch back to an on-slab descriptor and take it out of the leftover. */
 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
 flags &= ~CFLGS_OFF_SLAB;
 left_over -= slab_size;
 }


 /* A descriptor that stays off-slab does not need the alignment padding. */
 if (flags & CFLGS_OFF_SLAB) {
 /* really off slab. No need for manual alignment */
 slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab);
 }

 /* The colouring step is one hardware cache line ... */
 cachep->colour_off = cache_line_size();
 /* Offset must be a multiple of the alignment. */
 if (cachep->colour_off < align)
 cachep->colour_off = align;
 /* ... and the number of colour steps is how many of them fit into the leftover space. */
 cachep->colour = left_over/cachep->colour_off;
 /* Record the size of the slab management area computed above. */
 cachep->slab_size = slab_size;
 cachep->flags = flags;
 cachep->gfpflags = 0;
 if (flags & SLAB_CACHE_DMA)
 cachep->gfpflags |= GFP_DMA;
 spin_lock_init(&cachep->spinlock);
 cachep->objsize = size;
 /* NUMA */
 /* Initialise the slabs_full, slabs_partial and slabs_free list heads. */
 INIT_LIST_HEAD(&cachep->lists.slabs_full);
 INIT_LIST_HEAD(&cachep->lists.slabs_partial);
 INIT_LIST_HEAD(&cachep->lists.slabs_free);

 /*
  * For an off-slab descriptor, look up the general cache that fits
  * slab_size via kmem_find_general_cachep(). Only the cache pointer is
  * recorded here; no management object is allocated in this function
  * (per the original author's note that happens later, in cache_grow(),
  * when a new slab is allocated -- not visible in this listing).
  */ 
 if (flags & CFLGS_OFF_SLAB)
 cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
 /* Record the constructor, destructor and name of the cache. */
 cachep->ctor = ctor;
 cachep->dtor = dtor;
 cachep->name = name;

 /* Don't let CPUs to come and go */
 lock_cpu_hotplug();

 /*
  * Set up the per-CPU array cache. Once g_cpucache_up is FULL this is
  * delegated to enable_cpucache(); before that a minimal boot-time array
  * cache is installed for the current CPU only.
  */
 if (g_cpucache_up == FULL) {
 enable_cpucache(cachep);
 } else {
 if (g_cpucache_up == NONE) {
 /* Note: the first kmem_cache_create must create
  * the cache that's used by kmalloc(24), otherwise
  * the creation of further caches will BUG().
  */
 cachep->array[smp_processor_id()] = &initarray_generic.cache;
 g_cpucache_up = PARTIAL;
 } else {
 cachep->array[smp_processor_id()] = kmalloc(sizeof(struct arraycache_init),GFP_KERNEL);
 }
 /* Presumably ac_data() yields the current CPU's array cache set just above; a NULL here (e.g. failed kmalloc) is fatal. */
 BUG_ON(!ac_data(cachep));
 ac_data(cachep)->avail = 0;
 ac_data(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
 ac_data(cachep)->batchcount = 1;
 ac_data(cachep)->touched = 0;
 cachep->batchcount = 1;
 cachep->limit = BOOT_CPUCACHE_ENTRIES;
 cachep->free_limit = (1+num_online_cpus())*cachep->batchcount
 + cachep->num;
 }
 /* The extra term is derived from the descriptor's address, so different caches get different next_reap values (presumably to spread out reaping). */
 cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
 ((unsigned long)cachep)%REAPTIMEOUT_LIST3;


 /* Take cache_chain_sem, which protects the cache chain against concurrent access. */ 
 down(&cache_chain_sem);
 {
 struct list_head *p;
 mm_segment_t old_fs;


 /* Switch to KERNEL_DS so that __get_user() below can probe kernel addresses. */
 old_fs = get_fs();
 set_fs(KERNEL_DS);
 list_for_each(p, &cache_chain) {
 kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
 char tmp;
 /* This happens when the module gets unloaded and doesn't
  destroy its slab cache and noone else reuses the vmalloc
  area of the module. Print a warning. */
 if (__get_user(tmp,pc->name)) {
 printk("SLAB: cache with size %d has lost its name\n",
 pc->objsize);
 continue;
 }  
 /* A second cache with the same name is treated as a fatal error. */
 /* NOTE(review): this path reaches BUG() without calling set_fs(old_fs). */
 if (!strcmp(pc->name,name)) {
 printk("kmem_cache_create: duplicate cache %s\n",name);
 up(&cache_chain_sem);
 unlock_cpu_hotplug();
 BUG();
 } 
 }
 set_fs(old_fs);
 }

 /* Link the new cache into the cache_chain list. */ list_add(&cachep->next, &cache_chain);
 /* Release the semaphore. */
 up(&cache_chain_sem);
 unlock_cpu_hotplug();
opps:
 /* Reached on success as well as on failure; only a NULL result combined with SLAB_PANIC panics. */
 if (!cachep && (flags & SLAB_PANIC))
 panic("kmem_cache_create(): failed to create slab `%s'\n",
 name);
 return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);