上篇分析了一下cache的创建过程,现在cache已经创建完成,跟踪一下slab对象的申请过程。
目前使用的申请方式主要是kmalloc(从general cache中申请)和kmem_cache_alloc(从专用cache中申请)。
先看一下kmalloc
/*
 * kmalloc - allocate memory from a general-purpose (size-N) cache
 * @size: how many bytes are required
 * @flags: GFP allocation flags (e.g. GFP_KERNEL, GFP_DMA)
 *
 * If @size is a compile-time constant, the matching malloc_sizes[] cache
 * is selected at compile time by expanding the CACHE() size table below;
 * otherwise the lookup happens at run time inside __kmalloc().
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *cachep;
void *ret;
/* __builtin_constant_p: gcc builtin; evaluates to 1 when its argument
 * is a compile-time constant, so this whole branch folds away for
 * run-time sizes. */
if (__builtin_constant_p(size)) {
int i = 0;
if (!size)
/* Zero-byte request: hand back the ZERO_SIZE_PTR sentinel. */
return ZERO_SIZE_PTR;
/*
 * Including <linux/kmalloc_sizes.h> expands CACHE(x) once per
 * supported cache size in ascending order, producing an if/else
 * ladder; on exit via 'found', i is the index of the smallest
 * general cache that fits @size.
 */
#define CACHE(x) \
if (size <= x) \
goto found; \
else \
i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
/* @size exceeds the largest general cache. */
return NULL;
found:
#ifdef CONFIG_ZONE_DMA
/* DMA requests use the parallel DMA-zone cache array. */
if (flags & GFP_DMA)
cachep = malloc_sizes[i].cs_dmacachep;
else
#endif
cachep = malloc_sizes[i].cs_cachep;
ret = kmem_cache_alloc_notrace(cachep, flags);
trace_kmalloc(_THIS_IP_, ret,
size, slab_buffer_size(cachep), flags);
return ret;
}
/* Non-constant size: take the normal run-time lookup path. */
return __kmalloc(size, flags);
}
/*
 * __kmalloc - out-of-line kmalloc entry point.
 * @size:  number of bytes requested
 * @flags: GFP allocation flags
 *
 * Plain allocation path with no debug caller tracking (caller == NULL).
 */
void *__kmalloc(size_t size, gfp_t flags)
{
	void *ret;

	ret = __do_kmalloc(size, flags, NULL);
	return ret;
}
/**
 * __do_kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 * @caller: function caller for debug tracking of the caller
 */
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
void *caller)
{
struct kmem_cache *cachep;
void *ret;
/* If you want to save a few bytes .text space: replace
* __ with kmem_.
* Then kmalloc uses the uninlined functions instead of the inline
* functions.
*/
/* Find the general cache whose object size fits this request. */
cachep = __find_general_cachep(size, flags);
/* The lookup returned NULL or the ZERO_SIZE_PTR sentinel (zero-size
 * request); return that value to the caller unchanged. */
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
/* The actual object allocation happens here. */
ret = __cache_alloc(cachep, flags, caller);
trace_kmalloc((unsigned long) caller, ret,
size, cachep->buffer_size, flags);
return ret;
}
而__cache_alloc函数中实际上调用的是__do_cache_alloc。对于非NUMA架构,它的实现非常简单,直接转发给____cache_alloc:
/*
 * __do_cache_alloc - !NUMA variant: there is only one node, so the
 * allocation always comes from the local per-CPU path.
 */
static __always_inline void *__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *obj = ____cache_alloc(cachep, flags);

	return obj;
}
/*
 * ____cache_alloc - fast-path allocation from the per-CPU array cache.
 *
 * Pops an object off the current CPU's local cache when one is
 * available; otherwise refills the local cache from the slab lists.
 * Must run with interrupts off (checked below).
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *objp;
struct array_cache *ac;
check_irq_off();
/* cachep->array[smp_processor_id()]: the array cache of this CPU. */
ac = cpu_cache_get(cachep);
/* avail is the number of objects currently cached; nonzero means hit. */
if (likely(ac->avail)) {
/* Local cache hit: bump the hit counter (stats builds only). */
STATS_INC_ALLOCHIT(cachep);
/* Mark the local cache as recently used. */
ac->touched = 1;
/* Take the last free object; entry[] is used LIFO, so --avail both
 * pops the object and records the new count. */
objp = ac->entry[--ac->avail];
} else {
/* Local cache empty: bump the miss counter (stats builds only). */
STATS_INC_ALLOCMISS(cachep);
/* Refill the local cache from the node's slab lists and return
 * one object from the refill. */
objp = cache_alloc_refill(cachep, flags);
}
/*
 * To avoid a false negative, if an object that is in one of the
 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
 * treat the array pointers as a reference to the object.
 */
/* Clear the entry[] slot that held the object just handed out. */
kmemleak_erase(&ac->entry[ac->avail]);
return objp;
}
/*
 * cache_alloc_refill - slow path: refill the per-CPU array cache.
 *
 * Tries, in order: the node's shared array cache, the partial slab
 * list, the free slab list, and finally growing the cache with a new
 * slab. Returns one object for the caller (or NULL on failure); any
 * extra objects pulled in stay in the local cache for later requests.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
int batchcount;
struct kmem_list3 *l3;
struct array_cache *ac;
int node;
retry:
check_irq_off();
/* Current NUMA node id. */
node = numa_node_id();
/* This CPU's local array cache. */
ac = cpu_cache_get(cachep);
/* Number of objects to pull in per refill. */
batchcount = ac->batchcount;
/* If this local cache was not touched recently, cap the refill. */
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
/*
* If there was little recent activity on this cache, then
* perform only a partial refill. Otherwise we could generate
* refill bouncing.
*/
batchcount = BATCHREFILL_LIMIT;
}
/* The kmem_list3 holding this node's three slab lists. */
l3 = cachep->nodelists[node];
BUG_ON(ac->avail > 0 || !l3);
spin_lock(&l3->list_lock);
/* See if we can refill from the shared array */
/* The shared array cache is common to all CPUs of this node; try a
 * bulk transfer from it into the local cache first. */
if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
goto alloc_done;
/* Shared cache absent or empty: pull objects from the slab lists. */
while (batchcount > 0) {
struct list_head *entry;
struct slab *slabp;
/* Get slab alloc is to come from. */
/* Prefer partially-used slabs so free slabs stay reclaimable. */
entry = l3->slabs_partial.next;
/* Partial list empty? */
if (entry == &l3->slabs_partial) {
/* Record that we had to fall back to the free list. */
l3->free_touched = 1;
entry = l3->slabs_free.next;
/* Free list empty too: a new slab must be grown. */
if (entry == &l3->slabs_free)
goto must_grow;
}
/* Got a slab from one of the lists. */
slabp = list_entry(entry, struct slab, list);
check_slabp(cachep, slabp);
check_spinlock_acquired(cachep);
/*
* The slab was either on partial or free list so
* there must be at least one object available for
* allocation.
*/
BUG_ON(slabp->inuse >= cachep->num);
/* Drain objects from this slab until it is full or the batch quota
 * is exhausted (inuse counts allocated objects, num is the per-slab
 * capacity). */
while (slabp->inuse < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
/* Detach one free object and stash its address in the local cache. */
ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
node);
}
check_slabp(cachep, slabp);
/* move slabp to correct slabp list: */
/* Unlink the slab from whichever list it was on... */
list_del(&slabp->list);
if (slabp->free == BUFCTL_END)
/* ...no free objects left: it belongs on the full list... */
list_add(&slabp->list, &l3->slabs_full);
else
/* ...otherwise it still has room: back onto the partial list. */
list_add(&slabp->list, &l3->slabs_partial);
}
must_grow:
/* ac->avail objects moved from the slab lists into the local cache;
 * account for them in the node's free-object count. */
l3->free_objects -= ac->avail;
alloc_done:
spin_unlock(&l3->list_lock);
/* Still nothing available: the slab lists were empty, grow the cache
 * by allocating a fresh slab. */
if (unlikely(!ac->avail)) {
int x;
/* Allocate a new (empty) slab for this node. */
x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
/* cache_grow can reenable interrupts, then ac could change. */
/* Because cache_grow() may re-enable interrupts, the local cache
 * pointer may have changed; re-read it. */
ac = cpu_cache_get(cachep);
/* Growing failed and nothing arrived meanwhile: give up. */
if (!x && ac->avail == 0) /* no objects in sight? abort */
return NULL;
/* Grow succeeded but the local cache is still empty: retry the
 * refill from the (now non-empty) slab lists. */
if (!ac->avail) /* objects refilled by interrupt? */
goto retry;
}
/* Mark the local cache as recently used. */
ac->touched = 1;
/* Pop and return one object for the current request. */
return ac->entry[--ac->avail];
}
分析一下几个函数
/*
 * Move up to @max object pointers from one array cache to another.
 * All locking is the caller's responsibility.
 *
 * In this file it is used to bulk-move objects from a node's shared
 * array cache into a CPU's local cache.
 *
 * Returns the number of entries actually transferred (0 if the donor
 * is empty or the receiver is full).
 */
static int transfer_objects(struct array_cache *to,
		struct array_cache *from, unsigned int max)
{
	/* Clamp by donor stock, the caller's cap, and receiver free room. */
	unsigned int nr = from->avail;

	if (nr > max)
		nr = max;
	if (nr > to->limit - to->avail)
		nr = to->limit - to->avail;
	if (!nr)
		return 0;

	/* Copy the last @nr donor entries onto the end of the receiver. */
	memcpy(to->entry + to->avail, from->entry + from->avail - nr,
	       sizeof(void *) * nr);

	from->avail -= nr;
	to->avail += nr;
	to->touched = 1;
	return nr;
}
/*
 * slab_get_obj - detach the first free object from a slab.
 *
 * The slab keeps its free objects in a singly linked list threaded
 * through the bufctl array; slabp->free is the head index. Returns the
 * object's address and advances the free-list head.
 */
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
int nodeid)
{
/* slabp->free is the index of the first free object. */
/* index_to_obj: slab->s_mem + cache->buffer_size * idx; s_mem is the
 * address of the slab's first object, buffer_size the per-object size. */
void *objp = index_to_obj(cachep, slabp, slabp->free);
kmem_bufctl_t next;
/* One more object in use on this slab. */
slabp->inuse++;
/* Read the index of the next free object from the bufctl array. */
/* slab_bufctl: (kmem_bufctl_t *) (slabp + 1), i.e. the array right
 * after the slab descriptor. */
next = slab_bufctl(slabp)[slabp->free];
#if DEBUG
/* Poison the entry and verify the object belongs to this node. */
slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
WARN_ON(slabp->nodeid != nodeid);
#endif
/* Advance the free-list head to the next free object. */
slabp->free = next;
return objp;
}