上篇分析了一下cache的创建过程,现在cache已经创建完成,跟踪一下slab对象的申请过程。
目前使用的申请方式主要是kmalloc(从general cache中申请)和kmem_cache_alloc(从专用cache中申请)。
先看一下kmalloc
/*
 * kmalloc - allocate memory from a general-purpose (size-N) cache
 * @size: how many bytes are required
 * @flags: GFP allocation flags (e.g. GFP_KERNEL, GFP_DMA)
 *
 * If @size is a compile-time constant, the matching malloc_sizes[] cache
 * is selected at compile time by expanding the CACHE() size table below;
 * otherwise the lookup happens at run time inside __kmalloc().
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *cachep;
void *ret;
/* __builtin_constant_p: gcc builtin; evaluates to 1 when its argument
 * is a compile-time constant, so this whole branch folds away for
 * run-time sizes. */
if (__builtin_constant_p(size)) {
int i = 0;
if (!size)
/* Zero-byte request: hand back the ZERO_SIZE_PTR sentinel. */
return ZERO_SIZE_PTR;
/*
 * Including <linux/kmalloc_sizes.h> expands CACHE(x) once per
 * supported cache size in ascending order, producing an if/else
 * ladder; on exit via 'found', i is the index of the smallest
 * general cache that fits @size.
 */
#define CACHE(x) \
if (size <= x) \
goto found; \
else \
i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
/* @size exceeds the largest general cache. */
return NULL;
found:
#ifdef CONFIG_ZONE_DMA
/* DMA requests use the parallel DMA-zone cache array. */
if (flags & GFP_DMA)
cachep = malloc_sizes[i].cs_dmacachep;
else
#endif
cachep = malloc_sizes[i].cs_cachep;
ret = kmem_cache_alloc_notrace(cachep, flags);
trace_kmalloc(_THIS_IP_, ret,
size, slab_buffer_size(cachep), flags);
return ret;
}
/* Non-constant size: take the normal run-time lookup path. */
return __kmalloc(size, flags);
}
/*
 * __kmalloc - out-of-line kmalloc entry point.
 * @size:  number of bytes requested
 * @flags: GFP allocation flags
 *
 * Plain allocation path with no debug caller tracking (caller == NULL).
 */
void *__kmalloc(size_t size, gfp_t flags)
{
	void *ret;

	ret = __do_kmalloc(size, flags, NULL);
	return ret;
}
/**
 * __do_kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 * @caller: function caller for debug tracking of the caller
 */
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
void *caller)
{
struct kmem_cache *cachep;
void *ret;
/* If you want to save a few bytes .text space: replace
* __ with kmem_.
* Then kmalloc uses the uninlined functions instead of the inline
* functions.
*/
/* Find the general cache whose object size fits this request. */
cachep = __find_general_cachep(size, flags);
/* The lookup returned NULL or the ZERO_SIZE_PTR sentinel (zero-size
 * request); return that value to the caller unchanged. */
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
/* The actual object allocation happens here. */
ret = __cache_alloc(cachep, flags, caller);
trace_kmalloc((unsigned long) caller, ret,
size, cachep->buffer_size, flags);
return ret;
}
而__cache_alloc函数中实际上调用的是__do_cache_alloc。对于非NUMA架构,它的实现非常简单,直接转发给____cache_alloc:
/*
 * __do_cache_alloc - !NUMA variant: there is only one node, so the
 * allocation always comes from the local per-CPU path.
 */
static __always_inline void *__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *obj = ____cache_alloc(cachep, flags);

	return obj;
}
/*
 * ____cache_alloc - fast-path allocation from the per-CPU array cache.
 *
 * Pops an object off the current CPU's local cache when one is
 * available; otherwise refills the local cache from the slab lists.
 * Must run with interrupts off (checked below).
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *objp;
struct array_cache *ac;
check_irq_off();
/* cachep->array[smp_processor_id()]: the array cache of this CPU. */
ac = cpu_cache_get(cachep);
/* avail is the number of objects currently cached; nonzero means hit. */
if (likely(ac->avail)) {
/* Local cache hit: bump the hit counter (stats builds only). */
STATS_INC_ALLOCHIT(cachep);
/* Mark the local cache as recently used. */
ac->touched = 1;
/* Take the last free object; entry[] is used LIFO, so --avail both
 * pops the object and records the new count. */
objp = ac->entry[--ac->avail];
} else {
/* Local cache empty: bump the miss counter (stats builds only). */
STATS_INC_ALLOCMISS(cachep);
/* Refill the local cache from the node's slab lists and return
 * one object from the refill. */
objp = cache_alloc_refill(cachep, flags);
}
/*
 * To avoid a false negative, if an object that is in one of the
 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
 * treat the array pointers as a reference to the object.
 */
/* Clear the entry[] slot that held the object just handed out. */
kmemleak_erase(&ac->entry[ac->avail]);
return objp;
}
/*
 * cache_alloc_refill - slow path: refill the per-CPU array cache.
 *
 * Tries, in order: the node's shared array cache, the partial slab
 * list, the free slab list, and finally growing the cache with a new
 * slab. Returns one object for the caller (or NULL on failure); any
 * extra objects pulled in stay in the local cache for later requests.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
int batchcount;
struct kmem_list3 *l3;
struct array_cache *ac;
int node;
retry:
check_irq_off();
/* Current NUMA node id. */
node = numa_node_id();
/* This CPU's local array cache. */
ac = cpu_cache_get(cachep);
/* Number of objects to pull in per refill. */
batchcount = ac->batchcount;
/* If this local cache was not touched recently, cap the refill. */
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
/*
* If there was little recent activity on this cache, then
* perform only a partial refill. Otherwise we could generate
* refill bouncing.
*/
batchcount = BATCHREFILL_LIMIT;
}
/* The kmem_list3 holding this node's three slab lists. */
l3 = cachep->nodelists[node];
BUG_ON(ac->avail > 0 || !l3);
spin_lock(&l3->list_lock);
/* See if we can refill from the shared array */
/* The shared array cache is common to all CPUs of this node; try a
 * bulk transfer from it into the local cache first. */
if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
goto alloc_done;
/* Shared cache absent or empty: pull objects from the slab lists. */
while (batchcount > 0) {
struct list_head *entry;
struct slab *slabp;
/* Get slab alloc is to come from. */
/* Prefer partially-used slabs so free slabs stay reclaimable. */
entry = l3->slabs_partial.next;
/* Partial list empty? */
if (entry == &l3->slabs_partial) {
/* Record that we had to fall back to the free list. */
l3->free_touched = 1;
entry = l3->slabs_free.next;
/* Free list empty too: a new slab must be grown. */
if (entry == &l3->slabs_free)
goto must_grow;
}
/* Got a slab from one of the lists. */
slabp = list_entry(entry, struct slab, list);
check_slabp(cachep, slabp);
check_spinlock_acquired(cachep);
/*
* The slab was either on partial or free list so
* there must be at least one object available for
* allocation.
*/
BUG_ON(slabp->inuse >= cachep->num);
/* Drain objects from this slab until it is full or the batch quota
 * is exhausted (inuse counts allocated objects, num is the per-slab
 * capacity). */
while (slabp->inuse < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
/* Detach one free object and stash its address in the local cache. */
ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
node);
}
check_slabp(cachep, slabp);
/* move slabp to correct slabp list: */
/* Unlink the slab from whichever list it was on... */
list_del(&slabp->list);
if (slabp->free == BUFCTL_END)
/* ...no free objects left: it belongs on the full list... */
list_add(&slabp->list, &l3->slabs_full);
else
/* ...otherwise it still has room: back onto the partial list. */
list_add(&slabp->list, &l3->slabs_partial);
}
must_grow:
/* ac->avail objects moved from the slab lists into the local cache;
 * account for them in the node's free-object count. */
l3->free_objects -= ac->avail;
alloc_done:
spin_unlock(&l3->list_lock);
/* Still nothing available: the slab lists were empty, grow the cache
 * by allocating a fresh slab. */
if (unlikely(!ac->avail)) {
int x;
/* Allocate a new (empty) slab for this node. */
x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
/* cache_grow can reenable interrupts, then ac could change. */
/* Because cache_grow() may re-enable interrupts, the local cache
 * pointer may have changed; re-read it. */
ac = cpu_cache_get(cachep);
/* Growing failed and nothing arrived meanwhile: give up. */
if (!x && ac->avail == 0) /* no objects in sight? abort */
return NULL;
/* Grow succeeded but the local cache is still empty: retry the
 * refill from the (now non-empty) slab lists. */
if (!ac->avail) /* objects refilled by interrupt? */
goto retry;
}
/* Mark the local cache as recently used. */
ac->touched = 1;
/* Pop and return one object for the current request. */
return ac->entry[--ac->avail];
}
分析一下几个函数
/*
 * Move up to @max object pointers from one array cache to another.
 * All locking is the caller's responsibility.
 *
 * In this file it is used to bulk-move objects from a node's shared
 * array cache into a CPU's local cache.
 *
 * Returns the number of entries actually transferred (0 if the donor
 * is empty or the receiver is full).
 */
static int transfer_objects(struct array_cache *to,
		struct array_cache *from, unsigned int max)
{
	/* Clamp by donor stock, the caller's cap, and receiver free room. */
	unsigned int nr = from->avail;

	if (nr > max)
		nr = max;
	if (nr > to->limit - to->avail)
		nr = to->limit - to->avail;
	if (!nr)
		return 0;

	/* Copy the last @nr donor entries onto the end of the receiver. */
	memcpy(to->entry + to->avail, from->entry + from->avail - nr,
	       sizeof(void *) * nr);

	from->avail -= nr;
	to->avail += nr;
	to->touched = 1;
	return nr;
}
/*
 * slab_get_obj - detach the first free object from a slab.
 *
 * The slab keeps its free objects in a singly linked list threaded
 * through the bufctl array; slabp->free is the head index. Returns the
 * object's address and advances the free-list head.
 */
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
int nodeid)
{
/* slabp->free is the index of the first free object. */
/* index_to_obj: slab->s_mem + cache->buffer_size * idx; s_mem is the
 * address of the slab's first object, buffer_size the per-object size. */
void *objp = index_to_obj(cachep, slabp, slabp->free);
kmem_bufctl_t next;
/* One more object in use on this slab. */
slabp->inuse++;
/* Read the index of the next free object from the bufctl array. */
/* slab_bufctl: (kmem_bufctl_t *) (slabp + 1), i.e. the array right
 * after the slab descriptor. */
next = slab_bufctl(slabp)[slabp->free];
#if DEBUG
/* Poison the entry and verify the object belongs to this node. */
slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
WARN_ON(slabp->nodeid != nodeid);
#endif
/* Advance the free-list head to the next free object. */
slabp->free = next;
return objp;
}