当文件系统准备好数据之后,就会调用submit_bio提交一个IO请求,这里主要介绍bio数据管理和分配.
这里列举重要的数据结构:
/* Descriptor of one block I/O request, with the fields discussed in these notes. */
struct bio {
sector_t bi_sector; /* starting sector number on disk */
struct block_device *bi_bdev;/* block device this bio operates on */
unsigned long bi_flags; /* status flag bits */
unsigned long bi_rw; /* read/write */
unsigned short bi_vcnt; /* number of bio_vecs */
unsigned short bi_idx; /* current index into the bio_vec array */
unsigned int bi_size; /* total size of the bio: sum of the bv_len of all bi_io_vec entries */
bio_end_io_t *bi_end_io;/* called when the bio completes */
void *bi_private;/* private data of the bio */
unsigned int bi_max_vecs; /* maximum number of bio_vecs the bio can carry (the number actually in use is bi_vcnt) */
atomic_t bi_cnt; /* reference count of the bio */
struct bio_vec *bi_io_vec; /* the bio_vec array */
struct bio_set *bi_pool;/* bio_set this bio belongs to (bio_alloc_bioset stores its bs argument here) */
struct bio_vec bi_inline_vecs[0];/* bio_vecs embedded in the bio itself */
};
这里需要注意的是bi_io_vec和bi_inline_vecs两个变量:
bio需要携带的bio_vec数量不超过BIO_INLINE_VECS(4)时,bi_io_vec指向bi_inline_vecs,如果超过BIO_INLINE_VECS,
则从bio_vec专有的slab中分配,此时bi_inline_vecs的空间就被浪费了.
bio_vec表示一段连续的内存数据,最大为一个page.
一般情况下,bio_vec与page一一对应. 所有bio_vec合在一起,就组成了一个bio携带的全部数据.
/* One data segment of a bio: a range of bytes located inside a single page. */
struct bio_vec {
struct page *bv_page;/* page the data belongs to */
unsigned int bv_len;/* size of the data */
unsigned int bv_offset;/* offset of the data within the page */
};
在分析bio分配之前,先要弄清楚bio slab和bio_vec slab的初始化
内核定义了两个全局变量
/* Table of bio slabs, one entry per distinct bio size; allocated in init_bio(). */
static struct bio_slab *bio_slabs;
/* Builds one bvec_slabs entry for x vectors; its name is "biovec-" followed by the stringified x. */
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
/* Slab descriptors for bio_vec arrays holding from 1 up to BIO_MAX_PAGES bio_vecs. */
static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
/*
 * Initialization routine for bio allocation: sets up the bio slab table,
 * the bio_vec slabs and the fs_bio_set pool.
 *
 * Panics when the slab table or the bio_set cannot be allocated;
 * otherwise returns 0.
 */
static int __init init_bio(void)
{
	/*
	 * The bio slab table starts empty with room for two entries; more
	 * bio slabs of other sizes are created dynamically later on.
	 */
	bio_slab_nr = 0;
	bio_slab_max = 2;
	bio_slabs = kzalloc(sizeof(struct bio_slab) * bio_slab_max, GFP_KERNEL);
	if (!bio_slabs) {
		panic("bio: can't allocate bios\n");
	}

	/* Initialize the bio_vec slab array. */
	biovec_init_slabs();

	/* Create the bio_set memory pool. */
	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
	if (!fs_bio_set) {
		panic("bio: can't allocate bios\n");
	}

	return 0;
}
bio_vec slab初始化
/*
 * Fill in the slab pointer of every bvec_slabs entry.
 *
 * Entries holding no more than BIO_INLINE_VECS vectors get no cache
 * (slab is set to NULL); every larger entry gets its own kmem cache
 * sized for nr_vecs bio_vecs.
 */
static void __init biovec_init_slabs(void)
{
	int idx;

	for (idx = 0; idx < BIOVEC_NR_POOLS; idx++) {
		struct biovec_slab *entry = &bvec_slabs[idx];
		int bytes;

		if (entry->nr_vecs > BIO_INLINE_VECS) {
			/* Create the bio_vec slab for this entry. */
			bytes = entry->nr_vecs * sizeof(struct bio_vec);
			entry->slab = kmem_cache_create(entry->name, bytes, 0,
					SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
		} else {
			/* Small counts use the bio_vecs embedded in the bio. */
			entry->slab = NULL;
		}
	}
}
bio_set 初始化: 建立bio和bio_vec内存池
/*
 * Create a bio_set: allocate the set, then set up its bio slab, its bio
 * mempool and its bio_vec mempool.
 *
 * pool_size: passed to both mempool constructors.
 * front_pad: stored in bs->front_pad and added to the size requested
 *            from bio_find_or_create_slab().
 *
 * Returns the new bio_set, or NULL when the set itself or its bio slab
 * cannot be obtained.
 * NOTE(review): the `bad` label targeted by the two gotos is not part of
 * this excerpt; presumably it releases bs and returns NULL - confirm
 * against the full source.
 */
struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
{
/* room behind each bio for BIO_INLINE_VECS embedded bio_vecs */
unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
struct bio_set *bs;
bs = kzalloc(sizeof(*bs), GFP_KERNEL);
if (!bs)
return NULL;
/* front_pad can carry private data */
bs->front_pad = front_pad;
spin_lock_init(&bs->rescue_lock);
bio_list_init(&bs->rescue_list);
INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
/* find or create a bio slab whose objects include the inline bio_vecs */
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
if (!bs->bio_slab) {
kfree(bs);
return NULL;
}
/* create the bio memory pool */
bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
if (!bs->bio_pool)
goto bad;
/* create the biovec memory pool */
bs->bvec_pool = biovec_create_pool(bs, pool_size);
if (!bs->bvec_pool)
goto bad;
return bs;
}
从这里(结合下面的bio_find_or_create_slab)可以看出,一个bio的内存由4部分组成: front_pad + sizeof(struct bio) + BIO_INLINE_VECS * sizeof(struct bio_vec) + sizeof(struct bio_aux)
bio_find_or_create_slab会查找大小相符的bio slab,如果找不到,则新建一个bio slab
/*
 * Return a bio slab whose object size is
 * sizeof(struct bio) + sizeof(struct bio_aux) + extra_size: an already
 * registered slab of exactly that size is reused (its slab_ref is
 * incremented), otherwise a new slab named "bio-<entry>" is created.
 *
 * NOTE(review): this is an abridged excerpt. The declarations of i, entry,
 * slab, bslab, new_bio_slabs and new_bio_slab_max, the `if` that precedes
 * the `else if`, the code that picks `entry` and points bslab at that slot,
 * and the out_unlock label are not shown - confirm against the full source.
 */
static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
unsigned int sz = sizeof(struct bio) + sizeof(struct bio_aux) + extra_size;
/* scan the slabs registered so far */
while (i < bio_slab_nr) {
bslab = &bio_slabs[i];
/* found a bio slab of matching size */
else if (bslab->slab_size == sz) {
slab = bslab->slab;
bslab->slab_ref++;
break;
}
i++;
}
if (slab)
goto out_unlock;
/* table is full and no slot is available: double the bio slab array */
if (bio_slab_nr == bio_slab_max && entry == -1) {
new_bio_slab_max = bio_slab_max << 1;
new_bio_slabs = krealloc(bio_slabs,
new_bio_slab_max * sizeof(struct bio_slab),
GFP_KERNEL);
if (!new_bio_slabs)
goto out_unlock;
bio_slab_max = new_bio_slab_max;
bio_slabs = new_bio_slabs;
}
/* create the new bio slab */
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
SLAB_HWCACHE_ALIGN, NULL);
if (!slab)
goto out_unlock;
/* record the new slab in its table entry with an initial reference */
bslab->slab = slab;
bslab->slab_ref = 1;
bslab->slab_size = sz;
return slab;
}
bio分配主要有以下几个函数
struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)/*默认使用fs_bio_set 相关内存池分配 */
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)/*直接使用kmalloc来分配bio和bio_vec内存 */
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)/*bio_alloc和bio_kmalloc最终调用的函数 */
分配bio和对应数量的bio_vec
1. 从bio_set slab中分配或者直接用kmalloc分配
2. nr_iovecs大于inline bio_vec个数时,需要另外从bio_vec内存池中分配bio_vec数组
/*
 * Allocate a bio able to hold nr_iovecs bio_vecs.
 *
 * gfp_mask:  allocation flags handed to kmalloc/mempool_alloc/bvec_alloc.
 * nr_iovecs: number of bio_vecs the bio must be able to hold.
 * bs:        bio_set to allocate from; when NULL, the bio, its bio_aux and
 *            its bio_vecs come from a single kmalloc.
 *
 * Returns the initialized bio, or NULL when the bio allocation fails or
 * when bs is NULL and nr_iovecs exceeds UIO_MAXIOV.
 * NOTE(review): this is an abridged excerpt. The local declarations, the
 * setup of saved_gfp and the err_free label are not shown - confirm
 * against the full source.
 */
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
/* no bio_set: allocate directly with kmalloc */
if (!bs) {
if (nr_iovecs > UIO_MAXIOV)
return NULL;
p = kmalloc(sizeof(struct bio) + sizeof(struct bio_aux) +
nr_iovecs * sizeof(struct bio_vec),
gfp_mask);
front_pad = 0;
/* all requested vecs are inline here, so the separate bvec allocation below is never taken */
inline_vecs = nr_iovecs;
} else {
/* allocate the bio from the bio_set's bio mempool */
p = mempool_alloc(bs->bio_pool, gfp_mask);
/* on failure with a gfp_mask that differs from saved_gfp: punt to the rescuer, then retry with saved_gfp */
if (!p && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
p = mempool_alloc(bs->bio_pool, gfp_mask);
}
front_pad = bs->front_pad;
inline_vecs = BIO_INLINE_VECS;
}
if (unlikely(!p))
return NULL;
/* initialize the bio and the bio_aux */
/* the bio starts front_pad bytes into the allocation */
bio = p + front_pad;
bio_init(bio);
/* the bio_aux sits right behind the bio and its inline bio_vecs */
bio_aux = p + front_pad +
sizeof(struct bio) + (inline_vecs * sizeof(struct bio_vec));
bio_init_aux(bio, bio_aux);
/* if more iovecs are needed than the inline vecs provide, a separate bio_vec array must be allocated */
if (nr_iovecs > inline_vecs) {
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
/* same punt-and-retry scheme as for the bio allocation above */
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
}
if (unlikely(!bvl))
goto err_free;
/* marks that the bio_vec array must be released explicitly */
bio->bi_flags |= 1 << BIO_OWNS_VEC;
} else if (nr_iovecs) {
/* use the inline bio_vecs */
bvl = bio->bi_inline_vecs;
}
bio->bi_pool = bs;
/* idx is the index into the bio_vec slab array */
bio->bi_flags |= idx << BIO_POOL_OFFSET;
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bvl;
return bio;
}
bio_endio:bio 操作完成回调函数
bio_split: 将一个bio分割成两个bio
bio_trim: 截取bio某段数据
bio_add_page:建立bio与page联系