之前我们讲过vfs虚拟文件系统的lookup文件查找的实现,知道当内核的缓存没有找到的时候,就会调用到inode_operations结构体的lookup函数来实现,我们今天以ext2文件系统为例,来讲解一下对应文件系统的lookup函数的实现。
首先,ext2目录的inode_operations结构体定义在fs/ext2/namei.c中,定义如下:
/*
 * Inode operation table for ext2 directories.
 *
 * Each member points a directory-level operation at its ext2
 * implementation; .lookup is wired to ext2_lookup.  The four extended
 * attribute hooks are only present when CONFIG_EXT2_FS_XATTR is defined.
 */
const struct inode_operations ext2_dir_inode_operations = {
	.create		= ext2_create,
	.lookup		= ext2_lookup,
	.link		= ext2_link,
	.unlink		= ext2_unlink,
	.symlink	= ext2_symlink,
	.mkdir		= ext2_mkdir,
	.rmdir		= ext2_rmdir,
	.mknod		= ext2_mknod,
	.rename		= ext2_rename,
#ifdef CONFIG_EXT2_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext2_listxattr,
	.removexattr	= generic_removexattr,
#endif
	.setattr	= ext2_setattr,
	.permission	= ext2_permission,
};
可以看出,lookup函数指针指向ext2_lookup,这个函数同样定义在fs/ext2/namei.c中,定义如下:
/*
 * ext2_lookup - resolve the name held in @dentry inside directory @dir.
 * @dir:    directory inode to search
 * @dentry: dentry carrying the name to look up (dentry->d_name)
 * @nd:     not used by this function
 *
 * Returns ERR_PTR(-ENAMETOOLONG) when the name is longer than
 * EXT2_NAME_LEN, ERR_PTR(-EACCES) when iget() yields no inode for the
 * number that was found, and otherwise whatever d_splice_alias() returns.
 */
static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
	struct inode *found = NULL;
	ino_t inum;

	/* Names longer than the ext2 limit cannot exist in the directory. */
	if (dentry->d_name.len > EXT2_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	/* Core step: map the name to an inode number (0 means no such entry). */
	inum = ext2_inode_by_name(dir, dentry);
	if (inum != 0) {
		/* Turn the inode number into an in-core inode. */
		found = iget(dir->i_sb, inum);
		if (found == NULL)
			return ERR_PTR(-EACCES);
	}

	/*
	 * Not an error path: the inode, or NULL when the name was not
	 * found, is handed to d_splice_alias() together with the dentry.
	 */
	return d_splice_alias(found, dentry);
}
然后我们看一下ext2_inode_by_name函数,这个函数定义在fs/ext2/dir.c,定义如下
/*
 * ext2_inode_by_name - look up the inode number recorded for a name.
 * @dir:    directory inode to search
 * @dentry: dentry carrying the name to look up
 *
 * Returns the inode number stored in the matching directory entry
 * (converted from little-endian), or 0 when ext2_find_entry() finds
 * nothing.
 */
ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry)
{
	struct page *page;
	struct ext2_dir_entry_2 *entry;
	ino_t ino;

	/* The real work: scan the directory for the entry. */
	entry = ext2_find_entry (dir, dentry, &page);
	if (!entry)
		return 0;

	/* Read the number first; the entry lives inside the page being released. */
	ino = le32_to_cpu(entry->inode);
	ext2_put_page(page);
	return ino;
}
我们继续看ext2_find_entry函数,ext2_find_entry函数定义在fs/ext2/dir.c,定义如下
/*
 * ext2_find_entry - scan directory @dir for the entry named by @dentry.
 * @dir:      directory inode to search
 * @dentry:   dentry whose d_name holds the name being looked up
 * @res_page: out-parameter; on success receives the page that contains
 *            the returned entry (the page is NOT released on that path,
 *            so the caller has to release it)
 *
 * Returns the matching directory entry, or NULL when nothing matched or
 * a corrupted directory was detected.  Note that when the directory has
 * no pages at all the function returns before *res_page is written.
 */
struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
struct dentry *dentry, struct page ** res_page)
{
/* Name (and its length) of the entry being searched for, taken from the dentry. */
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
/*
 * Record length computed from the name length; used below to bound the
 * scan of each page (presumably the minimum on-disk size of an entry
 * holding a name this long -- confirm against EXT2_DIR_REC_LEN).
 */
unsigned reclen = EXT2_DIR_REC_LEN(namelen);
unsigned long start, n;
/* Number of pages of the directory, as reported by dir_pages(). */
unsigned long npages = dir_pages(dir);
struct page *page = NULL;
struct ext2_inode_info *ei = EXT2_I(dir);
ext2_dirent * de;
/* A directory without any page cannot contain the name. */
if (npages == 0)
goto out;
/* OFFSET_CACHE */
*res_page = NULL;
/*
 * Start at the page index remembered in i_dir_start_lookup (it is set
 * at "found" below), falling back to page 0 if that index is out of range.
 */
start = ei->i_dir_start_lookup;
if (start >= npages)
start = 0;
n = start;
/* Outer loop: visit the pages one by one, wrapping around, until back at start. */
do {
char *kaddr;
/* Fetch page n of the directory; an error page is simply skipped. */
page = ext2_get_page(dir, n);
/* Only scan the page when it was obtained successfully. */
if (!IS_ERR(page)) {
/* Kernel virtual address of the page contents; the first entry starts there. */
kaddr = page_address(page);
de = (ext2_dirent *) kaddr;
/*
 * Upper bound for the scan: entries starting beyond this address are
 * not examined (ext2_last_byte() presumably gives the number of valid
 * bytes in page n -- confirm).
 */
kaddr += ext2_last_byte(dir, n) - reclen;
/* Inner loop: walk the entries of this page up to the bound. */
while ((char *) de <= kaddr) {
/* A zero rec_len is reported as corruption; release the page and give up. */
if (de->rec_len == 0) {
ext2_error(dir->i_sb, __FUNCTION__,
"zero-length directory entry");
ext2_put_page(page);
goto out;
}
/* Compare the names; on a match jump out with the page still held. */
if (ext2_match (namelen, name, de))
goto found;
/* Step to the following entry in the page. */
de = ext2_next_entry(de);
}
/* Nothing matched in this page: release it before moving on. */
ext2_put_page(page);
}
/* Advance to the next page index, wrapping to 0 past the last page. */
if (++n >= npages)
n = 0;
/* next page is past the blocks we've got */
if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
ext2_error(dir->i_sb, __FUNCTION__,
"dir %lu size %lld exceeds block count %llu",
dir->i_ino, dir->i_size,
(unsigned long long)dir->i_blocks);
goto out;
}
} while (n != start);
out:
return NULL;
found:
/* Success: hand back the page and the entry, and remember the page index for the next lookup. */
*res_page = page;
ei->i_dir_start_lookup = n;
return de;
}
然后我们回头看一下iget函数,它根据inode号返回对应的inode结构体,定义在include/linux/fs.h中,定义如下:
static inline struct inode *iget(struct super_block *sb, unsigned long ino)
{
/*从挂载的文件系统里寻找inode*/
struct inode *inode = iget_locked(sb, ino);
/*如果没有找到就调用super_block的函数,直接读取inode*/
if (inode && (inode->i_state & I_NEW)) {
sb->s_op->read_inode(inode);
unlock_new_inode(inode);
}
return inode;
}
iget中调用的是super_operations结构体的read_inode函数,ext2的super_operations定义在fs/ext2/super.c里,定义如下:
/*
 * Superblock operation table for ext2.
 *
 * .read_inode points at ext2_read_inode, the callback used to fill in a
 * new in-core inode.  The quota hooks are only present when CONFIG_QUOTA
 * is defined.
 */
static const struct super_operations ext2_sops = {
	.alloc_inode	= ext2_alloc_inode,
	.destroy_inode	= ext2_destroy_inode,
	.read_inode	= ext2_read_inode,
	.write_inode	= ext2_write_inode,
	.put_inode	= ext2_put_inode,
	.delete_inode	= ext2_delete_inode,
	.put_super	= ext2_put_super,
	.write_super	= ext2_write_super,
	.statfs		= ext2_statfs,
	.remount_fs	= ext2_remount,
	.clear_inode	= ext2_clear_inode,
	.show_options	= ext2_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= ext2_quota_read,
	.quota_write	= ext2_quota_write,
#endif
};
从上边可以看出来,read_inode函数就是ext2_read_inode函数,这个函数定义在fs/ext2/inode.c里,定义如下
/*
 * ext2_read_inode - fill an in-core inode from its on-disk ext2 inode.
 * @inode: inode to populate; inode->i_ino and inode->i_sb select the
 *         on-disk inode to read
 *
 * Copies the on-disk fields (converting from little-endian) into the
 * generic inode and the ext2-private ext2_inode_info, then selects the
 * operation tables according to the file type.  If the raw inode cannot
 * be obtained, or the inode turns out to be deleted, the inode is marked
 * bad with make_bad_inode() instead.
 */
void ext2_read_inode (struct inode * inode)
{
struct ext2_inode_info *ei = EXT2_I(inode);
ino_t ino = inode->i_ino;
struct buffer_head * bh;
/* Locate the raw on-disk inode from the superblock and inode number; bh receives the buffer that holds it. */
struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
int n;
#ifdef CONFIG_EXT2_FS_POSIX_ACL
ei->i_acl = EXT2_ACL_NOT_CACHED;
ei->i_default_acl = EXT2_ACL_NOT_CACHED;
#endif
if (IS_ERR(raw_inode))
goto bad_inode;
/* Translate the on-disk ext2_inode fields into the in-core inode. */
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
/* Unless the NO_UID32 option is set, merge in the upper 16 bits of uid/gid. */
if (!(test_opt (inode->i_sb, NO_UID32))) {
inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
/* Only whole seconds are read from disk here; the nanosecond parts are zeroed. */
inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
/* We now have enough fields to check if the inode was active or not.
* This is needed because nfsd might try to access dead inodes
* the test is that same one that e2fsck uses
* NeilBrown 1999oct15
*/
if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
/* this inode is deleted */
brelse (bh);
goto bad_inode;
}
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
ei->i_frag_no = raw_inode->i_frag;
ei->i_frag_size = raw_inode->i_fsize;
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
ei->i_dir_acl = 0;
/* Regular files take the upper 32 bits of the size from i_size_high; other types read i_dir_acl instead. */
if (S_ISREG(inode->i_mode))
inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
else
ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
/* The deletion time was only needed for the liveness test above; reset it. */
ei->i_dtime = 0;
inode->i_generation = le32_to_cpu(raw_inode->i_generation);
ei->i_state = 0;
ei->i_next_alloc_block = 0;
ei->i_next_alloc_goal = 0;
ei->i_prealloc_count = 0;
/* Block group holding this inode: inode numbers start at 1, hence (ino - 1). */
ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
ei->i_dir_start_lookup = 0;
/*
* NOTE! The in-memory inode i_data array is in little-endian order
* even on big-endian machines: we do NOT byteswap the block numbers!
*/
for (n = 0; n < EXT2_N_BLOCKS; n++)
ei->i_data[n] = raw_inode->i_block[n];
/* Select the inode, file and address-space operation tables by file type. */
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext2_file_inode_operations;
if (ext2_use_xip(inode->i_sb)) {
inode->i_mapping->a_ops = &ext2_aops_xip;
inode->i_fop = &ext2_xip_file_operations;
} else if (test_opt(inode->i_sb, NOBH)) {
inode->i_mapping->a_ops = &ext2_nobh_aops;
inode->i_fop = &ext2_file_operations;
} else {
inode->i_mapping->a_ops = &ext2_aops;
inode->i_fop = &ext2_file_operations;
}
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext2_dir_inode_operations;
inode->i_fop = &ext2_dir_operations;
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
} else if (S_ISLNK(inode->i_mode)) {
/* Fast symlinks get their own operation table and no address-space operations are set for them here. */
if (ext2_inode_is_fast_symlink(inode))
inode->i_op = &ext2_fast_symlink_inode_operations;
else {
inode->i_op = &ext2_symlink_inode_operations;
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
}
} else {
/*
 * Any other file type: the device number is decoded from i_block[0]
 * with old_decode_dev() when that slot is non-zero, otherwise from
 * i_block[1] with new_decode_dev().
 */
inode->i_op = &ext2_special_inode_operations;
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
}
/* Done with the on-disk copy: release the buffer, then apply the ext2 flags to the inode. */
brelse (bh);
ext2_set_inode_flags(inode);
return;
bad_inode:
make_bad_inode(inode);
return;
}
接下来看主要的逻辑函数ext2_get_inode,这个函数定义在fs/ext2/inode.c文件,定义如下
/*
 * ext2_get_inode - locate the on-disk inode for inode number @ino.
 * @sb:  superblock of the filesystem
 * @ino: inode number to look up
 * @p:   out-parameter; set to NULL on entry and, on success, to the
 *       buffer_head that holds the block containing the inode
 *
 * Returns a pointer into that buffer's data at the inode's position.
 * On failure returns ERR_PTR(-EINVAL) for an out-of-range inode number,
 * or ERR_PTR(-EIO) when the group descriptor is unavailable or the
 * inode table block cannot be read.
 */
static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
					struct buffer_head **p)
{
	struct buffer_head *bh;
	struct ext2_group_desc *desc;
	unsigned long group;
	unsigned long blk;
	unsigned long byte_off;

	*p = NULL;

	/*
	 * Validate the inode number: apart from the root inode it must not
	 * be below EXT2_FIRST_INO, and it must not exceed the inode count
	 * recorded in the superblock.
	 */
	if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) ||
	    ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) {
		ext2_error(sb, "ext2_get_inode", "bad inode number: %lu",
			   (unsigned long) ino);
		return ERR_PTR(-EINVAL);
	}

	/* Inode numbers start at 1; dividing by inodes-per-group gives the block group. */
	group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);

	/* Fetch the descriptor of that block group. */
	desc = ext2_get_group_desc(sb, group, &bh);
	if (desc == NULL)
		return ERR_PTR(-EIO);

	/* Byte offset of the inode within its group's inode table. */
	byte_off = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb);

	/* First block of the inode table plus the whole blocks covered by the offset. */
	blk = le32_to_cpu(desc->bg_inode_table) +
		(byte_off >> EXT2_BLOCK_SIZE_BITS(sb));

	/* Read that block into a buffer_head. */
	bh = sb_bread(sb, blk);
	if (bh == NULL) {
		ext2_error(sb, "ext2_get_inode",
			   "unable to read inode block - inode=%lu, block=%lu",
			   (unsigned long) ino, blk);
		return ERR_PTR(-EIO);
	}

	*p = bh;

	/* The remainder of the offset within the block locates the inode in the buffer. */
	return (struct ext2_inode *) (bh->b_data +
				      (byte_off & (EXT2_BLOCK_SIZE(sb) - 1)));
}
这个函数里边比较重要的是ext2_get_group_desc函数,这个函数定义在fs/ext2/balloc.c,定义如下
/*
 * ext2_get_group_desc - return the descriptor of a block group.
 * @sb:          superblock of the filesystem
 * @block_group: index of the block group whose descriptor is wanted
 * @bh:          optional out-parameter; when non-NULL it receives the
 *               buffer_head that holds the descriptor
 *
 * Returns a pointer into the descriptor buffers kept in
 * sbi->s_group_desc[], or NULL (after reporting through ext2_error())
 * when @block_group is out of range or the buffer that should contain
 * the descriptor is not loaded.
 */
struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
					     unsigned int block_group,
					     struct buffer_head ** bh)
{
	unsigned long group_desc;
	unsigned long offset;
	struct ext2_group_desc * desc;
	struct ext2_sb_info *sbi = EXT2_SB(sb);

	/*
	 * Reject group indexes beyond the number of groups.  block_group is
	 * unsigned, so it is printed with %u: an out-of-range value with the
	 * top bit set would otherwise be logged as a negative number.
	 */
	if (block_group >= sbi->s_groups_count) {
		ext2_error (sb, "ext2_get_group_desc",
			    "block_group >= groups_count - "
			    "block_group = %u, groups_count = %lu",
			    block_group, sbi->s_groups_count);
		return NULL;
	}

	/*
	 * The descriptors are spread over several buffers.  Shifting by
	 * EXT2_DESC_PER_BLOCK_BITS selects the buffer (presumably log2 of
	 * the descriptors per block: e.g. 1024-byte blocks with 32-byte
	 * descriptors hold 32 = 2^5 per block, giving a shift of 5), and
	 * masking with (descriptors per block - 1) gives the index of the
	 * descriptor inside that buffer.
	 */
	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
	offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);

	/* The descriptor buffers are taken from the in-memory superblock info. */
	if (!sbi->s_group_desc[group_desc]) {
		ext2_error (sb, "ext2_get_group_desc",
			    "Group descriptor not loaded - "
			    "block_group = %u, group_desc = %lu, desc = %lu",
			    block_group, group_desc, offset);
		return NULL;
	}

	desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
	if (bh)
		*bh = sbi->s_group_desc[group_desc];
	return desc + offset;
}