当前位置: 首页 > 工具软件 > Ext2Read > 使用案例 >

ext2文件系统之ext2_lookup函数源代码分析

裴金鑫
2023-12-01
之前我们讲过vfs虚拟文件系统的lookup文件查找的实现,知道当内核的缓存没有找到的时候,就会调用到inode_operations结构体的lookup函数来实现,我们今天以ext2文件系统为例,来讲解一下对应文件系统的lookup函数的实现。
首先,ext2目录对应的inode_operations结构体定义在fs/ext2/namei.c中,定义如下:
/*
 * Inode operation table for ext2 directories.
 *
 * Each member points at the ext2 implementation of the corresponding
 * directory operation; .lookup is the entry used to resolve a name inside
 * a directory. The extended-attribute handlers are only present when
 * CONFIG_EXT2_FS_XATTR is defined.
 */
const struct inode_operations ext2_dir_inode_operations = {
	.create		= ext2_create,
	.lookup		= ext2_lookup,
	.link		= ext2_link,
	.unlink		= ext2_unlink,
	.symlink	= ext2_symlink,
	.mkdir		= ext2_mkdir,
	.rmdir		= ext2_rmdir,
	.mknod		= ext2_mknod,
	.rename		= ext2_rename,
#ifdef CONFIG_EXT2_FS_XATTR
	/* Extended attribute support (optional) */
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext2_listxattr,
	.removexattr	= generic_removexattr,
#endif
	.setattr	= ext2_setattr,
	.permission	= ext2_permission,
};

可以看出,lookup函数就是ext2_lookup,这个函数同样定义在fs/ext2/namei.c中,定义如下:
/*
 * ext2_lookup - resolve the name held in @dentry inside directory @dir.
 *
 * @dir:    directory inode to search
 * @dentry: dentry carrying the name to look up
 * @nd:     not used by this function
 *
 * Returns ERR_PTR(-ENAMETOOLONG) if the name is longer than EXT2_NAME_LEN,
 * ERR_PTR(-EACCES) if an inode number was found but iget() returned NULL,
 * and otherwise whatever d_splice_alias() returns.
 */
static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
	struct inode *found = NULL;
	ino_t inum;

	/* Reject names that cannot exist on an ext2 filesystem. */
	if (dentry->d_name.len > EXT2_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	/* Core step: map the name to an inode number (0 means "no such entry"). */
	inum = ext2_inode_by_name(dir, dentry);
	if (inum != 0) {
		/* Obtain the in-memory inode for that inode number. */
		found = iget(dir->i_sb, inum);
		if (found == NULL)
			return ERR_PTR(-EACCES);
	}

	/*
	 * Hand the inode to the dentry. found is still NULL when the name
	 * does not exist, so the NULL case is passed through as well.
	 */
	return d_splice_alias(found, dentry);
}

然后我们看一下ext2_inode_by_name函数,这个函数定义在fs/ext2/dir.c,定义如下
/*
 * ext2_inode_by_name - return the inode number stored in the directory
 * entry of @dir whose name matches @dentry, or 0 if there is no such entry.
 */
ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry)
{
	struct page *entry_page;
	struct ext2_dir_entry_2 *entry;
	ino_t ino;

	/* The real work: locate the matching directory entry. */
	entry = ext2_find_entry (dir, dentry, &entry_page);
	if (!entry)
		return 0;

	/* The on-disk inode number is little-endian; convert, then drop the page. */
	ino = le32_to_cpu(entry->inode);
	ext2_put_page(entry_page);
	return ino;
}

我们继续看ext2_find_entry函数,ext2_find_entry函数定义在fs/ext2/dir.c,定义如下
/*
 * ext2_find_entry - search directory @dir for the entry named by @dentry.
 *
 * @dir:      directory inode to scan
 * @dentry:   dentry whose d_name is the name being searched for
 * @res_page: out-parameter; set to the page containing the entry on
 *            success and to NULL on every failure path
 *
 * Returns a pointer to the matching directory entry, or NULL when the name
 * is not found or a corrupted directory is detected. On success the page
 * stored in *res_page is still held and the caller is expected to release
 * it with ext2_put_page().
 *
 * The scan starts at the page remembered in i_dir_start_lookup (the page of
 * the previous successful lookup) and wraps around until it comes back to
 * that page.
 */
struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
			struct dentry *dentry, struct page ** res_page)
{
	/* Name being looked up, and its length */
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	/*
	 * Record length computed from the name length (presumably the
	 * smallest record that can hold such a name - confirm against
	 * EXT2_DIR_REC_LEN). It bounds the scan: an entry starting closer
	 * than reclen to the end of the used area is not examined.
	 */
	unsigned reclen = EXT2_DIR_REC_LEN(namelen);
	unsigned long start, n;
	/* Number of pages to scan for this directory */
	unsigned long npages = dir_pages(dir);
	struct page *page = NULL;
	struct ext2_inode_info *ei = EXT2_I(dir);
	ext2_dirent * de;

	/*
	 * Initialise the out-parameter before any early exit so that it is
	 * well defined on every path that returns NULL, including the
	 * empty-directory case below.
	 */
	*res_page = NULL;

	/* Nothing to search in an empty directory */
	if (npages == 0)
		goto out;

	/* OFFSET_CACHE */
	/* Page index at which to start searching */
	start = ei->i_dir_start_lookup;
	if (start >= npages)
		start = 0;
	n = start;
	/* Outer loop: walk the directory one page at a time */
	do {
		char *kaddr;
		/* Fetch page n of the directory */
		page = ext2_get_page(dir, n);
		/* A page that could not be obtained is skipped */
		if (!IS_ERR(page)) {
			/* Kernel virtual address of the page contents */
			kaddr = page_address(page);
			de = (ext2_dirent *) kaddr;
			/* Last address at which a large-enough entry can start */
			kaddr += ext2_last_byte(dir, n) - reclen;
			/* Walk the entries on this page */
			while ((char *) de <= kaddr) {
				/* A zero rec_len would loop forever: report corruption */
				if (de->rec_len == 0) {
					ext2_error(dir->i_sb, __FUNCTION__,
						"zero-length directory entry");
					ext2_put_page(page);
					goto out;
				}
				/* Name matches: this is the entry we want */
				if (ext2_match (namelen, name, de))
					goto found;
				/* Advance to the following entry */
				de = ext2_next_entry(de);
			}
			/* No match on this page: release it */
			ext2_put_page(page);
		}
		/* Move to the next page, wrapping to page 0 at the end */
		if (++n >= npages)
			n = 0;
		/* next page is past the blocks we've got */
		if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
			ext2_error(dir->i_sb, __FUNCTION__,
				"dir %lu size %lld exceeds block count %llu",
				dir->i_ino, dir->i_size,
				(unsigned long long)dir->i_blocks);
			goto out;
		}
	} while (n != start);
out:
	return NULL;


found:
	/* Hand back the page and remember where the match was found */
	*res_page = page;
	ei->i_dir_start_lookup = n;
	return de;
}


然后我们回过头来看一下iget函数,它根据inode编号返回对应的inode结构体,定义在include/linux/fs.h中,定义如下:
static inline struct inode *iget(struct super_block *sb, unsigned long ino)
{
	/*从挂载的文件系统里寻找inode*/
	struct inode *inode = iget_locked(sb, ino);
	/*如果没有找到就调用super_block的函数,直接读取inode*/
	if (inode && (inode->i_state & I_NEW)) {
		sb->s_op->read_inode(inode);
		unlock_new_inode(inode);
	}


	return inode;
}


这里调用的是super_operations结构体的read_inode函数,ext2的super_operations定义在fs/ext2/super.c里,定义如下:
/*
 * Superblock operation table for ext2.
 *
 * .read_inode is the hook that iget() invokes for a new inode; for ext2 it
 * is ext2_read_inode. The quota hooks are only present when CONFIG_QUOTA
 * is defined.
 */
static const struct super_operations ext2_sops = {
	.alloc_inode	= ext2_alloc_inode,
	.destroy_inode	= ext2_destroy_inode,
	.read_inode	= ext2_read_inode,
	.write_inode	= ext2_write_inode,
	.put_inode	= ext2_put_inode,
	.delete_inode	= ext2_delete_inode,
	.put_super	= ext2_put_super,
	.write_super	= ext2_write_super,
	.statfs		= ext2_statfs,
	.remount_fs	= ext2_remount,
	.clear_inode	= ext2_clear_inode,
	.show_options	= ext2_show_options,
#ifdef CONFIG_QUOTA
	/* Quota file access (optional) */
	.quota_read	= ext2_quota_read,
	.quota_write	= ext2_quota_write,
#endif
};


从上边可以看出来,read_inode函数就是ext2_read_inode函数,这个函数定义在fs/ext2/inode.c里,定义如下
/*
 * ext2_read_inode - fill in an in-memory inode from its on-disk ext2 inode.
 *
 * Reads the raw inode identified by inode->i_ino, copies its fields into
 * @inode and the ext2-private info, and selects the operation tables that
 * match the file type. If the raw inode cannot be obtained, or it describes
 * a deleted inode, @inode is marked bad with make_bad_inode().
 */
void ext2_read_inode (struct inode * inode)
{
	struct ext2_inode_info *ei = EXT2_I(inode);
	ino_t ino = inode->i_ino;
	struct buffer_head * bh;
	/* Locate the on-disk inode; bh receives the buffer that holds it */
	struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
	int n;


#ifdef CONFIG_EXT2_FS_POSIX_ACL
	ei->i_acl = EXT2_ACL_NOT_CACHED;
	ei->i_default_acl = EXT2_ACL_NOT_CACHED;
#endif
	if (IS_ERR(raw_inode))
 		goto bad_inode;
 	/* Copy the on-disk (little-endian) fields into the in-memory inode */
	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
	/* Unless NO_UID32 is set, the high 16 bits of uid/gid are stored separately */
	if (!(test_opt (inode->i_sb, NO_UID32))) {
		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
	}
	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
	inode->i_size = le32_to_cpu(raw_inode->i_size);
	inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
	inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
	inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
	/* Only whole seconds are taken from disk here; nanoseconds are zeroed */
	inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
	/* We now have enough fields to check if the inode was active or not.
	 * This is needed because nfsd might try to access dead inodes
	 * the test is that same one that e2fsck uses
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
		/* this inode is deleted */
		brelse (bh);
		goto bad_inode;
	}
	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
	ei->i_frag_no = raw_inode->i_frag;
	ei->i_frag_size = raw_inode->i_fsize;
	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ei->i_dir_acl = 0;
	/*
	 * Regular files use i_size_high as the upper 32 bits of the size;
	 * for every other type the i_dir_acl field is read instead.
	 */
	if (S_ISREG(inode->i_mode))
		inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
	else
		ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
	/* The inode passed the deleted check above, so reset the in-memory dtime */
	ei->i_dtime = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	ei->i_state = 0;
	ei->i_next_alloc_block = 0;
	ei->i_next_alloc_goal = 0;
	ei->i_prealloc_count = 0;
	/* Block group that holds this inode (inode numbers start at 1) */
	ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
	ei->i_dir_start_lookup = 0;


	/*
	 * NOTE! The in-memory inode i_data array is in little-endian order
	 * even on big-endian machines: we do NOT byteswap the block numbers!
	 */
	for (n = 0; n < EXT2_N_BLOCKS; n++)
		ei->i_data[n] = raw_inode->i_block[n];
	/* Select inode, file and address-space operations by file type */
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext2_file_inode_operations;
		if (ext2_use_xip(inode->i_sb)) {
			inode->i_mapping->a_ops = &ext2_aops_xip;
			inode->i_fop = &ext2_xip_file_operations;
		} else if (test_opt(inode->i_sb, NOBH)) {
			inode->i_mapping->a_ops = &ext2_nobh_aops;
			inode->i_fop = &ext2_file_operations;
		} else {
			inode->i_mapping->a_ops = &ext2_aops;
			inode->i_fop = &ext2_file_operations;
		}
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext2_dir_inode_operations;
		inode->i_fop = &ext2_dir_operations;
		if (test_opt(inode->i_sb, NOBH))
			inode->i_mapping->a_ops = &ext2_nobh_aops;
		else
			inode->i_mapping->a_ops = &ext2_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		/* Fast symlinks get their own inode operations and no a_ops here */
		if (ext2_inode_is_fast_symlink(inode))
			inode->i_op = &ext2_fast_symlink_inode_operations;
		else {
			inode->i_op = &ext2_symlink_inode_operations;
			if (test_opt(inode->i_sb, NOBH))
				inode->i_mapping->a_ops = &ext2_nobh_aops;
			else
				inode->i_mapping->a_ops = &ext2_aops;
		}
	} else {
		/*
		 * Any other file type: the device number is decoded from
		 * i_block[0] with old_decode_dev() when that slot is
		 * non-zero, otherwise from i_block[1] with new_decode_dev().
		 */
		inode->i_op = &ext2_special_inode_operations;
		if (raw_inode->i_block[0])
			init_special_inode(inode, inode->i_mode,
			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
		else 
			init_special_inode(inode, inode->i_mode,
			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
	}
	/* Done with the raw inode: release the buffer that holds it */
	brelse (bh);
	ext2_set_inode_flags(inode);
	return;
	
bad_inode:
	make_bad_inode(inode);
	return;
}

接下来看主要的逻辑函数ext2_get_inode,这个函数定义在fs/ext2/inode.c文件,定义如下
/*
 * ext2_get_inode - locate the on-disk inode numbered @ino.
 *
 * @sb:  superblock of the filesystem
 * @ino: inode number to look up
 * @p:   out-parameter; receives the buffer_head holding the inode-table
 *       block on success and is NULL on every error path
 *
 * Returns a pointer into the buffer's data at the inode's position, or
 * ERR_PTR(-EINVAL) for an out-of-range inode number, or ERR_PTR(-EIO) when
 * the group descriptor or the inode-table block cannot be obtained.
 */
static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
					struct buffer_head **p)
{
	struct buffer_head *gd_bh;
	struct buffer_head *itable_bh;
	struct ext2_group_desc *desc;
	unsigned long group;
	unsigned long byte_off;
	unsigned long blk;

	*p = NULL;

	/*
	 * Valid inode numbers are the root inode, or anything from the first
	 * non-reserved inode up to the filesystem's inode count.
	 */
	if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) ||
	    ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) {
		ext2_error(sb, "ext2_get_inode", "bad inode number: %lu",
			   (unsigned long) ino);
		return ERR_PTR(-EINVAL);
	}

	/* Inode numbers start at 1; dividing by inodes-per-group gives the group. */
	group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
	desc = ext2_get_group_desc(sb, group, &gd_bh);
	if (!desc)
		return ERR_PTR(-EIO);

	/* Byte offset of this inode within the group's inode table. */
	byte_off = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb);

	/* First inode-table block of the group plus whole blocks of offset. */
	blk = le32_to_cpu(desc->bg_inode_table) +
		(byte_off >> EXT2_BLOCK_SIZE_BITS(sb));

	itable_bh = sb_bread(sb, blk);
	if (!itable_bh) {
		ext2_error(sb, "ext2_get_inode",
			   "unable to read inode block - inode=%lu, block=%lu",
			   (unsigned long) ino, blk);
		return ERR_PTR(-EIO);
	}

	*p = itable_bh;

	/* Keep only the offset inside the block and point at the raw inode. */
	byte_off &= (EXT2_BLOCK_SIZE(sb) - 1);
	return (struct ext2_inode *) (itable_bh->b_data + byte_off);
}


这个函数里边比较重要的是ext2_get_group_desc函数,这个函数定义在fs/ext2/balloc.c,定义如下
/*
 * ext2_get_group_desc - return the descriptor of block group @block_group.
 *
 * @sb:          superblock of the filesystem
 * @block_group: index of the block group
 * @bh:          optional out-parameter; when non-NULL it receives the
 *               buffer_head of the descriptor block containing the result
 *
 * Returns a pointer to the group descriptor inside the descriptor buffers
 * kept in the superblock info, or NULL (after reporting an error) when the
 * group index is out of range or the descriptor block is not loaded.
 */
struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
					     unsigned int block_group,
					     struct buffer_head ** bh)
{
	unsigned long group_desc;
	unsigned long offset;
	struct ext2_group_desc * desc;
	struct ext2_sb_info *sbi = EXT2_SB(sb);

	/* Reject group indices beyond the number of groups */
	if (block_group >= sbi->s_groups_count) {
		/* block_group is unsigned, so it is printed with %u */
		ext2_error (sb, "ext2_get_group_desc",
			    "block_group >= groups_count - "
			    "block_group = %u, groups_count = %lu",
			    block_group, sbi->s_groups_count);

		return NULL;
	}
	/*
	 * Descriptors are packed several to a block. The shift divides the
	 * group index by the number of descriptors per block, giving the
	 * index of the descriptor block. For example, with 32 descriptors
	 * per block the shift is 5 (log2 of 32).
	 */
	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
	/* Index of the descriptor within that block (the remainder) */
	offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
	/* The descriptor blocks are cached in the superblock info */
	if (!sbi->s_group_desc[group_desc]) {
		ext2_error (sb, "ext2_get_group_desc",
			    "Group descriptor not loaded - "
			    "block_group = %u, group_desc = %lu, desc = %lu",
			     block_group, group_desc, offset);
		return NULL;
	}

	desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
	if (bh)
		*bh = sbi->s_group_desc[group_desc];
	/* Pointer arithmetic in units of descriptors */
	return desc + offset;
}

 类似资料: