最近比较忙,在做毕设(sheepdog),要在 sheepdog 的基础上做点东西,也就是说要改 sheepdog 的源码。我只有一个月时间,所以最近一直都在读 sheepdog 源码。我的 C 语言水平太菜,里面好多用法都不会,基本都是边看边查。。。
sheepdog 可以干什么?首先,你需要启动 sheep,启动的命令可以用下面这个。当然也可以用 test 目录下的测试脚本来启动一个虚拟的 sheepdog 集群。何谓虚拟呢?就是这个脚本可以在你的主机上启动 5 个 sheep 进程,每个 sheep 进程占用不同的端口,这样这 5 个 sheep 进程就组成了一个 sheepdog 集群。这样,你也就拥有一个 sheepdog 集群了。之后,你就可以用 dog 命令来看一些东西了,比如:dog node list、dog vdi list 之类的,具体是什么意思就需要你去 sheepdog 的 github 上读点文档啦。。。和系统相关的先说这些吧,这只是个开始,但并不是我的重点。再啰嗦一点,这样默认启动后,sheepdog 的日志目录是在 /tmp/sheepdog/ 目录下。这个目录下面有这样几个子目录:首先可能是 0/ 1/ 2/ 3/ ... 这些以数字命名的目录,每个目录对应一个 node ID,是该节点的 log 目录。因为你是在主机上虚拟出的几个 sheep,每一个 sheep 都要有一个 log 目录,这是自然的啦。。。进入任意一个目录,就拿 0/ 作比方吧,进去可以看到有这样几个文件:obj/ sheep.log epoch sock config 等。其中 obj 就是用来存放数据块的目录哦,如果你给你创建的 vdi 写入了比较多的东西,那么这个目录下应该有很多文件,并且每一个都是 4M。哈哈,不信你可以试一下。当然,要读系统 debug 日志的话,打开 sheep.log 读就可以了。
sheep /tmp/sheepdog/4 -z 4 -p 7004 -c local -n -y 127.0.0.1 -d
# 当然,这是我从 test 目录下的脚本里偷出来的哈。。。
dog 是 sheepdog 中很重要的一个组成部分,它提供了一系列的系统命令。要分析整个系统的功能,我们当然可以从这里入手,顺藤摸瓜。。。好吧,开始吧。首先我们要找到入口文件,会是谁呢?当然是 dog.c 啦,还能有谁啊。打开它,从哪里读呢?当然是从 main 开始啊。开始看吧。
下面是 dog.c 的 main 函数部分。当然,有些内容我省略掉了。首先需要注意的是 init_commands(&commands) 这个函数,我们跟进去,看看它具体在干什么。
/*
 * Entry point of the `dog` command-line tool (excerpt; the option-parsing
 * loop body is elided in this listing).
 *
 * argv[1] and argv[2] are handed to setup_commands() and subcommand_usage(),
 * and option parsing starts at index 3, so the expected invocation shape is
 * `dog <command> <subcommand> [options/args]`.
 *
 * Returns the value produced by command_fn(); several setup failures call
 * exit(EXIT_SYSFAIL) instead of returning.
 */
int main(int argc, char **argv)
{
int ch, longindex, ret;
unsigned long flags;
struct option *long_options;
const struct command *commands;
const char *short_options;
/* p, sdhost and sdport are not used in the visible code; presumably the
 * elided option loop below uses them -- TODO confirm. */
char *p;
const struct sd_option *sd_opts;
uint8_t sdhost[16];
int sdport;
install_crash_handler(crash_handler);
/* Obtain the table of top-level commands (see init_commands()). */
init_commands(&commands);
/* NOTE(review): the code below reads argv[1] and argv[2] unconditionally,
 * so this presumably relies on usage() not returning -- confirm. */
if (argc < 2)
usage(commands, 0);
flags = setup_commands(commands, argv[1], argv[2]);
/* Skip the program name, command and subcommand before parsing options. */
optind = 3;
/* Build getopt_long() inputs from command_opts, which is not declared in
 * this function (presumably set up by setup_commands() -- TODO confirm). */
sd_opts = build_sd_options(command_opts);
long_options = build_long_options(sd_opts);
short_options = build_short_options(sd_opts);
/* NOTE(review): mytest_func() is not declared anywhere in this excerpt; it
 * looks like a local experiment hook -- confirm whether it should stay. */
mytest_func();
while ((ch = getopt_long(argc, argv, short_options, long_options,
&longindex)) >= 0) {
/* Option handling is elided in this listing. */
......
}
/* Disable highlighting when stdout is not a console or raw output is on. */
if (!is_stdout_console() || raw_output)
highlight = false;
/* Subcommands flagged CMD_NEED_NODELIST get the node list fetched first. */
if (flags & CMD_NEED_NODELIST) {
ret = update_node_list(SD_MAX_NODES);
if (ret < 0) {
sd_err("Failed to get node list");
exit(EXIT_SYSFAIL);
}
}
/* CMD_NEED_ARG: argc == optind means no argument is left after options. */
if (flags & CMD_NEED_ARG && argc == optind)
subcommand_usage(argv[1], argv[2], EXIT_USAGE);
if (init_event(EPOLL_SIZE) < 0)
exit(EXIT_SYSFAIL);
/* get_nr_nodes is passed as a function pointer here, not called. */
if (init_work_queue(get_nr_nodes) != 0) {
sd_err("Failed to init work queue");
exit(EXIT_SYSFAIL);
}
if (sockfd_init()) {
sd_err("sockfd_init() failed");
exit(EXIT_SYSFAIL);
}
/* command_fn is not declared in this function; presumably it is the handler
 * selected by setup_commands() -- TODO confirm. */
ret = command_fn(argc, argv);
/* A handler returning EXIT_USAGE triggers the subcommand usage text. */
if (ret == EXIT_USAGE)
subcommand_usage(argv[1], argv[2], EXIT_USAGE);
return ret;
}
init_commands(const struct command **commands) 命令初始化函数。
/*
 * Hand back the table of top-level dog commands through *commands.
 *
 * The table is built once: the first call copies a stack-local template
 * into heap memory obtained from xmalloc() and caches the pointer in a
 * function-local static; later calls return the same cached pointer.
 * The table ends with a {NULL,} sentinel entry.
 */
static void init_commands(const struct command **commands)
{
	/* Cached across calls; stays NULL until the first invocation. */
	static struct command *cached_table;

	if (cached_table == NULL) {
		struct command builtin[] = {
			vdi_command,		/* in vdi.c */
			node_command,		/* in node.c */
			cluster_command,	/* in cluster.c */
			trace_command,		/* in dog.h */
			{NULL,}
		};

		cached_table = xmalloc(sizeof(builtin));
		memcpy(cached_table, builtin, sizeof(builtin));
	}

	*commands = cached_table;
}
vdi_command,
/*
 * Top-level descriptor for the "vdi" command: the command name, the
 * vdi_cmd table of its subcommands, and vdi_parser.
 * The initializer is positional; the field layout of struct command is
 * declared elsewhere. vdi_parser is presumably the option-parsing callback
 * for this command -- TODO confirm.
 */
struct command vdi_command = {
"vdi",
vdi_cmd,
vdi_parser
};
vdi_cmd。好啦,到这里就差不多了,这里你应该有点感觉了:你在命令行中敲出来的命令,都是存放在这个地方的,对你的每一个命令的响应,也是在这里做出的。比如 dog vdi create,就是第二项 vdi_cmd[1] 所对应的内容,其中 vdi_create 是一个指向函数的指针。我们可以去看看这个函数的具体内容。
/*
 * Subcommand table for `dog vdi ...`.
 *
 * Each entry is a positional initializer with eight values, in this order:
 *   1. subcommand name
 *   2. argument synopsis (NULL when the subcommand takes none)
 *   3. a string of single letters -- presumably the short options this
 *      subcommand accepts; TODO confirm against struct subcommand
 *   4. one-line description
 *   5. nested subcommand table, or NULL (only "cache" sets one)
 *   6. flags: CMD_NEED_NODELIST and/or CMD_NEED_ARG, or 0
 *   7. handler function
 *   8. option table (vdi_options for every entry)
 *
 * The table is terminated by a {NULL,} sentinel entry.
 */
static struct subcommand vdi_cmd[] = {
{"check", "<vdiname>", "saph", "check and repair image's consistency",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_check, vdi_options},
{"create", "<vdiname> <size>", "Pycaphrv", "create an image",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_create, vdi_options},
{"snapshot", "<vdiname>", "saphrv", "create a snapshot",
NULL, CMD_NEED_ARG,
vdi_snapshot, vdi_options},
{"clone", "<src vdi> <dst vdi>", "sPcaphrv", "clone an image",
NULL, CMD_NEED_ARG,
vdi_clone, vdi_options},
{"delete", "<vdiname>", "saph", "delete an image",
NULL, CMD_NEED_ARG,
vdi_delete, vdi_options},
{"rollback", "<vdiname>", "saphfrv", "rollback to a snapshot",
NULL, CMD_NEED_ARG,
vdi_rollback, vdi_options},
{"list", "[vdiname]", "aprh", "list images",
NULL, 0, vdi_list, vdi_options},
{"tree", NULL, "aph", "show images in tree view format",
NULL, 0, vdi_tree, vdi_options},
{"graph", NULL, "aph", "show images in Graphviz dot format",
NULL, 0, vdi_graph, vdi_options},
{"object", "<vdiname>", "isaph", "show object information in the image",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_object, vdi_options},
{"track", "<vdiname>", "isaph", "show the object epoch trace in the image",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_track, vdi_options},
{"setattr", "<vdiname> <key> [value]", "dxaph", "set a VDI attribute",
NULL, CMD_NEED_ARG,
vdi_setattr, vdi_options},
{"getattr", "<vdiname> <key>", "aph", "get a VDI attribute",
NULL, CMD_NEED_ARG,
vdi_getattr, vdi_options},
{"resize", "<vdiname> <new size>", "aph", "resize an image",
NULL, CMD_NEED_ARG,
vdi_resize, vdi_options},
{"read", "<vdiname> [<offset> [<len>]]", "saph", "read data from an image",
NULL, CMD_NEED_ARG,
vdi_read, vdi_options},
{"write", "<vdiname> [<offset> [<len>]]", "apwh", "write data to an image",
NULL, CMD_NEED_ARG,
vdi_write, vdi_options},
{"backup", "<vdiname> <backup>", "sFaph", "create an incremental backup between two snapshots",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_backup, vdi_options},
{"restore", "<vdiname> <backup>", "saph", "restore snapshot images from a backup",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_restore, vdi_options},
/* The only entry with a nested subcommand table (vdi_cache_cmd). */
{"cache", "<vdiname>", "saph", "Run 'dog vdi cache' for more information",
vdi_cache_cmd, CMD_NEED_ARG,
vdi_cache, vdi_options},
{NULL,},
};
vdi_create 额,不好意思,我只是随便选了一个,没想到这个函数这么长,但是创建一个 vdi 的过程就是这样的。当然里面又引出了很多新的东西,这就是需要我们去认真分析的东西。相信已经看到了,里面最重要的过程应该是那个 ret = do_vdi_create() 过程。那是下一个应该考虑的过程。
/*
 * Handler for `dog vdi create <vdiname> <size>`.
 *
 * Reads the VDI name and size from argv starting at optind, validates the
 * size against SD_OLD_MAX_VDI_SIZE / SD_MAX_VDI_SIZE and the copy count
 * against sd_nodes_nr, then creates the VDI through do_vdi_create().
 * When vdi_cmd_data.prealloc is set, it additionally writes every data
 * object of the image and records each one in the inode.
 *
 * @argc: argument count (not used by this function)
 * @argv: argument vector; argv[optind] is the VDI name, followed by the size
 *
 * Returns EXIT_USAGE for a missing/invalid size or too many copies,
 * EXIT_FAILURE when reading the new inode or preallocating an object
 * fails, the do_vdi_create() result when creation fails or no
 * preallocation was requested, and EXIT_SUCCESS otherwise.
 */
static int vdi_create(int argc, char **argv)
{
	const char *vdiname = argv[optind++];
	uint64_t size;
	uint32_t vid;
	uint64_t oid;
	uint32_t idx, max_idx, nr_copies = vdi_cmd_data.nr_copies;
	/*
	 * Must be signed: with an unsigned type the "ret < 0" check after
	 * option_parse_size() can never be true, so a parse failure would be
	 * ignored and the uninitialized 'size' would be used.
	 */
	int ret;
	struct sd_inode *inode = NULL;

	if (!argv[optind]) {
		sd_err("Please specify the VDI size");
		return EXIT_USAGE;
	}

	ret = option_parse_size(argv[optind], &size);
	if (ret < 0)
		return EXIT_USAGE;

	/* Sizes above the old limit are only accepted with a store policy. */
	if (size > SD_OLD_MAX_VDI_SIZE && 0 == vdi_cmd_data.store_policy) {
		sd_err("VDI size is larger than %s bytes, please use '-y' to "
		       "create a hyper volume with size up to %s bytes",
		       strnumber(SD_OLD_MAX_VDI_SIZE),
		       strnumber(SD_MAX_VDI_SIZE));
		return EXIT_USAGE;
	}

	if (size > SD_MAX_VDI_SIZE) {
		sd_err("VDI size is too large");
		return EXIT_USAGE;
	}

	if (nr_copies > sd_nodes_nr) {
		sd_err("There are not enough nodes(%d) to hold the copies(%d)",
		       sd_nodes_nr, nr_copies);
		return EXIT_USAGE;
	}

	ret = do_vdi_create(vdiname, size, 0, &vid, false,
			    vdi_cmd_data.nr_copies, vdi_cmd_data.copy_policy,
			    vdi_cmd_data.store_policy);
	/*
	 * Stop here on failure, or when no preallocation was requested.
	 * NOTE(review): this jump also skips the verbose VDI-ID print below,
	 * so the ID is only printed for preallocated images -- confirm that
	 * this is intended.
	 */
	if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc)
		goto out;

	/* Preallocation: load the inode of the VDI that was just created. */
	inode = xmalloc(sizeof(*inode));
	ret = dog_read_object(vid_to_vdi_oid(vid), inode, sizeof(*inode), 0,
			      true);
	if (ret != SD_RES_SUCCESS) {
		sd_err("Failed to read a newly created VDI object");
		ret = EXIT_FAILURE;
		goto out;
	}

	/* One data object per SD_DATA_OBJ_SIZE bytes, rounded up. */
	max_idx = DIV_ROUND_UP(size, SD_DATA_OBJ_SIZE);
	for (idx = 0; idx < max_idx; idx++) {
		vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size);
		oid = vid_to_data_oid(vid, idx);

		/* Write the data object with a NULL buffer and zero length. */
		ret = dog_write_object(oid, 0, NULL, 0, 0, 0, inode->nr_copies,
				       inode->copy_policy, true, true);
		if (ret != SD_RES_SUCCESS) {
			ret = EXIT_FAILURE;
			goto out;
		}

		/* Record the object in the in-memory inode, then write it out. */
		INODE_SET_VID(inode, idx, vid);
		ret = sd_inode_write_vid(dog_bnode_writer, inode, idx, vid, vid,
					 0, false, true);
		if (ret) {
			ret = EXIT_FAILURE;
			goto out;
		}
	}
	/* Final progress update once the loop has covered every object. */
	vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size);
	ret = EXIT_SUCCESS;

	if (verbose) {
		if (raw_output)
			printf("%x\n", vid);
		else
			printf("VDI ID of newly created VDI: %x\n", vid);
	}
out:
	/* inode is still NULL on the early exits; free(NULL) is a no-op. */
	free(inode);
	return ret;
}
上面只是讲了一下 sheepdog 中 dog 的一小部分,并且没有深究,当然,我们是需要深究这部分的,看每一个功能从上层到下层的具体实现,这都是很有必要的。今天这些只是讲了最外层的部分,从交互入手,我想这也是认识一个系统的一个比较自然的过程吧。由于时间关系,先写这些,欢迎讨论,待续。。。