弄个软链接,方便找文件
[root@archlinux ~]# ln -s /usr/etc/corosync/ /etc/corosync/
ln: failed to create symbolic link '/etc/corosync/': No such file or directory
[root@archlinux ~]# ln -s /usr/etc/corosync/ /etc/corosync
[root@archlinux ~]#
查看配置文件的帮助也正常:man corosync.conf
啥都没有,没有crm,没有服务,没有启动
[root@archlinux corosync]# crm status
bash: crm: command not found
[root@archlinux corosync]# service corosync status
bash: service: command not found
[root@archlinux corosync]# systemctl status corosync
Unit corosync.service could not be found.
[root@archlinux corosync]# ps -ef|grep corosync
root 33903 33853 0 11:57 pts/0 00:00:00 grep corosync
[root@archlinux corosync]# man corosync
参考Ubuntu的服务文件,Arch这边实际没有写服务文件
root@ubuntu18a:/var/lib/heartbeat/lrm# service corosync status
● corosync.service - Corosync Cluster Engine
Loaded: loaded (/lib/systemd/system/corosync.service; enabled; vendor preset: enabled)
Active: active (running) since Fri 2021-08-20 09:49:18 CST; 2h 9min ago
Docs: man:corosync
man:corosync.conf
man:corosync_overview
Main PID: 1237 (corosync)
Tasks: 2 (limit: 4656)
CGroup: /system.slice/corosync.service
└─1237 /usr/sbin/corosync -f
Aug 20 09:49:18 ubuntu18a corosync[1237]: [QB ] server name: cmap
Aug 20 09:49:18 ubuntu18a corosync[1237]: [QB ] server name: cfg
Aug 20 09:49:18 ubuntu18a corosync[1237]: [QB ] server name: cpg
Aug 20 09:49:18 ubuntu18a corosync[1237]: [QB ] server name: votequorum
Aug 20 09:49:18 ubuntu18a corosync[1237]: [QB ] server name: quorum
Aug 20 09:49:18 ubuntu18a corosync[1237]: [TOTEM ] A new membership (192.168.56.101:44) was formed. Members joined: 1234
Aug 20 09:49:18 ubuntu18a systemd[1]: Started Corosync Cluster Engine.
Aug 20 10:55:07 ubuntu18a corosync[1237]: [TOTEM ] A processor failed, forming new configuration.
Aug 20 10:55:07 ubuntu18a corosync[1237]: [TOTEM ] A new membership (192.168.56.101:48) was formed. Members
Aug 20 11:20:37 ubuntu18a corosync[1237]: [TOTEM ] A new membership (192.168.56.101:52) was formed. Members joined: 1235
root@ubuntu18a:/var/lib/heartbeat/lrm# ll /lib/systemd/system/corosync.service
-rw-r--r-- 1 root root 857 Mar 11 02:00 /lib/systemd/system/corosync.service
root@ubuntu18a:/var/lib/heartbeat/lrm# cat /lib/systemd/system/corosync.service
[Unit]
Description=Corosync Cluster Engine
Documentation=man:corosync man:corosync.conf man:corosync_overview
ConditionKernelCommandLine=!nocluster
Requires=network-online.target
After=network-online.target
[Service]
EnvironmentFile=-/etc/default/corosync
ExecStart=/usr/sbin/corosync -f $COROSYNC_OPTIONS
Type=notify
# The following config is for corosync with enabled watchdog service.
#
# When corosync watchdog service is being enabled and using with
# pacemaker.service, and if you want to exert the watchdog when a
# corosync process is terminated abnormally,
# uncomment the line of the following Restart= and RestartSec=.
#Restart=on-failure
# Specify a period longer than soft_margin as RestartSec.
#RestartSec=70
# rewrite according to environment.
#ExecStartPre=/sbin/modprobe softdog soft_margin=60
[Install]
WantedBy=multi-user.target
root@ubuntu18a:/var/lib/heartbeat/lrm#
验证配置文件
[root@archlinux corosync]# corosync -t
Can't read file /usr/etc/corosync/corosync.conf: No such file or directory
[root@archlinux etc]# cd /etc/corosync
[root@archlinux corosync]# ll
bash: ll: command not found
[root@archlinux corosync]# ls
corosync.conf.example service.d uidgid.d
[root@archlinux corosync]# ls -l *
-rw-r--r-- 1 root root 1917 Aug 20 11:48 corosync.conf.example
service.d:
total 0
uidgid.d:
total 0
[root@archlinux corosync]# cp -p corosync.conf.example corosync.conf
[root@archlinux corosync]# vi corosync.conf
啥都没有修改
[root@archlinux corosync]# corosync -t
parse error in config: Can't open logfile '/var/log/cluster/corosync.log' for reason: No such file or directory (2).
[root@archlinux corosync]# mkdir /var/log/cluster/
[root@archlinux corosync]# corosync -t
Aug 20 12:07:04.706 error [MAIN ] parse error in config: No interfaces defined
Aug 20 12:07:04.706 error [MAIN ] Corosync Cluster Engine exiting with status 8 at main.c:1445.
[root@archlinux corosync]#
修改一下IP地址,测试通过
在nodelist {
node {
下取消 ring0_addr 的注释,并修改为本机IP地址
[root@archlinux corosync]# vi corosync.conf
[root@archlinux corosync]# corosync -t
Aug 20 12:09:10.319 notice [MAIN ] Corosync Cluster Engine exiting normally
[root@archlinux corosync]# diff corosync.conf.example corosync.conf
56c56
< #ring0_addr: 192.168.0.1
---
> ring0_addr: 192.168.56.103
[root@archlinux corosync]#
启动不了
[root@archlinux corosync]# ps -ef|grep coro
root 33944 33853 0 12:10 pts/0 00:00:00 grep coro
[root@archlinux corosync]# corosync
Aug 20 12:10:49.029 notice [MAIN ] Corosync Cluster Engine 3.1.5 starting up
Aug 20 12:10:49.029 info [MAIN ] Corosync built-in features: pie relro bindnow
[root@archlinux corosync]# ps -ef|grep coro
root 33949 33853 0 12:10 pts/0 00:00:00 grep coro
[root@archlinux corosync]# corosync -f
Aug 20 12:11:03.386 notice [MAIN ] Corosync Cluster Engine 3.1.5 starting up
Aug 20 12:11:03.386 info [MAIN ] Corosync built-in features: pie relro bindnow
Aug 20 12:11:03.389 error [MAIN ] Corosync Executive couldn't create lock file.
Aug 20 12:11:03.389 error [MAIN ] Corosync Cluster Engine exiting with status 17 at main.c:1590.
[root@archlinux corosync]# man corosync
[root@archlinux corosync]# ps -ef|grep coro
root 33967 33853 0 12:12 pts/0 00:00:00 grep coro
[root@archlinux corosync]#
[root@archlinux corosync]#
版本号:自编译的3.1.5,ubuntu的2.4.3,配置文件差异很大。
[root@archlinux corosync]# corosync -v
Corosync Cluster Engine, version '3.1.5'
Copyright (c) 2006-2021 Red Hat, Inc.
Built-in features: pie relro bindnow
Available crypto models: nss openssl gcrypt
Available compression models: zlib lz4 lz4hc lzo2 lzma bzip2 zstd
[root@archlinux corosync]#
root@ubuntu18a:~# apt list corosync
Listing... Done
corosync/bionic-updates,now 2.4.3-0ubuntu1.2 amd64 [installed,automatic]
N: There are 2 additional versions. Please use the '-a' switch to see them.
root@ubuntu18a:~# apt list corosync -a
Listing... Done
corosync/bionic-updates,now 2.4.3-0ubuntu1.2 amd64 [installed,automatic]
corosync/bionic-security 2.4.3-0ubuntu1.1 amd64
corosync/bionic 2.4.3-0ubuntu1 amd64
根据帮助,UDPU模式下,不用配置interface,在nodelist下配置就可以了。
For UDPU an interface section is not needed and it is recommended that the nodelist is used to define cluster nodes.
开启debug,查看日志
debug: on
logfile: /var/log/cluster/corosync.log
[root@archlinux exec]# corosync
Aug 20 14:29:43.069 notice [MAIN ] Corosync Cluster Engine 3.1.5 starting up
Aug 20 14:29:43.073 info [MAIN ] Corosync built-in features: pie relro bindnow
Aug 20 14:29:43.073 debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
Aug 20 14:29:43.073 debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
Aug 20 14:29:43.073 debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.101 resolved as 192.168.56.101
Aug 20 14:29:43.073 debug [TOTEM ] Configuring link 0 params
Aug 20 14:29:43.073 debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
[root@archlinux exec]#
debug日志可以看到已经解析了配置中的节点地址(192.168.56.103和192.168.56.101),但是对启动失败没有更多信息。
[root@archlinux exec]# cat /var/log/cluster/corosync.log
Aug 20 12:07:04.706 [33933] archlinux corosync error [MAIN ] parse error in config: No interfaces defined
Aug 20 12:07:04.706 [33933] archlinux corosync error [MAIN ] Corosync Cluster Engine exiting with status 8 at main.c:1445.
Aug 20 12:09:10.319 [33941] archlinux corosync notice [MAIN ] Corosync Cluster Engine exiting normally
Aug 20 12:10:49.029 [33945] archlinux corosync notice [MAIN ] Corosync Cluster Engine 3.1.5 starting up
Aug 20 12:10:49.029 [33945] archlinux corosync info [MAIN ] Corosync built-in features: pie relro bindnow
Aug 20 12:10:49.043 [33945] archlinux corosync error [MAIN ] Corosync Executive couldn't create lock file.
Aug 20 12:10:49.043 [33945] archlinux corosync error [MAIN ] Corosync Cluster Engine exiting with status 17 at main.c:1590.
Aug 20 12:11:03.386 [33950] archlinux corosync notice [MAIN ] Corosync Cluster Engine 3.1.5 starting up
Aug 20 12:11:03.386 [33950] archlinux corosync info [MAIN ] Corosync built-in features: pie relro bindnow
Aug 20 12:11:03.389 [33950] archlinux corosync error [MAIN ] Corosync Executive couldn't create lock file.
Aug 20 12:11:03.389 [33950] archlinux corosync error [MAIN ] Corosync Cluster Engine exiting with status 17 at main.c:1590.
Aug 20 14:23:33.726 [34042] archlinux corosync notice [MAIN ] Corosync Cluster Engine 3.1.5 starting up
Aug 20 14:23:33.729 [34042] archlinux corosync info [MAIN ] Corosync built-in features: pie relro bindnow
Aug 20 14:23:33.739 [34042] archlinux corosync error [MAIN ] Corosync Executive couldn't create lock file.
Aug 20 14:23:33.739 [34042] archlinux corosync error [MAIN ] Corosync Cluster Engine exiting with status 17 at main.c:1590.
Aug 20 14:29:43.069 [34058] archlinux corosync notice [MAIN ] Corosync Cluster Engine 3.1.5 starting up
Aug 20 14:29:43.073 [34058] archlinux corosync info [MAIN ] Corosync built-in features: pie relro bindnow
Aug 20 14:29:43.073 [34058] archlinux corosync debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
Aug 20 14:29:43.073 [34058] archlinux corosync debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
Aug 20 14:29:43.073 [34058] archlinux corosync debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.101 resolved as 192.168.56.101
Aug 20 14:29:43.073 [34058] archlinux corosync debug [TOTEM ] Configuring link 0 params
Aug 20 14:29:43.073 [34058] archlinux corosync debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
Aug 20 14:29:43.076 [34058] archlinux corosync debug [QB ] shm size:8388621; real_size:8392704; rb->word_size:2098176
Aug 20 14:29:43.076 [34058] archlinux corosync debug [MAIN ] Corosync TTY detached
Aug 20 14:29:43.079 [34058] archlinux corosync error [MAIN ] Corosync Executive couldn't create lock file.
Aug 20 14:29:43.079 [34058] archlinux corosync error [MAIN ] Corosync Cluster Engine exiting with status 17 at main.c:1590.
[root@archlinux exec]#
我现在可以分析源代码啦!
在exec目录下的main.c可以看到报错信息:Corosync Executive couldn't create lock file.
该报错出现在corosync_flock函数中,原因是调用open函数失败
1102 static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid)
1103 {
1104 struct flock lock;
1105 enum e_corosync_done err;
1106 char pid_s[17];
1107 int fd_flag;
1108
1109 err = COROSYNC_DONE_EXIT;
1110
1111 lockfile_fd = open (lockfile, O_WRONLY | O_CREAT, 0640);
1112 if (lockfile_fd == -1) {
1113 log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.");
1114 return (COROSYNC_DONE_ACQUIRE_LOCK);
1115 }
搜索_flock
1589 if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) {
1590 corosync_exit_error (flock_err);
搜索corosync_lock_file,继续找pid文件路径
166 static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
搜索LOCALSTATEDIR来找具体路径
[root@archlinux corosync]# grep -r LOCALSTATEDIR * 2>/dev/null
说明:-r 表示在子目录中递归搜索,2>/dev/null 表示不显示错误信息
剔除无关结果后:
config.log:#define LOCALSTATEDIR "/usr/var"
最终得到pid文件路径 /usr/var/run/corosync.pid,该路径所在目录不存在,需要创建目录
[root@archlinux corosync]# ls -l /usr/var/run/corosync.pid
ls: cannot access '/usr/var/run/corosync.pid': No such file or directory
[root@archlinux corosync]# ls -l /var/run/corosync.pid
ls: cannot access '/var/run/corosync.pid': No such file or directory
[root@archlinux corosync]# cd /usr/var/run
bash: cd: /usr/var/run: No such file or directory
[root@archlinux var]# cd
[root@archlinux ~]# mkdir /usr/var/run
[root@archlinux ~]#
验证启动正常
[root@archlinux ~]# corosync
Aug 20 17:58:40.213 notice [MAIN ] Corosync Cluster Engine 3.1.5 starting up
Aug 20 17:58:40.216 info [MAIN ] Corosync built-in features: pie relro bindnow
Aug 20 17:58:40.216 debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
Aug 20 17:58:40.216 debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
Aug 20 17:58:40.216 debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.101 resolved as 192.168.56.101
Aug 20 17:58:40.216 debug [TOTEM ] Configuring link 0 params
Aug 20 17:58:40.216 debug [TOTEM ] totemip_parse: IPv4 address of 192.168.56.103 resolved as 192.168.56.103
[root@archlinux ~]# ps -ef|grep corosync
root 34172 1 2 17:58 ? 00:00:00 corosync
root 34186 34146 0 17:58 pts/0 00:00:00 grep corosync
[root@archlinux ~]# corosync-cfgtool -s
Local node ID 1, transport knet
LINK ID 0 udp
addr = 192.168.56.103
status:
nodeid: 1: localhost
nodeid: 1234: disconnected
[root@archlinux ~]#