Environment
Primary node
- Hostname: drbd1
- MySQL IP: 172.16.0.79
- MySQL datadir: /data/mysql
Slave node
- Hostname: drbd2
- MySQL IP: 172.16.0.80
- MySQL datadir: /data/mysql
Common
- MySQL VIP: 172.16.0.78
- OS: CentOS 7.5
- Kernel: 3.10.0-862.2.3.el7.x86_64
- DRBD: drbd84-utils-9.3.1-1.el7.elrepo.x86_64
- kmod-drbd: kmod-drbd84-8.4.11-1.el7_5.elrepo.x86_64
- Corosync: corosync-2.4.3-2.el7_5.1.x86_64
  corosynclib-2.4.3-2.el7_5.1.x86_64
- Pacemaker: pacemaker-cli-1.1.18-11.el7_5.3.x86_64
  pacemaker-libs-1.1.18-11.el7_5.3.x86_64
  pacemaker-cluster-libs-1.1.18-11.el7_5.3.x86_64
  pacemaker-1.1.18-11.el7_5.3.x86_64
  pcs-0.9.162-5.el7.centos.2.x86_64
- crmsh: crmsh-scripts-3.0.0-6.2.noarch
  crmsh-3.0.0-6.2.noarch
  python-parallax-1.0.0a1-7.1.noarch
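To confirm both nodes carry the same package set, the installed versions can be checked with a quick query (an optional step, not part of the original procedure):
[root@drbd1 ~]#rpm -qa | egrep 'drbd|corosync|pacemaker|pcs|crmsh|parallax'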
Installation
Basic network and server configuration
(1) Set hostnames and hosts-file resolution
[root@drbd1 ~]# vim /etc/hostname
drbd1
[root@drbd1 ~]#vim /etc/hosts
172.16.0.79 drbd1
172.16.0.80 drbd2
[root@drbd1 ~]#scp /etc/hosts 172.16.0.80:/etc/
[root@drbd2 ~]#vim /etc/hostname
drbd2
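Alternatively, hostnamectl sets the hostname and rewrites /etc/hostname in one step on CentOS 7:
[root@drbd1 ~]#hostnamectl set-hostname drbd1
[root@drbd2 ~]#hostnamectl set-hostname drbd2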
(2) Passwordless SSH between the nodes
[root@drbd1 ~]#ssh-keygen
[root@drbd1 ~]#ssh-copy-id drbd2
[root@drbd2 ~]#ssh-keygen
[root@drbd2 ~]#ssh-copy-id drbd1
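A quick check that key-based login works in both directions:
[root@drbd1 ~]#ssh drbd2 hostname
[root@drbd2 ~]#ssh drbd1 hostname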
(3) Time synchronization
[root@drbd1 ~]#crontab -e
* * * * * ntpdate -s ntp1.aliyun.com
[root@drbd2 ~]#crontab -e
* * * * * ntpdate -s ntp1.aliyun.com
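As an alternative to the per-minute ntpdate cron job, chronyd (the default time daemon on CentOS 7) keeps the clock synchronized continuously; a minimal sketch, assuming ntp1.aliyun.com remains the chosen server:
[root@drbd1 ~]#yum install chrony -y
[root@drbd1 ~]#vim /etc/chrony.conf
server ntp1.aliyun.com iburst
[root@drbd1 ~]#systemctl enable chronyd
[root@drbd1 ~]#systemctl start chronyd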
Software installation and configuration
(1) DRBD installation and configuration
1. Installation
[root@drbd1 ~]#yum update
[root@drbd1 ~]#reboot
[root@drbd1 ~]#rpm -ivh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm
[root@drbd1 ~]#yum install kmod-drbd84 drbd84-utils
[root@drbd1 ~]#modprobe drbd
[root@drbd1 ~]#lsmod|grep drbd
Repeat the same steps on drbd2.
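The modprobe above only loads the module for the current boot; to have it loaded automatically after a reboot, it can be registered with systemd-modules-load (an optional extra step):
[root@drbd1 ~]#echo drbd > /etc/modules-load.d/drbd.conf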
2. Configuration
[root@drbd1 ~]#vim /etc/drbd.d/global_common.conf
global {
    usage-count yes;
}
common {
    handlers {
        split-brain "/usr/lib/drbd/notify-split-brain.sh root";
    }
    startup {
    }
    options {
    }
    disk {
        resync-rate 30M;
        on-io-error detach;
        disk-flushes no;
        md-flushes no;
    }
    net {
        protocol C;
        verify-alg md5;
        csums-alg md5;
        data-integrity-alg md5;
        after-sb-0pri discard-zero-changes;
        after-sb-1pri discard-secondary;
        after-sb-2pri disconnect;
    }
}
[root@drbd1 ~]#vim /etc/drbd.d/mysql.res
resource mysql {
    on drbd1 {
        device /dev/drbd0;
        disk /dev/mapper/vg--data-lv--data;
        address 172.16.0.79:7898;
        meta-disk internal;
    }
    on drbd2 {
        device /dev/drbd0;
        disk /dev/mapper/vg--data-lv--data;
        address 172.16.0.80:7898;
        meta-disk internal;
    }
}
[root@drbd1 ~]#cd /etc/drbd.d
[root@drbd1 ~]#scp global_common.conf drbd2:/etc/drbd.d/
[root@drbd1 ~]#scp mysql.res drbd2:/etc/drbd.d/
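Before initializing anything, the configuration can be sanity-checked on both nodes; drbdadm dump parses the files and prints the resulting resource definition, failing loudly on syntax errors:
[root@drbd1 ~]#drbdadm dump mysql
[root@drbd2 ~]#drbdadm dump mysql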
(2) MySQL data synchronization initialization
1. Create the backing disk device (an LVM logical volume)
[root@drbd1 ~]#mkdir /data
[root@drbd1 ~]#fdisk /dev/sdb
[root@drbd1 ~]#pvcreate /dev/sdb1
[root@drbd1 ~]#vgcreate vg-data /dev/sdb1
[root@drbd1 ~]#lvcreate -n lv-data -L 9G vg-data
[root@drbd1 ~]#mkfs -t ext4 /dev/vg-data/lv-data
[root@drbd1 ~]#dd if=/dev/zero of=/dev/mapper/vg--data-lv--data bs=1M count=128
The dd wipes the filesystem signature from the head of the LV so that drbdadm create-md will not refuse to initialize a device that appears to hold data.
Repeat the same steps on drbd2.
2. Initialize the DRBD metadata
[root@drbd1 ~]#drbdadm create-md mysql
[root@drbd2 ~]#drbdadm create-md mysql
3. Bring up the DRBD resource
[root@drbd1 ~]#drbdadm up mysql
[root@drbd1 ~]#ln -s /usr/lib/drbd/drbd /bin/
[root@drbd1 ~]#drbd status
Repeat on drbd2.
4. Manually promote the primary node to DRBD Primary
[root@drbd1 ~]#drbdadm primary mysql --force
[root@drbd1 ~]#drbdadm role mysql
Primary/Secondary
[root@drbd2 ~]#drbdadm role mysql
Secondary/Primary
[root@drbd1 ~]#drbdadm dstate mysql
UpToDate/UpToDate
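The forced promotion starts a full initial sync from drbd1 to drbd2; with DRBD 8.4 its progress can be watched in /proc/drbd:
[root@drbd1 ~]#cat /proc/drbd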
5. Install MySQL and migrate the database onto the DRBD partition (the MySQL packages themselves are assumed to be installed on both nodes already)
[root@drbd1 ~]#mkfs.ext4 /dev/drbd0
[root@drbd1 ~]#mount /dev/drbd0 /data
[root@drbd1 ~]#systemctl stop mysqld
[root@drbd1 ~]#cp -a /var/lib/mysql /data/
[root@drbd1 ~]#vim /etc/my.cnf
datadir = /data/mysql
[root@drbd1 ~]#drbdadm dstate mysql
UpToDate/UpToDate
[root@drbd1 ~]#systemctl start mysqld
[root@drbd1 ~]#systemctl stop mysqld
[root@drbd1 ~]#umount /data
[root@drbd1 ~]#drbdadm secondary mysql
[root@drbd2 ~]#drbdadm primary mysql
[root@drbd2 ~]#mount /dev/drbd0 /data
[root@drbd2 ~]#systemctl start mysqld
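A quick sanity check on drbd2 confirms that the datadir replicated across and that mysqld starts from it:
[root@drbd2 ~]#ls /data/mysql
[root@drbd2 ~]#systemctl status mysqld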
(3) Corosync and Pacemaker installation and configuration
1. Installation
[root@drbd1 ~]#yum install pcs pacemaker corosync fence-agents-all -y
[root@drbd2 ~]#yum install pcs pacemaker corosync fence-agents-all -y
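If firewalld is running on the nodes, the cluster and replication ports must be opened (a step the original write-up does not cover); firewalld ships a predefined high-availability service covering the corosync/pacemaker/pcsd ports, and the DRBD port matches the one in mysql.res:
[root@drbd1 ~]#firewall-cmd --permanent --add-service=high-availability
[root@drbd1 ~]#firewall-cmd --permanent --add-port=7898/tcp
[root@drbd1 ~]#firewall-cmd --reload
Repeat on drbd2.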
2. Configure Corosync
[root@drbd1 ~]#cd /etc/corosync
[root@drbd1 ~]#cp corosync.conf.example corosync.conf
[root@drbd1 ~]#vim corosync.conf
bindnetaddr: 172.16.0.0
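For context, the relevant sections of corosync.conf after the edit look roughly like this (a sketch based on the shipped example file; the multicast address and port are the example defaults, and two_node is optional here since no-quorum-policy=ignore is set later):
totem {
    version: 2
    interface {
        ringnumber: 0
        bindnetaddr: 172.16.0.0
        mcastaddr: 239.255.1.1
        mcastport: 5405
    }
}
quorum {
    provider: corosync_votequorum
    two_node: 1
}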
[root@drbd1 ~]#mv /dev/{random,random.bak}
[root@drbd1 ~]#ln -s /dev/urandom /dev/random
[root@drbd1 ~]#corosync-keygen
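The /dev/random symlink above only speeds up entropy gathering for corosync-keygen; once the key exists, the device should be restored:
[root@drbd1 ~]#rm -f /dev/random
[root@drbd1 ~]#mv /dev/random.bak /dev/random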
[root@drbd1 ~]#scp corosync.conf authkey drbd2:/etc/corosync/
[root@drbd1 ~]#systemctl start corosync
[root@drbd2 ~]#systemctl start corosync
[root@drbd1 ~]#systemctl start pacemaker
[root@drbd2 ~]#systemctl start pacemaker
[root@drbd1 ~]#systemctl start pcsd
[root@drbd2 ~]#systemctl start pcsd
3. Configure pcs (set the hacluster password on both nodes before running pcs cluster auth)
[root@drbd1 ~]#passwd hacluster
[root@drbd1 ~]#pcs cluster auth drbd1 drbd2
[root@drbd1 ~]#pcs cluster setup --start --name mycluster drbd1 drbd2 --force
[root@drbd2 ~]#passwd hacluster
[root@drbd2 ~]#pcs cluster auth drbd1 drbd2
[root@drbd1 ~]#systemctl enable pcsd
[root@drbd1 ~]#systemctl enable corosync
[root@drbd1 ~]#systemctl enable pacemaker
[root@drbd1 ~]#reboot
[root@drbd2 ~]#systemctl enable pcsd
[root@drbd2 ~]#systemctl enable corosync
[root@drbd2 ~]#systemctl enable pacemaker
[root@drbd2 ~]#reboot
[root@drbd1 ~]#crm_verify -L -V  ## crm_verify checks the current cluster configuration for errors
error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
Errors found during check: config not valid
If this error is reported, disable STONITH (no fencing devices are configured in this setup):
[root@drbd1 ~]#pcs property set stonith-enabled=false
[root@drbd1 ~]#pcs status
[root@drbd1 ~]#crm_mon -1
Stack: classic openais (with plugin)
Current DC: drbd1 - partition with quorum
Version: 1.1.11-97629de
2 Nodes configured, 2 expected votes
0 Resources configured
Online: [ drbd1 drbd2 ]
(4) crmsh installation and cluster resource configuration
1. Install crmsh
[root@drbd1 ~]#wget ftp://ftp.pbone.net/mirror/ftp5.gwdg.de/pub/opensuse/repositories/home:/KGronlund/CentOS_7/noarch/python-parallax-1.0.0a1-7.1.noarch.rpm
[root@drbd1 ~]#rpm -ivh python-parallax-1.0.0a1-7.1.noarch.rpm
[root@drbd1 ~]#vim /etc/yum.repos.d/crm.repo
[network_ha-clustering_Stable]
name=Stable High Availability/Clustering packages (CentOS_CentOS-7)
type=rpm-md
baseurl=http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/
gpgcheck=1
gpgkey=http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/repodata/repomd.xml.key
enabled=1
[root@drbd1 ~]#yum install crmsh -y
[root@drbd1 ~]#systemctl disable mysqld
Repeat on drbd2. mysqld is disabled in systemd because Pacemaker will be responsible for starting MySQL from now on.
2. Configure cluster resources with crm
Set the default cluster properties
[root@drbd1 ~]#crm configure
crm(live)configure#property pe-warn-series-max=1000
crm(live)configure#property pe-input-series-max=1000
crm(live)configure#property pe-error-series-max=1000
crm(live)configure#property cluster-recheck-interval=5min
crm(live)configure#property no-quorum-policy=ignore
crm(live)configure#property stonith-enabled=false
crm(live)configure#property start-failure-is-fatal=false
crm(live)configure#property default-action-timeout=180s
crm(live)configure#commit
crm(live)configure#exit
[root@drbd1 ~]#crm_verify -L
Set resource stickiness
[root@drbd1 ~]#crm configure
crm(live)configure#rsc_defaults resource-stickiness=100
crm(live)configure#commit
crm(live)configure#exit
[root@drbd1 ~]#crm_verify -L
Configure the DRBD resource
Before handing control to Pacemaker, stop mysqld, unmount /data, and stop the drbd service (mysqld and the mount are active on the current Primary, drbd2 after the previous step; drbd itself should be stopped on both nodes):
[root@drbd1 ~]#service mysqld stop
[root@drbd1 ~]#umount /data
[root@drbd1 ~]#service drbd stop
[root@drbd1 ~]#crm configure
primitive p_drbd_mysql ocf:linbit:drbd \
params drbd_resource=mysql \
op start timeout=240s interval=0 \
op stop timeout=180s interval=0 \
op promote timeout=180s interval=0 \
op demote timeout=180s interval=0 \
op monitor interval=30s role=Slave \
op monitor interval=29s role=Master
Configure the filesystem resource
primitive p_fs_mysql ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/data/" fstype=ext4 \
op start timeout=60s interval=0 \
op stop timeout=180s interval=0 \
op monitor interval=60s timeout=60s
Configure the MySQL resource
primitive p_srv_mysql mysql \
params binary="/usr/sbin/mysqld" config="/etc/my.cnf" datadir="/data/mysql" pid="/tmp/mysqld.pid" socket="/tmp/mysql.sock" \
op start interval="0" timeout="120" \
op stop interval="0" timeout="120" \
op monitor interval="10" timeout="30"
Configure the VIP resource
primitive p_vip_mysql ocf:heartbeat:IPaddr2 \
params ip=172.16.0.78 cidr_netmask=24 \
op monitor interval=30s
Configure the resource group
group g_mysql p_vip_mysql p_fs_mysql p_srv_mysql
Configure the master/slave relationship
ms ms_drbd_mysql p_drbd_mysql \
meta master-max=1 master-node-max=1 \
clone-max=2 clone-node-max=1 \
notify=true target-role=Master
Configure the colocation constraint
colocation c_mysql_on_drbd inf: g_mysql ms_drbd_mysql:Master
Configure the ordering constraint
order o_drbd_before_mysql inf: ms_drbd_mysql:promote g_mysql:start
Configure a ping resource to guard against split brain
primitive p_ping ocf:pacemaker:ping params name=ping \
multiplier=1000 host_list=172.16.0.254 \
op monitor interval=15s timeout=60s \
op start timeout=60s
clone cl_ping p_ping \
meta interleave=true
location l_drbd_master_on_ping ms_drbd_mysql \
rule $role=Master -inf: not_defined ping or ping number:lte 0
crm(live)configure#commit
crm(live)configure#exit
[root@drbd1 ~]#crm_verify -L
Testing
1. Manually take drbd1 offline and verify that failover succeeds
[root@drbd1 ~]#crm node standby
[root@drbd1 ~]#crm status
2. Bring drbd1 back online and check whether resources fail back
[root@drbd1 ~]#crm node online
[root@drbd1 ~]#crm status
3. Manually take drbd2 offline and verify that failover succeeds
[root@drbd2 ~]#crm node standby
[root@drbd2 ~]#crm status
4. Bring drbd2 back online and check whether resources fail back
[root@drbd2 ~]#crm node online
[root@drbd2 ~]#crm status
5. Stop the network service to test that split-brain handling works
[root@drbd1 ~]#service network stop
[root@drbd2 ~]#crm status
[root@drbd1 ~]#service network start
[root@drbd2 ~]#service network stop
[root@drbd1 ~]#crm status
[root@drbd2 ~]#service network start
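If the two nodes do end up split-brained (drbd status shows StandAlone), DRBD 8.4 supports manual recovery; a sketch of the standard procedure, where "victim" stands for whichever node's changes are to be discarded and "survivor" for the node whose data is kept (placeholder hostnames, not nodes from this setup):
## on the victim, discard local changes and resync from the survivor
[root@victim ~]#drbdadm disconnect mysql
[root@victim ~]#drbdadm secondary mysql
[root@victim ~]#drbdadm connect --discard-my-data mysql
## on the survivor, reconnect only if it is also StandAlone
[root@survivor ~]#drbdadm connect mysql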
Note: a complete crm configuration example
node jtdb3 \
attributes standby=off
node jtdb4 \
attributes standby=off
primitive p_drbd_mysql ocf:linbit:drbd \
params drbd_resource=mysql \
op start timeout=240s interval=0 \
op stop timeout=180s interval=0 \
op promote timeout=180s interval=0 \
op demote timeout=180s interval=0 \
op monitor interval=30s role=Slave \
op monitor interval=29s role=Master
primitive p_fs_mysql Filesystem \
params device="/dev/drbd0" directory="/data/" fstype=xfs options="noatime,nodiratime,nobarrier" \
op start timeout=60s interval=0 \
op stop timeout=60s interval=0 \
op monitor interval=60s timeout=60s
primitive p_ping ocf:pacemaker:ping \
params name=ping multiplier=1000 host_list=192.168.1.1 \
op start timeout=60s interval=0 \
op monitor interval=15s timeout=60s
primitive p_srv_mysql mysql \
params config="/etc/my.cnf" datadir="/data/mysql/" \
op start timeout=120s interval=0 \
op stop timeout=120s interval=0 \
op monitor interval=20s timeout=30s \
meta target-role=Started
primitive p_vip_mysql IPaddr2 \
params ip=192.168.1.3 cidr_netmask=24 \
op monitor interval=30s
group g_mysql p_vip_mysql p_fs_mysql p_srv_mysql
ms ms_drbd_mysql p_drbd_mysql \
meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 notify=true
clone cl_ping p_ping \
meta interleave=true
colocation c_mysql_on_drbd inf: g_mysql ms_drbd_mysql:Master
location l_drbd_master_on_ping ms_drbd_mysql \
rule $role=Master -inf: not_defined ping or ping number:lte 0
order o_drbd_before_mysql inf: ms_drbd_mysql:promote g_mysql:start
property cib-bootstrap-options: \
expected-quorum-votes=2 \
pe-warn-series-max=1000 \
pe-input-series-max=1000 \
pe-error-series-max=1000 \
cluster-recheck-interval=5min \
no-quorum-policy=ignore \
stonith-enabled=false \
start-failure-is-fatal=false \
default-action-timeout=180s \
dc-version=1.1.11-97629de \
cluster-infrastructure="classic openais (with plugin)"
rsc_defaults rsc-options: \
resource-stickiness=100