一、OS配置
1.1环境规划
环境:centos 7
数据库:pg11.5
ip地址:
node1 10.122.166.120
node2 10.122.166.127
node3 10.122.166.128
vip:
10.122.166.123
10.122.166.133
1.2系统配置
关闭selinux ----> 编辑 /etc/selinux/config,设置 SELINUX=disabled
关闭防火墙
systemctl stop firewalld.service
systemctl disable firewalld.service
修改主机名
hostnamectl set-hostname nodeX
编辑 /etc/hosts 文件
10.122.166.120 node1
10.122.166.127 node2
10.122.166.128 node3
二、数据库安装
2.1 所有节点安装PG软件(以node1为例)
1)安装依赖包
[root@node1 ~]# yum install gcc gcc-c++ flex bison readline-devel zlib-devel -y
2)创建用户
[root@node1 ~]# groupadd postgres
[root@node1 ~]# useradd postgres -g postgres
[root@node1 ~]# echo "pg123"|passwd postgres --stdin
3)编译安装PG软件
[root@node1 ~]# tar -xvf postgresql-11.5.tar.gz
[root@node1 ~]# cd postgresql-11.5/
[root@node1 ~]# ./configure --with-segsize=20 --prefix=/data/postgres/
[root@node1 ~]# gmake && gmake install
[root@node1 ~]# cd contrib/
[root@node1 ~]# gmake && gmake install
4)创建数据目录、归档目录
[root@node1 ~]# mkdir -p /data/postgres/data
[root@node1 ~]# mkdir -p /data/postgres/archive_log
[root@node1 ~]# chown -R postgres:postgres /data/postgres
[root@node1 ~]# chmod 0700 /data/postgres/data
2.2 node1 初始化数据库
1)初始化数据库
[postgres@node1 ~]$ cd /data/postgres/bin/
[postgres@node1 bin]$ initdb -D /data/postgres/data
2)修改配置文件
[postgres@node1 data]$ vim postgresql.conf
listen_addresses = '*'
port = '5432'
max_connections = '1000'
log_connections = 'yes'
shared_buffers = '4000MB'
effective_cache_size = '1000MB'
temp_buffers = '64MB'
work_mem = '40MB'
max_worker_processes = '32'
max_parallel_workers = '16'
max_parallel_workers_per_gather = '4'
wal_compression = 'on'
log_destination = 'csvlog'
logging_collector = 'on'
log_directory = 'log'
log_truncate_on_rotation = 'on'
log_rotation_size = '30MB'
log_lock_waits = 'on'
lock_timeout = '300000'
wal_level = replica
synchronous_commit = on
archive_mode = on
archive_command = 'cp %p /data/postgres/archive_log/%f'
max_wal_senders=5
wal_keep_segments = 32
hot_standby = on
wal_log_hints = 'on'
log_statement = 'ddl'
[postgres@node1 data]$ vim pg_hba.conf
host all all 10.122.166.0/24 trust
host replication all 10.122.166.0/24 trust
3)添加环境变量
[postgres@node1 ~]$ vi ~/.bash_profile
export PATH=/data/postgres/bin/:$PATH
export PGDATA=/data/postgres/data
export LD_LIBRARY_PATH=/data/postgres/lib:${LD_LIBRARY_PATH}
[postgres@node1 ~]$ source ~/.bash_profile
4)启动数据库
[postgres@node1 ~]$ pg_ctl start
2.3 node2/node3 创建备库
1)以 node2 为例
[postgres@node2 data]$ pg_basebackup -h 10.122.166.120 -p 5432 -U postgres -D /data/postgres/data
2)配置recovery.conf 文件
[postgres@node2 data]$ vim /data/postgres/data/recovery.conf
standby_mode = on
primary_conninfo = 'host=10.122.166.120 port=5432 user=postgres password=postgres'
recovery_target_timeline = 'latest'
3)启动备库
[postgres@node2 ~]$ pg_ctl start
2.4 node1 查看集群状态
postgres=# select application_name,client_addr,sync_state from pg_stat_replication;
application_name | client_addr | sync_state
------------------+----------------+------------
node2 | 10.122.166.127 | async
node3 | 10.122.166.128 | async
(2 rows)
2.5 所有节点停止 PG服务
先备库 后主库
pg_ctl stop -m fast
三、配置corosync
1)所有节点安装软件
yum install -y pacemaker corosync pcs
2)配置 corosync.conf文件
[root@node1 ~]# cd /etc/corosync
[root@node1 corosync]# vim corosync.conf
totem {
max_messages: 20
vsftype: none
crypto_cipher: none
secauth: on
token_retransmits_before_loss_const: 10
interface {
bindnetaddr: 10.122.166.0
mcastaddr: 226.94.61.22
ringnumber: 0
mcastport: 5405
ttl: 1
}
consensus: 6000
clear_node_high_bit: yes
crypto_hash: none
rrp_mode: none
join: 60
cluster_name: cluster
token: 5000
version: 2
transport: udp
ip_version: ipv4
heartbeat_failures_allowed : 3
}
logging {
to_logfile: yes
timestamp: on
syslog_facility: daemon
logger_subsys {
debug: off
subsys: QUORUM
}
to_syslog: yes
debug: off
logfile: /var/log/corosync.log
to_stderr: no
fileline: off
}
quorum {
expected_votes: 3
two_node: 0
provider: corosync_votequorum
}
nodelist {
node {
ring0_addr: node1
nodeid: 1
}
node {
ring0_addr: node2
nodeid: 2
}
node {
ring0_addr: node3
nodeid: 3
}
}
3)复制到另外两个节点
[root@node1 corosync]# scp corosync.conf 10.122.166.127:/etc/corosync
[root@node1 corosync]# scp corosync.conf 10.122.166.128:/etc/corosync
4)所有节点启动corosync服务
service corosync start
四、配置 pacemaker
1)所有节点启动 pacemaker服务
service pacemaker start
2)任意节点使用root用户检查状态
[root@node3 cib]# crm_mon -Afr -1
Stack: corosync
Current DC: node2 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
Last updated: Thu Sep 29 13:59:07 2022
Last change: Thu Sep 29 13:58:59 2022 by hacluster via crmd on node2
3 nodes configured
0 resource instances configured
Online: [ node1 node2 node3 ]
No resources
Node Attributes:
* Node node1:
* Node node2:
* Node node3:
Migration Summary:
* Node node2:
* Node node1:
* Node node3:
3)node1 编辑config.pcs文件
[root@node1 ~]# cd /var/lib/pacemaker/
[root@node1 pacemaker]# vim config.pcs
pcs cluster cib pgsql_cfg
pcs -f pgsql_cfg property set no-quorum-policy="ignore"
pcs -f pgsql_cfg property set stonith-enabled="false"
pcs -f pgsql_cfg resource defaults resource-stickiness="INFINITY"
pcs -f pgsql_cfg resource defaults migration-threshold="1"
pcs -f pgsql_cfg resource create vip-master IPaddr2 \
ip="10.122.166.123" \
nic="eth0" \
cidr_netmask="24" \
op start timeout="60s" interval="0s" on-fail="restart" \
op monitor timeout="60s" interval="10s" on-fail="restart" \
op stop timeout="60s" interval="0s" on-fail="block"
pcs -f pgsql_cfg resource create vip-slave IPaddr2 \
ip="10.122.166.133" \
nic="eth0" \
cidr_netmask="24" \
meta migration-threshold="0" \
op start timeout="60s" interval="0s" on-fail="stop" \
op monitor timeout="60s" interval="10s" on-fail="restart" \
op stop timeout="60s" interval="0s" on-fail="ignore"
pcs -f pgsql_cfg resource create pgsql pgsql \
pgctl="/data/postgres/bin/pg_ctl" \
psql="/data/postgres/bin/psql" \
pgdata="/data/postgres/data/" \
rep_mode="sync" \
node_list="node1 node2 node3" \
restore_command="cp %p /data/postgres/archive_log/%f" \
primary_conninfo_opt="keepalives_idle=60 keepalives_interval=5 keepalives_count=5" \
master_ip="10.122.166.123" \
restart_on_promote='true' \
op start timeout="60s" interval="0s" on-fail="restart" \
op monitor timeout="60s" interval="4s" on-fail="restart" \
op monitor timeout="60s" interval="3s" on-fail="restart" role="Master" \
op promote timeout="60s" interval="0s" on-fail="restart" \
op demote timeout="60s" interval="0s" on-fail="stop" \
op stop timeout="60s" interval="0s" on-fail="block" \
op notify timeout="60s" interval="0s"
pcs -f pgsql_cfg resource master pgsql-cluster pgsql \
master-max=1 master-node-max=1 clone-max=3 clone-node-max=1 notify=true
pcs -f pgsql_cfg resource group add master-group vip-master
pcs -f pgsql_cfg resource group add slave-group vip-slave
pcs -f pgsql_cfg constraint colocation add master-group with master pgsql-cluster INFINITY
pcs -f pgsql_cfg constraint order promote pgsql-cluster then start master-group symmetrical=false score=INFINITY
pcs -f pgsql_cfg constraint order demote pgsql-cluster then stop master-group symmetrical=false score=0
pcs -f pgsql_cfg constraint colocation add slave-group with slave pgsql-cluster INFINITY
pcs -f pgsql_cfg constraint order promote pgsql-cluster then start slave-group symmetrical=false score=INFINITY
pcs -f pgsql_cfg constraint order demote pgsql-cluster then stop slave-group symmetrical=false score=0
pcs cluster cib-push pgsql_cfg
4)所有节点增加 root用户的 PATH路径
[root@node1 ~]# vim .bash_profile
export PATH=/data/postgres/bin/:$PATH
[root@node1 ~]# source .bash_profile
5)node1 加载配置
[root@node1 pacemaker]# sh config.pcs
Warning: Defaults do not apply to resources which override them with their own defined values
Warning: Defaults do not apply to resources which override them with their own defined values
Assumed agent name 'ocf:heartbeat:IPaddr2' (deduced from 'IPaddr2')
Assumed agent name 'ocf:heartbeat:IPaddr2' (deduced from 'IPaddr2')
Assumed agent name 'ocf:heartbeat:pgsql' (deduced from 'pgsql')
Adding pgsql-cluster master-group (score: INFINITY) (Options: first-action=promote then-action=start symmetrical=false)
Adding pgsql-cluster master-group (score: 0) (Options: first-action=demote then-action=stop symmetrical=false)
Adding pgsql-cluster slave-group (score: INFINITY) (Options: first-action=promote then-action=start symmetrical=false)
Adding pgsql-cluster slave-group (score: 0) (Options: first-action=demote then-action=stop symmetrical=false)
CIB updated
6)等待一会,再次检查状态
[root@node1 ~]# crm_mon -Afr -1
Stack: corosync
Current DC: node2 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
Last updated: Thu Sep 29 15:14:27 2022
Last change: Thu Sep 29 14:15:00 2022 by root via crm_attribute on node1
3 nodes configured
5 resource instances configured
Online: [ node1 node2 node3 ]
Full list of resources:
Master/Slave Set: pgsql-cluster [pgsql]
Masters: [ node1 ]
Slaves: [ node2 node3 ]
Resource Group: master-group
vip-master (ocf::heartbeat:IPaddr2): Started node1
Resource Group: slave-group
vip-slave (ocf::heartbeat:IPaddr2): Started node2
Node Attributes:
* Node node1:
+ master-pgsql : 1000
+ pgsql-data-status : LATEST
+ pgsql-master-baseline : 000000002D000098
+ pgsql-status : PRI
* Node node2:
+ master-pgsql : 100
+ pgsql-data-status : STREAMING|SYNC
+ pgsql-status : HS:sync
* Node node3:
+ master-pgsql : -INFINITY
+ pgsql-data-status : STREAMING|ASYNC
+ pgsql-status : HS:async
Migration Summary:
* Node node2:
* Node node1:
* Node node3:
7)任意节点使用 pcs命令检查状态
[root@node1 ~]# pcs status
Cluster name: cluster
Stack: corosync
Current DC: node2 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
Last updated: Thu Sep 29 15:15:08 2022
Last change: Thu Sep 29 14:15:00 2022 by root via crm_attribute on node1
3 nodes configured
5 resource instances configured
Online: [ node1 node2 node3 ]
Full list of resources:
Master/Slave Set: pgsql-cluster [pgsql]
Masters: [ node1 ]
Slaves: [ node2 node3 ]
Resource Group: master-group
vip-master (ocf::heartbeat:IPaddr2): Started node1
Resource Group: slave-group
vip-slave (ocf::heartbeat:IPaddr2): Started node2
Daemon Status:
corosync: active/disabled
pacemaker: active/disabled
pcsd: inactive/disabled
8)在 node1(主库)查看状态
postgres=# select application_name,client_addr,sync_state from pg_stat_replication;
application_name | client_addr | sync_state
------------------+----------------+------------
node2 | 10.122.166.127 | sync
node3 | 10.122.166.128 | async
(2 rows)
安装 crmsh
[root@node1 ~]# wget -P /etc/yum.repos.d/ http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/network:ha-clustering:Stable.repo
[root@node1 ~]# yum install -y crmsh
[root@node1 ~]# crm configure property stonith-enabled=false
使用 crm 命令
[root@node1 ~]# crm status
Stack: corosync
Current DC: node2 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
Last updated: Thu Sep 29 15:18:04 2022
Last change: Thu Sep 29 14:15:00 2022 by root via crm_attribute on node1
3 nodes configured
5 resource instances configured
Online: [ node1 node2 node3 ]
Full list of resources:
Master/Slave Set: pgsql-cluster [pgsql]
Masters: [ node1 ]
Slaves: [ node2 node3 ]
Resource Group: master-group
vip-master (ocf::heartbeat:IPaddr2): Started node1
Resource Group: slave-group
vip-slave (ocf::heartbeat:IPaddr2): Started node2
常用命令
停止集群服务: service pacemaker stop (会自动停止PG)
启动集群服务: service pacemaker start(会自动启动PG)
查看集群状态: crm_mon -Afr -1
查看集群状态且启动刷新: crm_mon -Afr
刷新集群状态: crm resource cleanup pgsql-cluster
刷新某一个节点的状态: crm resource cleanup pgsql nodename
清理历史告警: crm resource cleanup