一、配置集群
IP地址 | 机器名 | 组件 |
---|---|---|
192.168.11.192 | pgsql1 | pgsql12, repmgr |
192.168.11.193 | pgsql2 | pgsql12, repmgr |
二、postgresql安装yum源
yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm
yum install -y postgresql12-server
cat > /usr/lib/systemd/system/postgresql-12.service << 'EOF'
[Unit]
Description=PostgreSQL 12 database server
Documentation=https://www.postgresql.org/docs/12/static/
After=syslog.target
After=network.target
[Service]
Type=notify
User=postgres
Group=postgres
Environment=PGDATA=/data/pgsql
OOMScoreAdjust=-1000
Environment=PG_OOM_ADJUST_FILE=/proc/self/oom_score_adj
Environment=PG_OOM_ADJUST_VALUE=0
ExecStartPre=/usr/pgsql-12/bin/postgresql-12-check-db-dir ${PGDATA}
ExecStart=/usr/pgsql-12/bin/postmaster -D ${PGDATA}
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
TimeoutSec=0
[Install]
WantedBy=multi-user.target
EOF
mkdir -p /data/{pgsql,archive,backup}
chown -R postgres:postgres /data/pgsql/
chown -R postgres:postgres /data/archive/
chown -R postgres:postgres /data/backup/
#只在主节点上执行初始化
/usr/pgsql-12/bin/postgresql-12-setup initdb
#开机启动
systemctl enable postgresql-12
三、 准备工作
#修改postgres密码
echo "Postgres2O21"|passwd --stdin postgres
su - postgres
#生成密钥
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
#免密
ssh-copy-id 192.168.11.192
ssh-copy-id 192.168.11.193
四、主节点
cat > /data/pgsql/postgresql.conf << 'EOF'
max_wal_size = 1GB
min_wal_size = 80MB
log_timezone = 'Asia/Shanghai'
datestyle = 'iso, mdy'
timezone = 'Asia/Shanghai'
default_text_search_config = 'pg_catalog.english'
listen_addresses = '0.0.0.0'
max_wal_senders = 10
max_replication_slots = 10
wal_level = replica
hot_standby = on
archive_mode = on
archive_command = 'test ! -f /data/archive/%f && cp %p /data/archive/%f'
wal_log_hints = on
shared_preload_libraries = 'repmgr'
EOF
cat > /data/pgsql/pg_hba.conf << 'EOF'
# TYPE DATABASE USER ADDRESS METHOD
local all all trust
host all all 127.0.0.1/32 trust
local repmgr repmgr trust
host repmgr repmgr 127.0.0.1/32 trust
host repmgr repmgr 192.168.11.0/24 trust
host all all 0.0.0.0/0 md5
host all all ::1/128 trust
local replication all trust
host replication all 127.0.0.1/32 trust
host replication all ::1/128 trust
local replication repmgr trust
host replication repmgr 127.0.0.1/32 trust
host replication repmgr 192.168.11.0/24 trust
EOF
#启动数据库
systemctl start postgresql-12
#切换到postgres用户并启动数据库
su - postgres -c "psql"
create database repmgr;
create user repmgr with password 'repmgr' superuser login;
alter database repmgr owner to repmgr;
#配置环境变量
cat >/etc/profile.d/pgsql.sh << EOF
export PATH=/usr/pgsql-12/bin:$PATH
EOF
source /etc/profile
五、主节点安装repmgr
curl https://dl.2ndquadrant.com/default/release/get/12/rpm | bash
yum install -y repmgr12
cat > /etc/repmgr/12/repmgr.conf << 'EOF'
ssh_options='-q -o ConnectTimeout=10'
node_id=1
node_name='192.168.11.192'
conninfo='host=192.168.11.192 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/data/pgsql'
pg_bindir='/usr/pgsql-12/bin/'
failover=automatic
promote_command='/usr/pgsql-12/bin/repmgr standby promote -f /etc/repmgr/12/repmgr.conf --log-to-file'
follow_command='/usr/pgsql-12/bin/repmgr standby follow -f /etc/repmgr/12/repmgr.conf --log-to-file --upstream-node-id=%n'
priority=100
log_file='/data/pgsql/repmgrd.log'
monitoring_history=true
connection_check_type ='ping'
monitor_interval_secs=2
reconnect_attempts=10
reconnect_interval=5
primary_visibility_consensus = true
standby_disconnect_on_failover = true
repmgrd_service_start_command ='sudo /usr/bin/systemctl start repmgr12.service'
repmgrd_service_stop_command ='sudo /usr/bin/systemctl stop repmgr12.service'
EOF
#注册主节点
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf primary register"
#查询注册情况
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf cluster show"
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+----------------+---------+-----------+----------------+----------+----------+----------+-----------------------------------------------------------------
1 | 192.168.11.192 | primary | * running | | default | 100 | 3 | host=192.168.11.192 user=repmgr dbname=repmgr connect_timeout=2
#启动repmgr12
systemctl start repmgr12
六、从节点安装repmgr
curl https://dl.2ndquadrant.com/default/release/get/12/rpm | bash
yum install -y repmgr12
cat > /etc/repmgr/12/repmgr.conf << 'EOF'
ssh_options='-q -o ConnectTimeout=10'
node_id=2
node_name='192.168.11.193'
conninfo='host=192.168.11.193 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/data/pgsql'
pg_bindir='/usr/pgsql-12/bin/'
failover=automatic
promote_command='/usr/pgsql-12/bin/repmgr standby promote -f /etc/repmgr/12/repmgr.conf --log-to-file'
follow_command='/usr/pgsql-12/bin/repmgr standby follow -f /etc/repmgr/12/repmgr.conf --log-to-file --upstream-node-id=%n'
priority=100
log_file='/data/pgsql/repmgrd.log'
monitoring_history=true
connection_check_type ='ping'
monitor_interval_secs=2
reconnect_attempts=10
reconnect_interval=5
primary_visibility_consensus = true
standby_disconnect_on_failover = true
repmgrd_service_start_command ='sudo /usr/bin/systemctl start repmgr12.service'
repmgrd_service_stop_command ='sudo /usr/bin/systemctl stop repmgr12.service'
EOF
mkdir -p /data/{pgsql,archive,backup}
chown -R postgres:postgres /data/pgsql/
chown -R postgres:postgres /data/archive/
chown -R postgres:postgres /data/backup/
#注册主节点
su - postgres -c "/usr/pgsql-12/bin/repmgr -h 192.168.11.192 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf standby clone --dry-run"
#clone主节点数据
su - postgres -c "/usr/pgsql-12/bin/repmgr -h 192.168.11.192 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf standby clone"
#启动standby节点
systemctl start postgresql-12
#启动repmgr12
systemctl start repmgr12
#查看传输的状态
su - postgres -c psql
select * from pg_stat_wal_receiver;
#注册节点
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf standby register"
#查询注册情况
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf cluster show"
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+----------------+---------+-----------+----------------+----------+----------+----------+-----------------------------------------------------------------
1 | 192.168.11.192 | primary | * running | | default | 100 | 3 | host=192.168.11.192 user=repmgr dbname=repmgr connect_timeout=2
2 | 192.168.11.193 | standby | running | 192.168.11.192 | default | 100 | 3 | host=192.168.11.193 user=repmgr dbname=repmgr connect_timeout=2
七、 主备切换
切换主备,需要在备库上执行
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf standby switchover -U repmgr --verbose"
#查询注册情况
[root@centos7 etc]# su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf cluster show"
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+----------------+---------+-----------+----------------+----------+----------+----------+-----------------------------------------------------------------
1 | 192.168.11.192 | standby | running | 192.168.11.193 | default | 100 | 3 | host=192.168.11.192 user=repmgr dbname=repmgr connect_timeout=2
2 | 192.168.11.193 | primary | * running | | default | 100 | 4 | host=192.168.11.193 user=repmgr dbname=repmgr connect_timeout=2
八、原主库执行rejoin加入集群
#原主库执行rejoin加入集群(注:需要把原主库关停后再执行以下脚本)
su - postgres -c "repmgr node rejoin -d 'host=192.168.11.192 dbname=repmgr user=repmgr' --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose --dry-run"
su - postgres -c "repmgr node rejoin -d 'host=192.168.11.192 dbname=repmgr user=repmgr' --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose"
或者
su - postgres -c "repmgr node rejoin -h 192.168.11.192 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose --dry-run"
su - postgres -c "repmgr node rejoin -h 192.168.11.192 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose"
参考:http://t.zoukankan.com/ctypyb2002-p-9792868.html
九、手动提升备节点为主节点
su - postgres -c "repmgr -f /etc/repmgr/12/repmgr.conf standby promote --verbose"
十、 失败重做
#使用命令将其驱逐出 repmgr 集群
su - postgres -c "repmgr -f /etc/repmgr/12/repmgr.conf standby unregister -d 'host=192.168.11.192 dbname=repmgr user=repmgr' --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose "
#关闭分离的从库,清理数据目录
#按增加从库的步骤重新加入集群
十一、其它
查询集群状态
repmgr service status --detail
ID | Name | Role | Status | Upstream | Location | Priority | repmgrd | PID | Paused? | Upstream last seen
----+----------------+---------+-----------+----------------+----------+----------+-------------+-----+---------+--------------------
1 | 192.168.11.192 | standby | running | 192.168.11.193 | default | 100 | not running | n/a | n/a | n/a
2 | 192.168.11.193 | primary | * running | | default | 100 | not running | n/a | n/a | n/a