当前位置: 首页 > 工具软件 > repmgr > 使用案例 >

repmgr+pg主备安装及切换

费承载
2023-12-01

安装postgresql
adduser postgres
wget https://ftp.postgresql.org/pub/source/v13.4/postgresql-13.4.tar.gz
mkdir -p /postgresql/{pgdata,archive,scripts,backup,pg13,soft}
chown -R postgres:postgres /postgresql
yum install -y cmake make gcc zlib gcc-c++ perl readline readline-devel zlib zlib-devel  3perl python36 tcl openssl ncurses-devel openldap pam
mv /root/postgresql-13.4.tar.gz ./
chown -R postgres:postgres /postgresql

su - postgres
cd /postgresql/soft/
tar -zxvf postgresql-13.4.tar.gz 
cd postgresql-13.4
./configure --prefix=/postgresql/pg13
make -j 8 && make install


systemctl daemon-reload
systemctl enable PG13
systemctl start PG13
systemctl status PG13

[root@pg03 ~]# cat /etc/systemd/system/PG13.service
[Unit]
Description=PostgreSQL database server
Documentation=man:postgres(1)
After=network.target

[Service]
Type=forking
User=postgres
Group=postgres
Environment=PGPORT=5432
Environment=PGDATA=/postgresql/pgdata
OOMScoreAdjust=-1000
ExecStart=/postgresql/pg13/bin/pg_ctl start -D ${PGDATA} -s -o "-p ${PGPORT}" -w -t 300
ExecStop=/postgresql/pg13/bin/pg_ctl stop -D ${PGDATA} -s -m fast
ExecReload=/postgresql/pg13/bin/pg_ctl reload -D ${PGDATA} -s
KillMode=mixed
KillSignal=SIGINT
TimeoutSec=0

[Install]
WantedBy=multi-user.target

cat .bash_profile内容:
PATH=$PATH:$HOME/.local/bin:$HOME/bin:/postgresql/pg13/bin

export PATH
export LANG=en_US.UTF-8
export PS1="[\u@\h \W]\$ "
export PGPORT=5432
export PGDATA=/postgresql/pgdata
export PGHOME=/postgresql/pg13
export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH
export PATH=$PGHOME/bin:$PATH:.
export DATE=`date +"%Y%m%d%H%M"`
export MANPATH=$PGHOME/share/man:$MANPATH
export PGHOST=$PGDATA
export PGUSER=postgres
export PGDATABASE=postgres


psql -U postgres -h 192.168.52.138 -d postgres -p5432

/etc/profile内容:编译repmgr会用到
unset -f pathmunge
PATH=$PATH:/postgresql/pg13/bin


postgres密码:abcd1234
[root@pg03 repmgr-5.2.1]# su - postgres
Last login: Wed Sep 22 10:16:02 CST 2021 on pts/0
[postgres@pg03 ~]$ pwd
/home/postgres
 [postgres@pg03 ~]$ more .pgpass 
192.168.52.138:5432:postgres:replicator:rep
192.168.52.139:5432:postgres:replicator:rep


postgresql.conf内容:
listen_addresses = '*'
port=5432
unix_socket_directories='/postgresql/pgdata'
logging_collector = on
log_directory = 'pg_log'
log_filename = 'postgresql-%a.log'
log_truncate_on_rotation = on
wal_level = replica
archive_mode = on
archive_command = 'arch.sh %f %p'
wal_log_hints=on
hot_standby = on    


/postgresql/pgdata/arch.sh内容:
test ! -f $PGDATA/arch/$1 && cp --preserve=timestamps $2 $PGDATA/arch/$1 ; find $PGDATA/arch/ -type f -mtime +7 -exec rm -f {} \;
 
配置复制用户:(使用管理员也行)
create role replicator with replication login;
\password replicator
rep

[postgres@pg03 ~]$ psql
psql (13.4)
Type "help" for help.

postgres=# \du
                                    List of roles
 Role name  |                         Attributes                         | Member of 
------------+------------------------------------------------------------+-----------
 postgres   | Superuser, Create role, Create DB, Replication, Bypass RLS | {}
 replicator | Replication                                                | {}

postgres=# alter user replicator with superuser;
ALTER ROLE
postgres=# \du
                                    List of roles
 Role name  |                         Attributes                         | Member of 
------------+------------------------------------------------------------+-----------
 postgres   | Superuser, Create role, Create DB, Replication, Bypass RLS | {}
 replicator | Superuser, Replication   
pg_hba.conf内容:
# TYPE  DATABASE    USER    ADDRESS       METHOD
local     all       all                    trust
host      all       all   127.0.0.1/32     trust
host      all       all    0.0.0.0/0        md5
host    replication     replicator      192.168.52.0/24       trust

启停pg:
pg_ctl stop
pg_ctl start
在线加载配置变更:pg_ctl reload

主备配置ssh互信:
[postgres@pg03 ~]$ cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.52.138  pg03
192.168.52.139  pg04

su - postgres进入postgres用户操作
pg03上操作:
# 生成秘钥到用户主目录下的.ssh文件夹下
ssh-keygen -t rsa
# 将秘钥拷贝到远程机器
ssh-copy-id -i .ssh/id_rsa.pub postgres@pg04
# # 验证是否授权完成:不提示密码,直接返回日期说明配置正确
ssh pg04 date
pg04上操作:
# 生成秘钥到用户主目录下的.ssh文件夹下
ssh-keygen -t rsa
# 将秘钥拷贝到远程机器
ssh-copy-id -i .ssh/id_rsa.pub postgres@pg03
# # 验证是否授权完成:不提示密码,直接返回日期说明配置正确
ssh pg03 date

主备测试链接:备库连主库
[postgres@pg04 ~]$ psql 'host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2'
Password for user replicator:rep
编译repmgr-5.2.1
https://repmgr.org/download/repmgr-5.2.1.tar.gz

tar -zxvf repmgr-5.2.1.tar.gz
pg03上:
[postgres@pg03 ~]$ cat /etc/repmgr.conf 
node_id=1
node_name=pg03
conninfo='host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2'
data_directory='/postgresql/pgdata'
pg_bindir='/postgresql/pg13/bin'

pg04上:
[postgres@pg04 ~]$ cat /etc/repmgr.conf 
node_id=2
node_name=pg04
conninfo='host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2'
data_directory='/postgresql/pgdata'
pg_bindir='/postgresql/pg13/bin'

yum install flex -y
[root@pg03 repmgr-5.2.1]# ./configure --prefix=/postgresql/pg13
[root@pg03 repmgr-5.2.1]# make && make install

主库注册:
[postgres@pg03 ~]$ pg_ctl reload
server signaled
[postgres@pg03 ~]$ repmgr primary register
INFO: connecting to primary database...
NOTICE: attempting to install extension "repmgr"
NOTICE: "repmgr" extension successfully installed
NOTICE: primary node record (ID: 1) registered
[postgres@pg03 ~]$ repmgr cluster show
 ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                    
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
 1  | pg03 | primary | * running |          | default  | 100      | 1        | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2

备库注册:
[postgres@pg04 ~]$ repmgr -h 192.168.52.138 -U replicator -d postgres -f /etc/repmgr.conf standby clone --dry-run
NOTICE: destination directory "/postgresql/pgdata" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.52.138 user=replicator dbname=postgres
DETAIL: current installation size is 31 MB
INFO: "repmgr" extension is installed in database "postgres"
WARNING: target data directory appears to be a PostgreSQL data directory
DETAIL: target data directory is "/postgresql/pgdata"
HINT: use -F/--force to overwrite the existing data directory
INFO: replication slot usage not requested;  no replication slot will be set up for this standby
INFO: parameter "max_wal_senders" set to 10
NOTICE: checking for available walsenders on the source node (2 required)
INFO: sufficient walsenders available on the source node
DETAIL: 2 required, 10 available
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: required number of replication connections could be made to the source server
DETAIL: 2 replication connections required
WARNING: data checksums are not enabled and "wal_log_hints" is "off"
DETAIL: pg_rewind requires "wal_log_hints" to be enabled
NOTICE: standby will attach to upstream node 1
HINT: consider using the -c/--fast-checkpoint option
INFO: all prerequisites for "standby clone" are met
[postgres@pg04 ~]$ repmgr -h 192.168.52.138 -U replicator -d postgres -f /etc/repmgr.conf standby clone
NOTICE: destination directory "/postgresql/pgdata" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.52.138 user=replicator dbname=postgres
DETAIL: current installation size is 31 MB
ERROR: target data directory appears to be a PostgreSQL data directory
DETAIL: target data directory is "/postgresql/pgdata"
HINT: use -F/--force to overwrite the existing data directory
[postgres@pg04 ~]$ rm -rf /postgresql/pgdata/*
[postgres@pg04 ~]$ repmgr -h 192.168.52.138 -U replicator -d postgres -f /etc/repmgr.conf standby clone
NOTICE: destination directory "/postgresql/pgdata" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.52.138 user=replicator dbname=postgres
DETAIL: current installation size is 31 MB
INFO: replication slot usage not requested;  no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
WARNING: data checksums are not enabled and "wal_log_hints" is "off"
DETAIL: pg_rewind requires "wal_log_hints" to be enabled
INFO: checking and correcting permissions on existing directory "/postgresql/pgdata"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
  pg_basebackup -l "repmgr base backup"  -D /postgresql/pgdata -h 192.168.52.138 -p 5432 -U replicator -X stream 
WARNING:  skipping special file "./.s.PGSQL.5432"
WARNING:  skipping special file "./.s.PGSQL.5432"
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: pg_ctl -D /postgresql/pgdata start
HINT: after starting the server, you need to register this standby with "repmgr standby register"
[postgres@pg04 ~]$ pg_ctl -D /postgresql/pgdata start
waiting for server to start....2021-09-22 10:07:56.869 CST [2362] LOG:  redirecting log output to logging collector process
2021-09-22 10:07:56.869 CST [2362] HINT:  Future log output will appear in directory "pg_log".
 done
server started
[postgres@pg04 ~]$ repmgr standby register
INFO: connecting to local node "pg04" (ID: 2)
INFO: connecting to primary database
WARNING: --upstream-node-id not supplied, assuming upstream node is primary (node ID 1)
INFO: standby registration complete
NOTICE: standby node "pg04" (ID: 2) successfully registered
[postgres@pg04 ~]$ repmgr cluster show
 ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                    
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
 1  | pg03 | primary | * running |          | default  | 100      | 1        | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2
 2  | pg04 | standby |   running | pg03     | default  | 100      | 1        | host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2
[postgres@pg04 ~]$ psql
psql (13.4)
Type "help" for help.

postgres=# select * from pg_stat_replication;
 pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start | backend_xmin | state | sent_lsn 
| write_lsn | flush_lsn | replay_lsn | write_lag | flush_lag | replay_lag | sync_priority | sync_state | reply_time 
-----+----------+---------+------------------+-------------+-----------------+-------------+---------------+--------------+-------+----------
+-----------+-----------+------------+-----------+-----------+------------+---------------+------------+------------
(0 rows)

postgres=# \x
Expanded display is on.
+-----------+-----------+------------+-----------+-----------+------------+---------------+------------+---
(0 rows)

postgres=# select * from pg_stat_wal_receiver;
-[ RECORD 1 ]---------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
pid                   | 2368
status                | streaming
receive_start_lsn     | 0/8000000
receive_start_tli     | 1
written_lsn           | 0/8000570
flushed_lsn           | 0/8000570
received_tli          | 1
last_msg_send_time    | 2021-09-22 10:08:35.703193+08
last_msg_receipt_time | 2021-09-22 10:08:35.714032+08
latest_end_lsn        | 0/8000570
latest_end_time       | 2021-09-22 10:08:05.694295+08
slot_name             | 
sender_host           | 192.168.52.138
sender_port           | 5432
conninfo              | user=replicator passfile=/home/postgres/.pgpass channel_binding=disable connect_timeout=2 dbname=replication host=192.168.52.138 port=5432 application_name=pg04 fallback_application_name=walreceiver sslmode=disable sslcompression=0 ssl_min_protocol_version=TLSv1.2 gssencmode=disable krbsrvname=postgres target_session_attrs=any
主备库切换测试:
在备库做switchover
[postgres@pg04 ~]$ repmgr standby switchover
在备库做promote激活
主库无法恢复的情况下,在备库操作激活动作。
[postgres@pg04 ~]$ repmgr standby promote
NOTICE: promoting standby to primary
DETAIL: promoting server "pg04" (ID: 2) using pg_promote()
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
NOTICE: STANDBY PROMOTE successful
DETAIL: server "pg04" (ID: 2) was successfully promoted to primary
[postgres@pg04 ~]$ repmgr cluster show
 ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                    
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
 1  | pg03 | primary | - failed  | ?        | default  | 100      |          | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2
 2  | pg04 | primary | * running |          | default  | 100      | 4        | host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2

WARNING: following issues were detected
  - unable to connect to node "pg03" (ID: 1)

HINT: execute with --verbose option to see connection error messages

等主库恢复后,重新作为备库加入:

repmgr node rejoin -h192.168.52.139 -Upostgres -dpostgres --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose

[postgres@pg03 ~]$ repmgr node rejoin -d 'host=192.168.52.139 port=5432 user=replicator dbname=postgres' --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose
INFO: looking for configuration file in /etc
INFO: configuration file found at: "/etc/repmgr.conf"
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 2
DETAIL: rejoin target server's timeline 4 forked off current database system timeline 3 before current recovery point 0/C000028
INFO: prerequisites for using pg_rewind are met
INFO: 2 files copied to "/tmp/repmgr-config-archive-pg03"
NOTICE: executing pg_rewind
DETAIL: pg_rewind command is "/postgresql/pg13/bin/pg_rewind -D '/postgresql/pgdata' --source-server='host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2'"
NOTICE: 2 files copied to /postgresql/pgdata
INFO: directory "/tmp/repmgr-config-archive-pg03" deleted
NOTICE: setting node 1's upstream to node 2
WARNING: unable to ping "host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2"
DETAIL: PQping() returned "PQPING_NO_RESPONSE"
NOTICE: starting server using "/postgresql/pg13/bin/pg_ctl  -w -D '/postgresql/pgdata' start"
INFO: node "pg03" (ID: 1) is pingable
INFO: node "pg03" (ID: 1) has attached to its upstream node
NOTICE: NODE REJOIN successful
DETAIL: node 1 is now attached to node 2

[postgres@pg03 ~]$ repmgr cluster show
 ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                    
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
 1  | pg03 | standby |   running | pg04     | default  | 100      | 3        | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2

或者clone恢复:
repmgr standby clone -h 192.168.52.139 -Upostgres

最后主备又可正常switchover了。

[postgres@pg03 ~]$ psql
psql (13.4)
Type "help" for help.

postgres=# \l
                                 List of databases
   Name    |  Owner   | Encoding |  Collate   |   Ctype    |   Access privileges   
-----------+----------+----------+------------+------------+-----------------------
 jyc       | postgres | UTF8     | en_US.utf8 | en_US.utf8 | 
 postgres  | postgres | UTF8     | en_US.utf8 | en_US.utf8 | 
 template0 | postgres | UTF8     | en_US.utf8 | en_US.utf8 | =c/postgres          +
           |          |          |            |            | postgres=CTc/postgres
 template1 | postgres | UTF8     | en_US.utf8 | en_US.utf8 | =c/postgres          +
           |          |          |            |            | postgres=CTc/postgres
(4 rows)

postgres=# \dt
Did not find any relations.
postgres=# pwd
postgres-# \c postgres
You are now connected to database "postgres" as user "postgres".
postgres-# \dt
Did not find any relations.
postgres-# \dx
                    List of installed extensions
  Name   | Version |   Schema   |            Description             
---------+---------+------------+------------------------------------
 plpgsql | 1.0     | pg_catalog | PL/pgSQL procedural language
 repmgr  | 5.2     | repmgr     | Replication manager for PostgreSQL
(2 rows)

postgres-# \du
                                    List of roles
 Role name  |                         Attributes                         | Member of 
------------+------------------------------------------------------------+-----------
 postgres   | Superuser, Create role, Create DB, Replication, Bypass RLS | {}
 replicator | Superuser, Replication                                     | {}

postgres-# \dt+
Did not find any relations.
postgres-# \q
[postgres@pg03 ~]$ repmgr cluster show
 ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                    
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
 1  | pg03 | primary | * running |          | default  | 100      | 5        | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2
 2  | pg04 | standby |   running | pg03     | default  | 100      | 5        | host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2
[postgres@pg03 ~]$
 

 类似资料: