安装postgresql
adduser postgres
wget https://ftp.postgresql.org/pub/source/v13.4/postgresql-13.4.tar.gz
mkdir -p /postgresql/{pgdata,archive,scripts,backup,pg13,soft}
chown -R postgres:postgres /postgresql
yum install -y cmake make gcc zlib gcc-c++ perl readline readline-devel zlib zlib-devel 3perl python36 tcl openssl ncurses-devel openldap pam
mv /root/postgresql-13.4.tar.gz ./
chown -R postgres:postgres /postgresql
su - postgres
cd /postgresql/soft/
tar -zxvf postgresql-13.4.tar.gz
cd postgresql-13.4
./configure --prefix=/postgresql/pg13
make -j 8 && make install
systemctl daemon-reload
systemctl enable PG13
systemctl start PG13
systemctl status PG13
[root@pg03 ~]# cat /etc/systemd/system/PG13.service
[Unit]
Description=PostgreSQL database server
Documentation=man:postgres(1)
After=network.target
[Service]
Type=forking
User=postgres
Group=postgres
Environment=PGPORT=5432
Environment=PGDATA=/postgresql/pgdata
OOMScoreAdjust=-1000
ExecStart=/postgresql/pg13/bin/pg_ctl start -D ${PGDATA} -s -o "-p ${PGPORT}" -w -t 300
ExecStop=/postgresql/pg13/bin/pg_ctl stop -D ${PGDATA} -s -m fast
ExecReload=/postgresql/pg13/bin/pg_ctl reload -D ${PGDATA} -s
KillMode=mixed
KillSignal=SIGINT
TimeoutSec=0
[Install]
WantedBy=multi-user.target
cat .bash_profile内容:
PATH=$PATH:$HOME/.local/bin:$HOME/bin:/postgresql/pg13/bin
export PATH
export LANG=en_US.UTF-8
export PS1="[\u@\h \W]\$ "
export PGPORT=5432
export PGDATA=/postgresql/pgdata
export PGHOME=/postgresql/pg13
export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH
export PATH=$PGHOME/bin:$PATH:.
export DATE=`date +"%Y%m%d%H%M"`
export MANPATH=$PGHOME/share/man:$MANPATH
export PGHOST=$PGDATA
export PGUSER=postgres
export PGDATABASE=postgres
psql -U postgres -h 192.168.52.138 -d postgres -p5432
/etc/profile内容:编译repmgr会用到
unset -f pathmunge
PATH=$PATH:/postgresql/pg13/bin
postgres密码:abcd1234
[root@pg03 repmgr-5.2.1]# su - postgres
Last login: Wed Sep 22 10:16:02 CST 2021 on pts/0
[postgres@pg03 ~]$ pwd
/home/postgres
[postgres@pg03 ~]$ more .pgpass
192.168.52.138:5432:postgres:replicator:rep
192.168.52.139:5432:postgres:replicator:rep
postgresql.conf内容:
listen_addresses = '*'
port=5432
unix_socket_directories='/postgresql/pgdata'
logging_collector = on
log_directory = 'pg_log'
log_filename = 'postgresql-%a.log'
log_truncate_on_rotation = on
wal_level = replica
archive_mode = on
archive_command = 'arch.sh %f %p'
wal_log_hints=on
hot_standby = on
/postgresql/pgdata/arch.sh内容:
test ! -f $PGDATA/arch/$1 && cp --preserve=timestamps $2 $PGDATA/arch/$1 ; find $PGDATA/arch/ -type f -mtime +7 -exec rm -f {} \;
配置复制用户:(使用管理员也行)
create role replicator with replication login;
\password replicator
rep
[postgres@pg03 ~]$ psql
psql (13.4)
Type "help" for help.
postgres=# \du
List of roles
Role name | Attributes | Member of
------------+------------------------------------------------------------+-----------
postgres | Superuser, Create role, Create DB, Replication, Bypass RLS | {}
replicator | Replication | {}
postgres=# alter user replicator with superuser;
ALTER ROLE
postgres=# \du
List of roles
Role name | Attributes | Member of
------------+------------------------------------------------------------+-----------
postgres | Superuser, Create role, Create DB, Replication, Bypass RLS | {}
replicator | Superuser, Replication
pg_hba.conf内容:
# TYPE DATABASE USER ADDRESS METHOD
local all all trust
host all all 127.0.0.1/32 trust
host all all 0.0.0.0/0 md5
host replication replicator 192.168.52.0/24 trust
启停pg:
pg_ctl stop
pg_ctl start
在线加载配置变更:pg_ctl reload
主备配置ssh互信:
[postgres@pg03 ~]$ cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.52.138 pg03
192.168.52.139 pg04
su - postgres进入postgres用户操作
pg03上操作:
# 生成秘钥到用户主目录下的.ssh文件夹下
ssh-keygen -t rsa
# 将秘钥拷贝到远程机器
ssh-copy-id -i .ssh/id_rsa.pub postgres@pg04
# # 验证是否授权完成:不提示密码,直接返回日期说明配置正确
ssh pg04 date
pg04上操作:
# 生成秘钥到用户主目录下的.ssh文件夹下
ssh-keygen -t rsa
# 将秘钥拷贝到远程机器
ssh-copy-id -i .ssh/id_rsa.pub postgres@pg03
# # 验证是否授权完成:不提示密码,直接返回日期说明配置正确
ssh pg03 date
主备测试链接:备库连主库
[postgres@pg04 ~]$ psql 'host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2'
Password for user replicator:rep
编译repmgr-5.2.1
https://repmgr.org/download/repmgr-5.2.1.tar.gz
tar -zxvf repmgr-5.2.1.tar.gz
pg03上:
[postgres@pg03 ~]$ cat /etc/repmgr.conf
node_id=1
node_name=pg03
conninfo='host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2'
data_directory='/postgresql/pgdata'
pg_bindir='/postgresql/pg13/bin'
pg04上:
[postgres@pg04 ~]$ cat /etc/repmgr.conf
node_id=2
node_name=pg04
conninfo='host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2'
data_directory='/postgresql/pgdata'
pg_bindir='/postgresql/pg13/bin'
yum install flex -y
[root@pg03 repmgr-5.2.1]# ./configure --prefix=/postgresql/pg13
[root@pg03 repmgr-5.2.1]# make && make install
主库注册:
[postgres@pg03 ~]$ pg_ctl reload
server signaled
[postgres@pg03 ~]$ repmgr primary register
INFO: connecting to primary database...
NOTICE: attempting to install extension "repmgr"
NOTICE: "repmgr" extension successfully installed
NOTICE: primary node record (ID: 1) registered
[postgres@pg03 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
1 | pg03 | primary | * running | | default | 100 | 1 | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2
备库注册:
[postgres@pg04 ~]$ repmgr -h 192.168.52.138 -U replicator -d postgres -f /etc/repmgr.conf standby clone --dry-run
NOTICE: destination directory "/postgresql/pgdata" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.52.138 user=replicator dbname=postgres
DETAIL: current installation size is 31 MB
INFO: "repmgr" extension is installed in database "postgres"
WARNING: target data directory appears to be a PostgreSQL data directory
DETAIL: target data directory is "/postgresql/pgdata"
HINT: use -F/--force to overwrite the existing data directory
INFO: replication slot usage not requested; no replication slot will be set up for this standby
INFO: parameter "max_wal_senders" set to 10
NOTICE: checking for available walsenders on the source node (2 required)
INFO: sufficient walsenders available on the source node
DETAIL: 2 required, 10 available
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: required number of replication connections could be made to the source server
DETAIL: 2 replication connections required
WARNING: data checksums are not enabled and "wal_log_hints" is "off"
DETAIL: pg_rewind requires "wal_log_hints" to be enabled
NOTICE: standby will attach to upstream node 1
HINT: consider using the -c/--fast-checkpoint option
INFO: all prerequisites for "standby clone" are met
[postgres@pg04 ~]$ repmgr -h 192.168.52.138 -U replicator -d postgres -f /etc/repmgr.conf standby clone
NOTICE: destination directory "/postgresql/pgdata" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.52.138 user=replicator dbname=postgres
DETAIL: current installation size is 31 MB
ERROR: target data directory appears to be a PostgreSQL data directory
DETAIL: target data directory is "/postgresql/pgdata"
HINT: use -F/--force to overwrite the existing data directory
[postgres@pg04 ~]$ rm -rf /postgresql/pgdata/*
[postgres@pg04 ~]$ repmgr -h 192.168.52.138 -U replicator -d postgres -f /etc/repmgr.conf standby clone
NOTICE: destination directory "/postgresql/pgdata" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.52.138 user=replicator dbname=postgres
DETAIL: current installation size is 31 MB
INFO: replication slot usage not requested; no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
WARNING: data checksums are not enabled and "wal_log_hints" is "off"
DETAIL: pg_rewind requires "wal_log_hints" to be enabled
INFO: checking and correcting permissions on existing directory "/postgresql/pgdata"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
pg_basebackup -l "repmgr base backup" -D /postgresql/pgdata -h 192.168.52.138 -p 5432 -U replicator -X stream
WARNING: skipping special file "./.s.PGSQL.5432"
WARNING: skipping special file "./.s.PGSQL.5432"
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: pg_ctl -D /postgresql/pgdata start
HINT: after starting the server, you need to register this standby with "repmgr standby register"
[postgres@pg04 ~]$ pg_ctl -D /postgresql/pgdata start
waiting for server to start....2021-09-22 10:07:56.869 CST [2362] LOG: redirecting log output to logging collector process
2021-09-22 10:07:56.869 CST [2362] HINT: Future log output will appear in directory "pg_log".
done
server started
[postgres@pg04 ~]$ repmgr standby register
INFO: connecting to local node "pg04" (ID: 2)
INFO: connecting to primary database
WARNING: --upstream-node-id not supplied, assuming upstream node is primary (node ID 1)
INFO: standby registration complete
NOTICE: standby node "pg04" (ID: 2) successfully registered
[postgres@pg04 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
1 | pg03 | primary | * running | | default | 100 | 1 | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2
2 | pg04 | standby | running | pg03 | default | 100 | 1 | host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2
[postgres@pg04 ~]$ psql
psql (13.4)
Type "help" for help.
postgres=# select * from pg_stat_replication;
pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start | backend_xmin | state | sent_lsn
| write_lsn | flush_lsn | replay_lsn | write_lag | flush_lag | replay_lag | sync_priority | sync_state | reply_time
-----+----------+---------+------------------+-------------+-----------------+-------------+---------------+--------------+-------+----------
+-----------+-----------+------------+-----------+-----------+------------+---------------+------------+------------
(0 rows)
postgres=# \x
Expanded display is on.
+-----------+-----------+------------+-----------+-----------+------------+---------------+------------+---
(0 rows)
postgres=# select * from pg_stat_wal_receiver;
-[ RECORD 1 ]---------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
pid | 2368
status | streaming
receive_start_lsn | 0/8000000
receive_start_tli | 1
written_lsn | 0/8000570
flushed_lsn | 0/8000570
received_tli | 1
last_msg_send_time | 2021-09-22 10:08:35.703193+08
last_msg_receipt_time | 2021-09-22 10:08:35.714032+08
latest_end_lsn | 0/8000570
latest_end_time | 2021-09-22 10:08:05.694295+08
slot_name |
sender_host | 192.168.52.138
sender_port | 5432
conninfo | user=replicator passfile=/home/postgres/.pgpass channel_binding=disable connect_timeout=2 dbname=replication host=192.168.52.138 port=5432 application_name=pg04 fallback_application_name=walreceiver sslmode=disable sslcompression=0 ssl_min_protocol_version=TLSv1.2 gssencmode=disable krbsrvname=postgres target_session_attrs=any
主备库切换测试:
在备库做switchover
[postgres@pg04 ~]$ repmgr standby switchover
在备库做promote激活
主库无法恢复的情况下,在备库操作激活动作。
[postgres@pg04 ~]$ repmgr standby promote
NOTICE: promoting standby to primary
DETAIL: promoting server "pg04" (ID: 2) using pg_promote()
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
NOTICE: STANDBY PROMOTE successful
DETAIL: server "pg04" (ID: 2) was successfully promoted to primary
[postgres@pg04 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
1 | pg03 | primary | - failed | ? | default | 100 | | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2
2 | pg04 | primary | * running | | default | 100 | 4 | host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2
WARNING: following issues were detected
- unable to connect to node "pg03" (ID: 1)
HINT: execute with --verbose option to see connection error messages
等主库恢复后,重新作为备库加入:
repmgr node rejoin -h192.168.52.139 -Upostgres -dpostgres --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose
[postgres@pg03 ~]$ repmgr node rejoin -d 'host=192.168.52.139 port=5432 user=replicator dbname=postgres' --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose
INFO: looking for configuration file in /etc
INFO: configuration file found at: "/etc/repmgr.conf"
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 2
DETAIL: rejoin target server's timeline 4 forked off current database system timeline 3 before current recovery point 0/C000028
INFO: prerequisites for using pg_rewind are met
INFO: 2 files copied to "/tmp/repmgr-config-archive-pg03"
NOTICE: executing pg_rewind
DETAIL: pg_rewind command is "/postgresql/pg13/bin/pg_rewind -D '/postgresql/pgdata' --source-server='host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2'"
NOTICE: 2 files copied to /postgresql/pgdata
INFO: directory "/tmp/repmgr-config-archive-pg03" deleted
NOTICE: setting node 1's upstream to node 2
WARNING: unable to ping "host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2"
DETAIL: PQping() returned "PQPING_NO_RESPONSE"
NOTICE: starting server using "/postgresql/pg13/bin/pg_ctl -w -D '/postgresql/pgdata' start"
INFO: node "pg03" (ID: 1) is pingable
INFO: node "pg03" (ID: 1) has attached to its upstream node
NOTICE: NODE REJOIN successful
DETAIL: node 1 is now attached to node 2
[postgres@pg03 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
1 | pg03 | standby | running | pg04 | default | 100 | 3 | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2
或者clone恢复:
repmgr standby clone -h 192.168.52.139 -Upostgres
最后主备又可正常switchover了。
[postgres@pg03 ~]$ psql
psql (13.4)
Type "help" for help.
postgres=# \l
List of databases
Name | Owner | Encoding | Collate | Ctype | Access privileges
-----------+----------+----------+------------+------------+-----------------------
jyc | postgres | UTF8 | en_US.utf8 | en_US.utf8 |
postgres | postgres | UTF8 | en_US.utf8 | en_US.utf8 |
template0 | postgres | UTF8 | en_US.utf8 | en_US.utf8 | =c/postgres +
| | | | | postgres=CTc/postgres
template1 | postgres | UTF8 | en_US.utf8 | en_US.utf8 | =c/postgres +
| | | | | postgres=CTc/postgres
(4 rows)
postgres=# \dt
Did not find any relations.
postgres=# pwd
postgres-# \c postgres
You are now connected to database "postgres" as user "postgres".
postgres-# \dt
Did not find any relations.
postgres-# \dx
List of installed extensions
Name | Version | Schema | Description
---------+---------+------------+------------------------------------
plpgsql | 1.0 | pg_catalog | PL/pgSQL procedural language
repmgr | 5.2 | repmgr | Replication manager for PostgreSQL
(2 rows)
postgres-# \du
List of roles
Role name | Attributes | Member of
------------+------------------------------------------------------------+-----------
postgres | Superuser, Create role, Create DB, Replication, Bypass RLS | {}
replicator | Superuser, Replication | {}
postgres-# \dt+
Did not find any relations.
postgres-# \q
[postgres@pg03 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+------+---------+-----------+----------+----------+----------+----------+-----------------------------------------------------------------------
1 | pg03 | primary | * running | | default | 100 | 5 | host=192.168.52.138 user=replicator dbname=postgres connect_timeout=2
2 | pg04 | standby | running | pg03 | default | 100 | 5 | host=192.168.52.139 user=replicator dbname=postgres connect_timeout=2
[postgres@pg03 ~]$