需要有一套集群环境或者redis主从环境,然后配置哨兵监控主节点,之后进行故障转移测试
可参考之前写的文章,这里我用主从模式。
主机说明
主机IP | Redis端口号 | Sentinel端口 | 说明 |
---|---|---|---|
192.168.9.38 | 6379 | 26379 | master |
192.168.9.39 | 6379 | 26379 | slave01 |
192.168.9.68 | 6379 | 26379 | slave02 |
Redis目录结构(192.168.9.38)
[root@redis redis]# pwd
/opt/redis
[root@mysql01 redis]# tree
.
├── bin
│ ├── redis-benchmark
│ ├── redis-check-aof
│ ├── redis-check-rdb
│ ├── redis-cli
│ ├── redis-migrate-tool
│ ├── redis-sentinel -> redis-server
│ └── redis-server
├── conf
│ └── 6379
│ ├── db
│ │ ├── dump.rdb
│ │ └── nodes-6379.conf
│ ├── log
│ │ └── redis_6379.log
│ ├── redis.conf
│ ├── sentinel
│ │ └── redis-sentinel.log
│ └── sentinel.conf
└── scripts
├── creat-instance.sh
└── node-env-init.sh
7 directories, 15 files
详细配置文件说明可参考如下文章
# 访问控制关闭
protected-mode no
port 26379
daemonize yes
pidfile /var/run/redis-sentinel.pid
dir /opt/redis/conf/6379/sentinel
logfile /opt/redis/conf/6379/sentinel/redis-sentinel.log
# 哨兵监控的master节点,后面这个2是quorum,指至少有几个哨兵发现master宕机了,才进行故障转移
sentinel monitor mymaster 192.168.9.38 6379 2
# 设置主机访问密码
sentinel auth-pass mymaster 12345678
# 哨兵连接不上master多长时间后就认为master宕机了,单位是毫秒,默认30秒
sentinel down-after-milliseconds mymaster 3000
# 新的master出现后,其他follow的slave并行同步的个数,并行的越多同时阻塞的就越多,设置1来保证每次只有一个slave处于不能处理命令请求的状态
sentinel parallel-syncs mymaster 1
分发配置文件到其它节点上,若有多实例配置可以以端口号区分目录
scp /opt/redis/conf/6379/sentinel.conf 192.168.9.39:/opt/redis/conf/6379/
scp /opt/redis/conf/6379/sentinel.conf 192.168.9.68:/opt/redis/conf/6379/
以 192.168.9.38 为例
# 启动
/opt/redis/bin/redis-sentinel /opt/redis/conf/6379/sentinel.conf
# 查看master状态
[root@redis 6379]# /opt/redis/bin/redis-cli -h 192.168.9.38 -p 26379
192.168.9.38:26379> sentinel master mymaster
1) "name"
2) "mymaster"
3) "ip"
4) "192.168.9.38"
5) "port"
6) "6379"
......
# 查看slave状态
192.168.9.38:26379> sentinel slaves mymaster
1) 1) "name"
2) "192.168.9.68:6379"
3) "ip"
4) "192.168.9.68"
5) "port"
6) "6379"
......
2) 1) "name"
2) "192.168.9.39:6379"
3) "ip"
4) "192.168.9.39"
5) "port"
6) "6379"
......
# 查看Sentinel状态
192.168.9.38:26379> sentinel sentinels mymaster
1) 1) "name"
2) "879c18048a7ba013354dbca3f1d1e8ffbe1b2bb9"
3) "ip"
4) "192.168.9.68"
5) "port"
6) "26379"
......
2) 1) "name"
2) "9171622bf154b7e48a988743694ed3f9025916ed"
3) "ip"
4) "192.168.9.39"
5) "port"
6) "26379"
操作步骤
# 1. 实时查看192.168.9.38的Sentinel日志
[root@redis ~]# tailf /opt/redis/conf/6379/sentinel/redis-sentinel.log
# 2. kill master节点进程
kill -9 `netstat -lntup | grep redis-server | awk -F'[ /]+' '{print $7}'`
# 3. 查看192.168.9.38的Sentinel日志会发现master已经重新选举
# 新的master已经切换到192.168.9.68节点上了
1355:X 06 Jul 2020 19:20:57.501 # +sdown master mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:20:58.566 # +odown master mymaster 192.168.9.38 6379 #quorum 3/2
1355:X 06 Jul 2020 19:21:24.774 # +new-epoch 66
1355:X 06 Jul 2020 19:21:24.774 # +try-failover master mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:24.871 # +vote-for-leader 2badff7d06626470131b75b1cf69ea643f4780ce 66
1355:X 06 Jul 2020 19:21:25.072 # 9171622bf154b7e48a988743694ed3f9025916ed voted for 2badff7d06626470131b75b1cf69ea643f4780ce 66
1355:X 06 Jul 2020 19:21:25.072 # 879c18048a7ba013354dbca3f1d1e8ffbe1b2bb9 voted for 2badff7d06626470131b75b1cf69ea643f4780ce 66
1355:X 06 Jul 2020 19:21:25.118 # +elected-leader master mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:25.118 # +failover-state-select-slave master mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:25.180 # +selected-slave slave 192.168.9.68:6379 192.168.9.68 6379 @ mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:25.180 * +failover-state-send-slaveof-noone slave 192.168.9.68:6379 192.168.9.68 6379 @ mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:25.256 * +failover-state-wait-promotion slave 192.168.9.68:6379 192.168.9.68 6379 @ mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:25.788 # +promoted-slave slave 192.168.9.68:6379 192.168.9.68 6379 @ mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:25.788 # +failover-state-reconf-slaves master mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:25.806 * +slave-reconf-sent slave 192.168.9.39:6379 192.168.9.39 6379 @ mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:26.159 # -odown master mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:26.646 * +slave-reconf-inprog slave 192.168.9.39:6379 192.168.9.39 6379 @ mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:26.647 * +slave-reconf-done slave 192.168.9.39:6379 192.168.9.39 6379 @ mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:26.716 # +failover-end master mymaster 192.168.9.38 6379
1355:X 06 Jul 2020 19:21:26.717 # +switch-master mymaster 192.168.9.38 6379 192.168.9.68 6379
1355:X 06 Jul 2020 19:21:26.717 * +slave slave 192.168.9.39:6379 192.168.9.39 6379 @ mymaster 192.168.9.68 6379
1355:X 06 Jul 2020 19:21:26.717 * +slave slave 192.168.9.38:6379 192.168.9.38 6379 @ mymaster 192.168.9.68 6379
1355:X 06 Jul 2020 19:21:29.753 # +sdown slave 192.168.9.38:6379 192.168.9.38 6379 @ mymaster 192.168.9.68 6379
# 4. 查看192.168.9.68是否已经是master节点并进行读写操作
# 因为这里的redis主从配置的从节点只读,故如果192.168.9.68没有切换成master是不能进行写操作的
[root@redis ~]# /opt/redis/bin/redis-cli -h 192.168.9.68 -p 26379
192.168.9.68:26379> sentinel master mymaster
1) "name"
2) "mymaster"
3) "ip"
4) "192.168.9.68"
5) "port"
6) "6379"
......
[root@redis ~]# /opt/redis/bin/redis-cli -h 192.168.9.68 -a 12345678
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
192.168.9.68:6379> set name RSQ
OK
192.168.9.68:6379> get name
"RSQ"
# 5. 把之前的master的redis实例启动起来(192.168.9.38)
[root@redis 6379]# /opt/redis/bin/redis-server /opt/redis/conf/6379/redis.conf
[root@redis 6379]# /opt/redis/bin/redis-cli -h 192.168.9.38 -a 12345678
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
192.168.9.38:6379> info Replication
# Replication
role:slave
master_host:192.168.9.68
master_port:6379
master_link_status:up
master_last_io_seconds_ago:0
master_sync_in_progress:0
slave_repl_offset:111692
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:8f8986ae850f233f7531fa63ac5f3c22e3a5b4c9
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:111692
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:94624
repl_backlog_histlen:17069
# 检查192.168.9.38的redis.conf配置文件,会发现之前的master被Sentinel自动修改了配置文件
[root@redis 6379]# grep "^replicaof" /opt/redis/conf/6379/redis.conf
replicaof 192.168.9.68 6379
故障转移测试成功