Ceph version
# ceph --version
ceph version 13.2.10 () mimic (stable)
Try restarting the OSD
systemctl status ceph-osd@42.service
systemctl restart ceph-osd@42.service
journalctl -f -u ceph-osd@42.service
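If journalctl does not give enough detail, the OSD's own log (at the default location below, assuming logging has not been redirected) usually records why the daemon keeps failing:
tail -n 200 /var/log/ceph/ceph-osd.42.log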
Check the OSD's mount path and backing device
# ceph osd tree
host k004
42 hdd 14.55269 osd.42 down 0 1.00000
# ls -lh /var/lib/ceph/osd/ceph-*/block | grep a2f1287c
lrwxrwxrwx 1 ceph ceph 93 Apr 17 2021 /var/lib/ceph/osd/ceph-42/block -> /dev/ceph-a2f1287c-d6d2-4831-8434-8d847770a6e5/osd-block-2cc95b8b-3c44-4cd8-9d36-5dba20f2efbd
# lsblk | grep a2f1287c -B 1
sds 65:32 0 14.6T 0 disk
└─ceph--a2f1287c--d6d2--4831--8434--8d847770a6e5-osd--block--2cc95b8b--3c44--4cd8--9d36--5dba20f2efbd 253:18 0 14.6T 0 lvm
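As a cross-check, ceph-volume can print the same OSD-to-LV mapping directly on the OSD host, without piecing it together from ls and lsblk:
ceph-volume lvm list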
Restart the OSD
- In most cases, simply restarting the ceph-osd process is enough for it to rejoin the cluster and recover.
systemctl restart ceph-osd@42.service
journalctl -f -u ceph-osd@42.service
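If the restart will take a while, an optional precaution is to stop the cluster from marking the OSD out and rebalancing in the meantime; remember to clear the flag afterwards:
ceph osd set noout
systemctl restart ceph-osd@42.service
ceph osd unset noout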
1. Mark the OSD out (out)
ceph osd out osd.42
ceph -s
systemctl stop ceph-osd@42.service
systemctl status ceph-osd@42.service
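Before tearing the OSD down, let the rebalancing triggered by the out settle, and ask the cluster whether this OSD's data is still needed for durability (the command exists since Luminous, so it applies to this mimic cluster):
ceph osd safe-to-destroy 42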
2. Remove the OSD from the osdmap (rm)
ceph osd rm osd.42
# ceph -s
  cluster:
    id:     cfa5430b-
    health: HEALTH_WARN
            1 osds exist in the crush map but not in the osdmap
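The warning appears because the OSD was removed from the osdmap while its CRUSH entry and auth key still exist; they are cleaned up in the next two steps. On Luminous and later, steps 2-4 can also be collapsed into a single command:
ceph osd purge 42 --yes-i-really-mean-it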
3. Remove it from the CRUSH map (crush)
ceph osd crush rm osd.42
4. Delete the OSD's authentication key (auth)
ceph auth del osd.42
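A quick sanity check that the OSD is gone from both the CRUSH map and the keyring (the grep should return nothing and the auth query should report that the entity does not exist):
ceph osd tree | grep osd.42
ceph auth get osd.42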
5. Unmount the OSD directory
umount /var/lib/ceph/osd/ceph-42
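Assuming this OSD was deployed with ceph-volume lvm on BlueStore (as the block symlink above suggests), /var/lib/ceph/osd/ceph-42 is only a small tmpfs holding metadata; after unmounting, confirm nothing is left mounted there:
mount | grep ceph-42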
6. Remove the LVM volumes
# lvdisplay | grep "LV Path" | grep a2f1287c
LV Path /dev/ceph-a2f1287c-d6d2-4831-8434-8d847770a6e5/osd-block-2cc95b8b-3c44-4cd8-9d36-5dba20f2efbd
# lvremove /dev/ceph-a2f1287c-d6d2-4831-8434-8d847770a6e5/osd-block-2cc95b8b-3c44-4cd8-9d36-5dba20f2efbd
Do you really want to remove and DISCARD active logical volume ceph-a2f1287c-d6d2-4831-8434-8d847770a6e5/osd-block-2cc95b8b-3c44-4cd8-9d36-5dba20f2efbd? [y/n]: y
Logical volume "osd-block-2cc95b8b-3c44-4cd8-9d36-5dba20f2efbd" successfully removed
# vgremove ceph-a2f1287c-d6d2-4831-8434-8d847770a6e5
Volume group "ceph-a2f1287c-d6d2-4831-8434-8d847770a6e5" successfully removed
# pvremove /dev/sds
Labels on physical volume "/dev/sds" successfully wiped.
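The same teardown can be done in one step with ceph-volume, which removes the LV, VG and PV and wipes the device; this is usually less error-prone than the manual lvremove/vgremove/pvremove sequence:
ceph-volume lvm zap /dev/sds --destroy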
7. Format the disk
# mkfs.ext4 /dev/sds
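Assuming the goal of the format is only to clear leftover signatures before redeployment (rather than to exercise the disk), wiping the signatures is a quicker alternative:
wipefs -a /dev/sds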
8. Add the OSD back
# cd my-cluster/
# ceph-deploy osd create k004 --data /dev/sds
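On hosts that are not managed with ceph-deploy, the equivalent is to run ceph-volume directly on the OSD node against the same device:
ceph-volume lvm create --data /dev/sds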
9. Verify the OSD was re-added successfully
# ceph osd tree
51 hdd 14.55269 osd.42 up 1.00000 1.00000
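A complementary check is ceph osd df tree, which also shows the new OSD's weight and utilization and confirms it is receiving data as backfill proceeds:
ceph osd df tree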
10. Check the cluster status
# ceph health detail
OBJECT_MISPLACED 5052101/186196351 objects misplaced (2.713%)
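The misplaced objects are placement groups being backfilled onto the re-added OSD; recovery can be followed until the cluster returns to HEALTH_OK, for example by streaming cluster events or polling the status:
ceph -w
watch -n 10 ceph -s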