[root@ceph-deploy ~]#ceph health detail
HEALTH_ERR 1/429 objects unfound (0.233%); Reduced data availability: 1 pg inactive; Possible data damage: 1 pg recovery_unfound; Degraded data redundancy: 5/1287 objects degraded (0.389%), 1 pg degraded, 1 pg undersized; 1/3 mons down, quorum ceph-mon1,ceph-mon3
OBJECT_UNFOUND 1/429 objects unfound (0.233%)
pg 10.f has 1 unfound objects
PG_AVAILABILITY Reduced data availability: 1 pg inactive
pg 10.f is stuck inactive for 4506.434898, current state recovery_unfound+undersized+degraded+remapped+peered, last acting [0]
PG_DAMAGED Possible data damage: 1 pg recovery_unfound
pg 10.f is recovery_unfound+undersized+degraded+remapped+peered, acting [0], 1 unfound
PG_DEGRADED Degraded data redundancy: 5/1287 objects degraded (0.389%), 1 pg degraded, 1 pg undersized
pg 10.f is stuck undersized for 901.638790, current state recovery_unfound+undersized+degraded+remapped+peered, last acting [0]
MON_DOWN 1/3 mons down, quorum ceph-mon1,ceph-mon3
mon.ceph-mon2 (rank 1) addr 10.0.0.27:6789/0 is down (out of quorum)
The mon service on the cluster's ceph-mon2 node fails to start.
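Before digging into the rotated RocksDB log, it is worth confirming the failure from systemd first; a minimal check, assuming the standard unit name ceph-mon@<hostname>:
# quick look at the failed unit and its most recent journal entries
systemctl status ceph-mon@ceph-mon2
journalctl -u ceph-mon@ceph-mon2 -n 50 --no-pager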
The rotated mon log shows:
[root@ceph-mon2 ~]#zcat /var/log/ceph/ceph-mon.ceph-mon2.log-20220924.gz
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Write Ahead Log file in /var/lib/ceph/mon/ceph-ceph-mon2/store.db:
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.error_if_exists: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.create_if_missing: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.paranoid_checks: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.statistics: (nil)
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.use_fsync: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: table_factory options: flush_block_policy_factory: FlushBlockBySizePolicyFactory (0x55b446dc9458)
cache_index_and_filter_blocks: 1
cache_index_and_filter_blocks_with_high_priority: 1
pin_l0_filter_and_index_blocks_in_cache: 1
index_type: 0
hash_index_allow_collision: 1
checksum: 1
no_block_cache: 0
block_cache: 0x55b4472180f0
block_cache_name: BinnedLRUCache
block_cache_options:
capacity : 536870912
num_shard_bits : 4
strict_capacity_limit : 0
high_pri_pool_ratio: 0.000
block_cache_compressed: (nil)
persistent_cache: (nil)
block_size: 4096
block_size_deviation: 10
block_restart_interval: 16
index_block_restart_interval: 1
metadata_block_size: 4096
partition_filters: 0
use_delta_encoding: 1
filter_policy: rocksdb.BuiltinBloomFilter
whole_key_filtering: 1
verify_compression: 0
read_amp_bytes_per_bit: 0
format_version: 2
enable_index_compression: 1
block_align: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.write_buffer_size: 33554432
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_write_buffer_number: 2
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compression: NoCompression
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.bottommost_compression: Disabled
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.prefix_extractor: nullptr
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.memtable_insert_with_hint_prefix_extractor: nullptr
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.num_levels: 7
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.min_write_buffer_number_to_merge: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_write_buffer_number_to_maintain: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compression_opts.window_bits: -14
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compression_opts.level: -1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compression_opts.strategy: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compression_opts.max_dict_bytes: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compression_opts.zstd_max_train_bytes: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.level0_file_num_compaction_trigger: 4
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.level0_slowdown_writes_trigger: 20
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.level0_stop_writes_trigger: 36
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.target_file_size_base: 67108864
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.target_file_size_multiplier: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_base: 268435456
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.level_compaction_dynamic_level_bytes: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_multiplier: 10.000000
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_multiplier_addtl[0]: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_multiplier_addtl[1]: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_multiplier_addtl[2]: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_multiplier_addtl[3]: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_multiplier_addtl[4]: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_multiplier_addtl[5]: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_bytes_for_level_multiplier_addtl[6]: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_sequential_skip_in_iterations: 8
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_compaction_bytes: 1677721600
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.arena_block_size: 4194304
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.soft_pending_compaction_bytes_limit: 68719476736
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.hard_pending_compaction_bytes_limit: 274877906944
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.rate_limit_delay_max_milliseconds: 100
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.disable_auto_compactions: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_style: kCompactionStyleLevel
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_pri: kByCompensatedSize
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_universal.size_ratio: 1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_universal.min_merge_width: 2
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_universal.max_merge_width: 4294967295
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_universal.max_size_amplification_percent: 200
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_universal.compression_size_percent: -1
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_universal.stop_style: kCompactionStopStyleTotalSize
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_fifo.max_table_files_size: 1073741824
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_fifo.allow_compaction: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.compaction_options_fifo.ttl: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.table_properties_collectors:
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.inplace_update_support: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.inplace_update_num_locks: 10000
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.memtable_prefix_bloom_size_ratio: 0.000000
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.memtable_huge_page_size: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.bloom_locality: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.max_successive_merges: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.optimize_filters_for_hits: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.paranoid_file_checks: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.force_consistency_checks: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.report_bg_io_stats: 0
2022-09-24 08:52:50.502 7f48030c1a00 4 rocksdb: Options.ttl: 0
2022-09-24 08:52:50.504 7f48030c1a00 2 rocksdb: [/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/gigantic/release/13.2.10/rpm/el7/BUILD/ceph-13.2.10/src/rocksdb/db/version_set.cc:1330] Unable to load table properties for file 1415 --- IO error: While open a file for random read: /var/lib/ceph/mon/ceph-ceph-mon2/store.db/001415.sst: No such file or directory
2022-09-24 08:52:50.504 7f48030c1a00 2 rocksdb: [/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/gigantic/release/13.2.10/rpm/el7/BUILD/ceph-13.2.10/src/rocksdb/db/version_set.cc:1330] Unable to load table properties for file 1416 --- IO error: While open a file for random read: /var/lib/ceph/mon/ceph-ceph-mon2/store.db/001416.sst: No such file or directory
2022-09-24 08:52:50.504 7f48030c1a00 4 rocksdb: [/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/gigantic/release/13.2.10/rpm/el7/BUILD/ceph-13.2.10/src/rocksdb/db/version_set.cc:3362] Recovered from manifest file:/var/lib/ceph/mon/ceph-ceph-mon2/store.db/MANIFEST-001370 succeeded,manifest_file_number is 1370, next_file_number is 1418, last_sequence is 605597, log_number is 0,prev_log_number is 0,max_column_family is 0,deleted_log_number is 1409
2022-09-24 08:52:50.504 7f48030c1a00 4 rocksdb: [/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/gigantic/release/13.2.10/rpm/el7/BUILD/ceph-13.2.10/src/rocksdb/db/version_set.cc:3370] Column family [default] (ID 0), log number is 1413
2022-09-24 08:52:50.504 7f48030c1a00 4 rocksdb: [/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/gigantic/release/13.2.10/rpm/el7/BUILD/ceph-13.2.10/src/rocksdb/db/db_impl.cc:252] Shutdown: canceling all background work
2022-09-24 08:52:50.504 7f48030c1a00 4 rocksdb: [/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/gigantic/release/13.2.10/rpm/el7/BUILD/ceph-13.2.10/src/rocksdb/db/db_impl.cc:397] Shutdown complete
2022-09-24 08:52:50.504 7f48030c1a00 -1 rocksdb: Corruption: Can't access /001415.sst: IO error: while stat a file for size: /var/lib/ceph/mon/ceph-ceph-mon2/store.db/001415.sst: No such file or directory
Can't access /001416.sst: IO error: while stat a file for size: /var/lib/ceph/mon/ceph-ceph-mon2/store.db/001416.sst: No such file or directory
2022-09-24 08:52:50.504 7f48030c1a00 -1 error opening mon data directory at '/var/lib/ceph/mon/ceph-ceph-mon2': (22) Invalid argument
# The mon's RocksDB store cannot be opened: the SST files it references (001415.sst, 001416.sst) are missing.
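Before touching anything, it is safer to confirm that the SST files named in the log really are gone and to keep a copy of the damaged store; a sketch:
# verify the files the log complains about are missing
ls -l /var/lib/ceph/mon/ceph-ceph-mon2/store.db/ | grep -E '001415|001416'
# keep the damaged store around in case the rebuild has to be redone
cp -a /var/lib/ceph/mon/ceph-ceph-mon2/store.db /root/store.db.broken-$(date +%F)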
Rebuild the mon store from the OSDs. On each storage node, stop the OSD and RGW services, then collect osdmaps from every OSD with ceph-objectstore-tool; a 'Mount failed' line in the output means that OSD's store could not be opened and it is skipped (the two affected OSDs, osd.13 and osd.6, are rebuilt later in these notes).
[root@ceph-node3 ~]#systemctl stop ceph-osd.target
[root@ceph-node3 ~]#systemctl status ceph-radosgw.target
● ceph-radosgw.target - ceph target allowing to start/stop all ceph-radosgw@.service instances at once
Loaded: loaded (/usr/lib/systemd/system/ceph-radosgw.target; enabled; vendor preset: enabled)
Active: active since Sat 2022-09-24 10:22:08 CST; 16min ago
[root@ceph-node3 ~]#systemctl stop ceph-radosgw.target
[root@ceph-node3 ~]#mkdir /tmp/mon-store
[root@ceph-node3 ~]#for osd in /var/lib/ceph/osd/ceph-1
ceph-10/ ceph-11/ ceph-12/ ceph-13/ ceph-14/
[root@ceph-node3 ~]#for osd in /var/lib/ceph/osd/ceph-*;do ceph-objectstore-tool --data-path $osd --op update-mon-db --mon-store-path /tmp/mon-store --no-mon-config;done
osd.10 : 0 osdmaps trimmed, 297 osdmaps added.
osd.11 : 0 osdmaps trimmed, 0 osdmaps added.
osd.12 : 0 osdmaps trimmed, 0 osdmaps added.
Mount failed with '(5) Input/output error'
osd.14 : 0 osdmaps trimmed, 0 osdmaps added.
[root@ceph-node3 ~]#rsync -avz /tmp/mon-store/ root@ceph-node2:/tmp/mon-store/
[root@ceph-node2 ~]#systemctl stop ceph-osd.target
[root@ceph-node2 ~]#systemctl stop ceph-radosgw.target
[root@ceph-node2 ~]#for osd in /var/lib/ceph/osd/ceph-*;do ceph-objectstore-tool --data-path $osd --op update-mon-db --mon-store-path /tmp/mon-store --no-mon-config;done
osd.5 : 0 osdmaps trimmed, 0 osdmaps added.
Mount failed with '(5) Input/output error'
osd.7 : 0 osdmaps trimmed, 0 osdmaps added.
osd.8 : 0 osdmaps trimmed, 0 osdmaps added.
osd.9 : 0 osdmaps trimmed, 0 osdmaps added.
[root@ceph-node2 ~]#rsync -avz /tmp/mon-store/ root@ceph-node1:/tmp/mon-store/
[root@ceph-node1 ~]#systemctl stop ceph-osd.target
[root@ceph-node1 ~]#systemctl stop ceph-radosgw.target
[root@ceph-node1 ~]#for osd in /var/lib/ceph/osd/ceph-*;do ceph-objectstore-tool --data-path $osd --op update-mon-db --mon-store-path /tmp/mon-store --no-mon-config;done
osd.0 : 0 osdmaps trimmed, 0 osdmaps added.
osd.1 : 0 osdmaps trimmed, 0 osdmaps added.
osd.2 : 0 osdmaps trimmed, 0 osdmaps added.
osd.3 : 0 osdmaps trimmed, 0 osdmaps added.
osd.4 : 0 osdmaps trimmed, 0 osdmaps added.
[root@ceph-node1 ~]#ls /tmp/mon-store/
kv_backend store.db
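Before running the rebuild below, note that the upstream mon-store recovery procedure expects the keyring passed to rebuild to carry a mon. key plus full caps for client.admin. A rough sketch with ceph-authtool, assuming the default admin keyring path; whether these entries already exist depends on the cluster:
# add a mon. key and make sure client.admin has full caps (sketch; adjust the keyring path)
ceph-authtool /etc/ceph/ceph.client.admin.keyring -n mon. --cap mon 'allow *' --gen-key
ceph-authtool /etc/ceph/ceph.client.admin.keyring -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *'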
[root@ceph-node1 ~]#ceph-mon
ceph-mon ceph-monstore-tool
[root@ceph-node1 ~]#ceph-monstore-tool /tmp/mon-store/ rebuild -- --keyring /etc/ceph/ceph.client.admin.keyring
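A quick sanity check that the rebuild produced a populated store before it is shipped to the mon host:
# the rebuilt store.db should contain fresh SST/MANIFEST files and be non-trivial in size
ls /tmp/mon-store/store.db
du -sh /tmp/mon-store/store.db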
# Copy the collected mon store to the damaged ceph-mon2 node
[root@ceph-node1 ~]#scp -r /tmp/mon-store/* root@ceph-mon2:/root/mon-store/
Perform the recovery on ceph-mon2:
[root@ceph-mon2 ~]#mv /var/lib/ceph/mon/ceph-ceph-mon2/store.db /root/
[root@ceph-mon2 ~]#cp -rav mon-store/store.db /var/lib/ceph/mon/ceph-ceph-mon2/
[root@ceph-mon2 ~]#chown -R ceph.ceph /var/lib/ceph/mon/ceph-ceph-mon2/store.db/
[root@ceph-mon2 ~]#chmod 755 -R /var/lib/ceph/mon/
Verify:
[root@ceph-mon2 ~]#systemctl restart ceph-mon.target
[root@ceph-mon2 ~]#systemctl status ceph-mon.target
● ceph-mon.target - ceph target allowing to start/stop all ceph-mon@.service instances at once
Loaded: loaded (/usr/lib/systemd/system/ceph-mon.target; enabled; vendor preset: enabled)
Active: active since Sat 2022-09-24 10:56:33 CST; 3s ago
Sep 24 10:56:33 ceph-mon2.kktb.local systemd[1]: Reached target ceph target allowing to start/stop all ceph-mon@.service instances at once.
Verify the monitor quorum:
ceph -s
services:
mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3
Reference: https://blog.csdn.net/qq_33218245/article/details/103255740
Cluster status recorded at different stages:
[ceph@ceph-deploy ceph-cluster]$ceph -s
cluster:
id: 0fe4535c-4924-4cf3-bdd3-ae7d17577e6d
health: HEALTH_ERR
noout flag(s) set
1/429 objects unfound (0.233%)
Reduced data availability: 1 pg inactive, 1 pg peering
Possible data damage: 1 pg recovery_unfound
Degraded data redundancy: 12/1287 objects degraded (0.932%), 3 pgs degraded
clock skew detected on mon.ceph-mon3
services:
mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3
mgr: ceph-mgr1(active), standbys: ceph-mgr2
mds: mycephfs-1/1/1 up {0=ceph-mgr1=up:active}
osd: 15 osds: 15 up, 15 in; 1 remapped pgs
flags noout
rgw: 1 daemon active
data:
pools: 10 pools, 544 pgs
objects: 429 objects, 724 MiB
usage: 18 GiB used, 282 GiB / 300 GiB avail
pgs: 0.368% pgs not active
12/1287 objects degraded (0.932%)
1/429 objects unfound (0.233%)
539 active+clean
2 active+recovery_wait+degraded
1 active+recovering
1 peering
1 recovery_unfound+undersized+degraded+remapped+peered
io:
recovery: 22 MiB/s, 1 keys/s, 10 objects/s
Check the Ceph status again:
[ceph@ceph-deploy ceph-cluster]$ceph -s
cluster:
id: 0fe4535c-4924-4cf3-bdd3-ae7d17577e6d
health: HEALTH_ERR
noout flag(s) set
3/1287 objects misplaced (0.233%)
1/429 objects unfound (0.233%)
Reduced data availability: 1 pg inactive
Possible data damage: 1 pg recovery_unfound
Degraded data redundancy: 23/1287 objects degraded (1.787%), 9 pgs degraded, 1 pg undersized
clock skew detected on mon.ceph-mon3
Several OSDs show as down in the OSD tree; osd.13 and osd.6 are dealt with below.
[root@ceph-deploy ~]#ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.29228 root default
-5 0.09743 host ceph-node1
0 hdd 0.01949 osd.0 up 1.00000 1.00000
1 hdd 0.01949 osd.1 up 1.00000 1.00000
2 hdd 0.01949 osd.2 up 1.00000 1.00000
3 hdd 0.01949 osd.3 up 1.00000 1.00000
4 hdd 0.01949 osd.4 up 1.00000 1.00000
-7 0.09743 host ceph-node2
5 hdd 0.01949 osd.5 up 1.00000 1.00000
6 hdd 0.01949 osd.6 down 0 1.00000
7 hdd 0.01949 osd.7 up 1.00000 1.00000
8 hdd 0.01949 osd.8 up 1.00000 1.00000
9 hdd 0.01949 osd.9 up 1.00000 1.00000
-3 0.09743 host ceph-node3
10 hdd 0.01949 osd.10 up 1.00000 1.00000
11 hdd 0.01949 osd.11 down 0 1.00000
12 hdd 0.01949 osd.12 down 0 1.00000
13 hdd 0.01949 osd.13 down 0 1.00000
14 hdd 0.01949 osd.14 up 1.00000 1.00000
Deal with osd.13 first.
Remove osd.13 from the cluster: mark it out, then purge it.
[ceph@ceph-deploy ceph-cluster]$ceph osd out 13
osd.13 is already out.
[ceph@ceph-deploy ceph-cluster]$ceph osd purge 13 --yes-i-really-mean-it
purged osd.13
# Stop the service
[root@ceph-node3 ~]#systemctl stop ceph-osd@13.service
Check the mapping between the OSD and its backing disk:
[root@ceph-node3 ~]#ls /var/lib/ceph/osd/ceph-13 -l
total 24
lrwxrwxrwx 1 ceph ceph 93 Sep 24 10:22 block -> /dev/ceph-73c83d35-614e-478e-aaa9-d9008b6182d4/osd-block-2c86a621-dd60-4322-a625-60545a713e43
-rw------- 1 ceph ceph 37 Sep 24 10:22 ceph_fsid
-rw------- 1 ceph ceph 37 Sep 24 10:22 fsid
-rw------- 1 ceph ceph 56 Sep 24 10:22 keyring
-rw------- 1 ceph ceph 6 Sep 24 10:22 ready
-rw------- 1 ceph ceph 10 Sep 24 10:22 type
-rw------- 1 ceph ceph 3 Sep 24 10:22 whoami
[root@ceph-node3 ~]#lsblk -l
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 40G 0 disk
sda1 8:1 0 1000M 0 part /boot
sda2 8:2 0 9.8G 0 part /
sda3 8:3 0 2G 0 part [SWAP]
sdb 8:16 0 20G 0 disk
ceph--43f381a4--be17--4f27--8e40--d821c148bdc3-osd--block--69fc9839--67b9--4f83--8933--53998a89a9e9 253:2 0 20G 0 lvm
sdc 8:32 0 20G 0 disk
ceph--b45d10f1--f836--4c8b--ba35--c6948c205037-osd--block--cc9b9090--e346--4c19--9112--1acbf31347f0 253:1 0 20G 0 lvm
sdd 8:48 0 20G 0 disk
ceph--614c79d8--d202--4c7c--b1ba--73b3ab976a6e-osd--block--e2f268e7--0ae6--4802--b94a--a48658a997c0 253:3 0 20G 0 lvm
sde 8:64 0 20G 0 disk
ceph--73c83d35--614e--478e--aaa9--d9008b6182d4-osd--block--2c86a621--dd60--4322--a625--60545a713e43 253:4 0 20G 0 lvm
sdf 8:80 0 20G 0 disk
ceph--530e6f1f--1db0--4c2c--9c9f--683d7dd5b449-osd--block--8f1b29f9--802a--472a--a628--a530e6e7efcb 253:0 0 20G 0 lvm
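ceph-volume can also print the OSD-to-LV/device mapping directly, which is often easier to read than lsblk; run on the OSD host:
# list all ceph-volume managed OSDs on this host, or only the one backing /dev/sde
ceph-volume lvm list
ceph-volume lvm list /dev/sde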
Prepare the disk for a new OSD:
[root@ceph-node3 ~]#parted /dev/sde mklabel gpt -s
# Fails: the device is busy
[root@ceph-node3 ~]#ceph-volume lvm zap /dev/sde
--> Zapping: /dev/sde
--> --destroy was not specified, but zapping a whole device will remove the partition table
stderr: wipefs: error: /dev/sde: probing initialization failed: Device or resource busy
--> failed to wipefs device, will try again to workaround probable race condition
# Fix: unmount the OSD directory and remove the device-mapper mappings
[root@ceph-node3 ~]#umount /var/lib/ceph/osd/ceph-13
[root@ceph-node3 ~]#dmsetup remove_all
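Note that dmsetup remove_all tears down every device-mapper mapping on the host, not only the one behind osd.13; a narrower alternative is to remove just the mapping shown for /dev/sde in the lsblk output above:
# remove only the LV mapping that backed osd.13 (name taken from the lsblk output)
dmsetup remove ceph--73c83d35--614e--478e--aaa9--d9008b6182d4-osd--block--2c86a621--dd60--4322--a625--60545a713e43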
# Wipe the device
[root@ceph-node3 ~]#ceph-volume lvm zap /dev/sde
--> Zapping: /dev/sde
--> --destroy was not specified, but zapping a whole device will remove the partition table
Running command: /usr/bin/dd if=/dev/zero of=/dev/sde bs=1M count=10 conv=fsync
stderr: 10+0 records in
10+0 records out
stderr: 10485760 bytes (10 MB) copied, 0.0143463 s, 731 MB/s
--> Zapping successful for: <Raw Device: /dev/sde>
Add osd.13 back from the deploy node:
[ceph@ceph-deploy ceph-cluster]$ceph-deploy osd create --data /dev/sde ceph-node3
[ceph_deploy.conf][DEBUG ] found configuration file at: /home/ceph/.cephdeploy.conf
[ceph_deploy.cli][INFO ] Invoked (2.0.1): /bin/ceph-deploy osd create --data /dev/sde ceph-node3
[ceph_deploy.cli][INFO ] ceph-deploy options:
[ceph_deploy.cli][INFO ] verbose : False
[ceph_deploy.cli][INFO ] bluestore : None
[ceph_deploy.cli][INFO ] cd_conf : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7f157dbf3170>
[ceph_deploy.cli][INFO ] cluster : ceph
[ceph_deploy.cli][INFO ] fs_type : xfs
[ceph_deploy.cli][INFO ] block_wal : None
[ceph_deploy.cli][INFO ] default_release : False
[ceph_deploy.cli][INFO ] username : None
[ceph_deploy.cli][INFO ] journal : None
[ceph_deploy.cli][INFO ] subcommand : create
[ceph_deploy.cli][INFO ] host : ceph-node3
[ceph_deploy.cli][INFO ] filestore : None
[ceph_deploy.cli][INFO ] func : <function osd at 0x7f157dc398c0>
[ceph_deploy.cli][INFO ] ceph_conf : None
[ceph_deploy.cli][INFO ] zap_disk : False
[ceph_deploy.cli][INFO ] data : /dev/sde
[ceph_deploy.cli][INFO ] block_db : None
[ceph_deploy.cli][INFO ] dmcrypt : False
[ceph_deploy.cli][INFO ] overwrite_conf : False
[ceph_deploy.cli][INFO ] dmcrypt_key_dir : /etc/ceph/dmcrypt-keys
[ceph_deploy.cli][INFO ] quiet : False
[ceph_deploy.cli][INFO ] debug : False
[ceph_deploy.osd][DEBUG ] Creating OSD on cluster ceph with data device /dev/sde
[ceph-node3][DEBUG ] connection detected need for sudo
[ceph-node3][DEBUG ] connected to host: ceph-node3
[ceph-node3][DEBUG ] detect platform information from remote host
[ceph-node3][DEBUG ] detect machine type
[ceph-node3][DEBUG ] find the location of an executable
[ceph_deploy.osd][INFO ] Distro info: CentOS Linux 7.9.2009 Core
[ceph_deploy.osd][DEBUG ] Deploying osd to ceph-node3
[ceph-node3][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
[ceph-node3][DEBUG ] find the location of an executable
[ceph-node3][INFO ] Running command: sudo /usr/sbin/ceph-volume --cluster ceph lvm create --bluestore --data /dev/sde
[ceph-node3][WARNIN] Running command: /bin/ceph-authtool --gen-print-key
[ceph-node3][WARNIN] Running command: /bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new 1b08f4f3-527f-4edd-a177-072857af6448
[ceph-node3][WARNIN] Running command: /sbin/vgcreate --force --yes ceph-bb6c78fa-d58e-40f4-ac48-71afc83b8052 /dev/sde
[ceph-node3][WARNIN] stdout: Physical volume "/dev/sde" successfully created.
[ceph-node3][WARNIN] stdout: Volume group "ceph-bb6c78fa-d58e-40f4-ac48-71afc83b8052" successfully created
[ceph-node3][WARNIN] Running command: /sbin/lvcreate --yes -l 100%FREE -n osd-block-1b08f4f3-527f-4edd-a177-072857af6448 ceph-bb6c78fa-d58e-40f4-ac48-71afc83b8052
[ceph-node3][WARNIN] stdout: Logical volume "osd-block-1b08f4f3-527f-4edd-a177-072857af6448" created.
[ceph-node3][WARNIN] Running command: /bin/ceph-authtool --gen-print-key
[ceph-node3][WARNIN] Running command: /bin/mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-13
[ceph-node3][WARNIN] Running command: /bin/chown -h ceph:ceph /dev/ceph-bb6c78fa-d58e-40f4-ac48-71afc83b8052/osd-block-1b08f4f3-527f-4edd-a177-072857af6448
[ceph-node3][WARNIN] Running command: /bin/chown -R ceph:ceph /dev/dm-4
[ceph-node3][WARNIN] Running command: /bin/ln -s /dev/ceph-bb6c78fa-d58e-40f4-ac48-71afc83b8052/osd-block-1b08f4f3-527f-4edd-a177-072857af6448 /var/lib/ceph/osd/ceph-13/block
[ceph-node3][WARNIN] Running command: /bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-13/activate.monmap
[ceph-node3][WARNIN] stderr: got monmap epoch 3
[ceph-node3][WARNIN] Running command: /bin/ceph-authtool /var/lib/ceph/osd/ceph-13/keyring --create-keyring --name osd.13 --add-key AQC+Ci9jZgrhBxAASbC5pqxBdnizlakasrkB7g==
[ceph-node3][WARNIN] stdout: creating /var/lib/ceph/osd/ceph-13/keyring
[ceph-node3][WARNIN] added entity osd.13 auth auth(auid = 18446744073709551615 key=AQC+Ci9jZgrhBxAASbC5pqxBdnizlakasrkB7g== with 0 caps)
[ceph-node3][WARNIN] Running command: /bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-13/keyring
[ceph-node3][WARNIN] Running command: /bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-13/
[ceph-node3][WARNIN] Running command: /bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 13 --monmap /var/lib/ceph/osd/ceph-13/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-13/ --osd-uuid 1b08f4f3-527f-4edd-a177-072857af6448 --setuser ceph --setgroup ceph
[ceph-node3][WARNIN] --> ceph-volume lvm prepare successful for: /dev/sde
[ceph-node3][WARNIN] Running command: /bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-13
[ceph-node3][WARNIN] Running command: /bin/ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/ceph-bb6c78fa-d58e-40f4-ac48-71afc83b8052/osd-block-1b08f4f3-527f-4edd-a177-072857af6448 --path /var/lib/ceph/osd/ceph-13 --no-mon-config
[ceph-node3][WARNIN] Running command: /bin/ln -snf /dev/ceph-bb6c78fa-d58e-40f4-ac48-71afc83b8052/osd-block-1b08f4f3-527f-4edd-a177-072857af6448 /var/lib/ceph/osd/ceph-13/block
[ceph-node3][WARNIN] Running command: /bin/chown -h ceph:ceph /var/lib/ceph/osd/ceph-13/block
[ceph-node3][WARNIN] Running command: /bin/chown -R ceph:ceph /dev/dm-4
[ceph-node3][WARNIN] Running command: /bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-13
[ceph-node3][WARNIN] Running command: /bin/systemctl enable ceph-volume@lvm-13-1b08f4f3-527f-4edd-a177-072857af6448
[ceph-node3][WARNIN] stderr: Created symlink from /etc/systemd/system/multi-user.target.wants/ceph-volume@lvm-13-1b08f4f3-527f-4edd-a177-072857af6448.service to /usr/lib/systemd/system/ceph-volume@.service.
[ceph-node3][WARNIN] Running command: /bin/systemctl enable --runtime ceph-osd@13
[ceph-node3][WARNIN] Running command: /bin/systemctl start ceph-osd@13
[ceph-node3][WARNIN] --> ceph-volume lvm activate successful for osd ID: 13
[ceph-node3][WARNIN] --> ceph-volume lvm create successful for: /dev/sde
[ceph-node3][INFO ] checking OSD status...
[ceph-node3][DEBUG ] find the location of an executable
[ceph-node3][INFO ] Running command: sudo /bin/ceph --cluster=ceph osd stat --format=json
[ceph_deploy.osd][DEBUG ] Host ceph-node3 is now ready for osd use.
# The ceph-deploy output indicates osd.13 is back in service. Set noout so that no OSD gets marked out automatically during the next maintenance step:
[ceph@ceph-deploy ceph-cluster]$ceph osd set noout
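The flag can be confirmed from the osdmap before moving on:
# noout should now appear in the osdmap flags line
ceph osd dump | grep flags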
Repeat the same procedure for osd.6:
# On the deploy node
[ceph@ceph-deploy ceph-cluster]$ceph osd out 6
# On ceph-node2
[root@ceph-node2 ~]#systemctl stop ceph-osd@6
# On the deploy node
[ceph@ceph-deploy ceph-cluster]$ceph osd purge 6 --yes-i-really-mean-it
Re-add the OSD.
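The zap/create sequence mirrors what was done for osd.13; a sketch, where /dev/sdX is a hypothetical placeholder for whichever disk on ceph-node2 backed osd.6:
# on ceph-node2: free the old device (/dev/sdX is a placeholder, not the actual device name)
umount /var/lib/ceph/osd/ceph-6
ceph-volume lvm zap /dev/sdX
# on the deploy node: create the OSD again
ceph-deploy osd create --data /dev/sdX ceph-node2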
Check the OSD tree:
[ceph@ceph-deploy ceph-cluster]$ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.29228 root default
-5 0.09743 host ceph-node1
0 hdd 0.01949 osd.0 up 1.00000 1.00000
1 hdd 0.01949 osd.1 up 1.00000 1.00000
2 hdd 0.01949 osd.2 up 1.00000 1.00000
3 hdd 0.01949 osd.3 up 1.00000 1.00000
4 hdd 0.01949 osd.4 up 1.00000 1.00000
-7 0.09743 host ceph-node2
5 hdd 0.01949 osd.5 up 1.00000 1.00000
6 hdd 0.01949 osd.6 up 1.00000 1.00000
7 hdd 0.01949 osd.7 up 1.00000 1.00000
8 hdd 0.01949 osd.8 up 1.00000 1.00000
9 hdd 0.01949 osd.9 up 1.00000 1.00000
-3 0.09743 host ceph-node3
10 hdd 0.01949 osd.10 up 1.00000 1.00000
11 hdd 0.01949 osd.11 up 1.00000 1.00000
12 hdd 0.01949 osd.12 up 1.00000 1.00000
13 hdd 0.01949 osd.13 up 1.00000 1.00000
14 hdd 0.01949 osd.14 up 1.00000 1.00000
Clear the noout flag:
[ceph@ceph-deploy ceph-cluster]$ceph osd unset noout
noout is unset
Verify the cluster status:
[ceph@ceph-deploy ~]$ceph -s
cluster:
id: 0fe4535c-4924-4cf3-bdd3-ae7d17577e6d
health: HEALTH_OK # back to normal
services:
mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3
mgr: ceph-mgr1(active), standbys: ceph-mgr2
mds: mycephfs-1/1/1 up {0=ceph-mgr1=up:active}
osd: 15 osds: 15 up, 15 in
rgw: 1 daemon active
data:
pools: 10 pools, 544 pgs
objects: 428 objects, 723 MiB
usage: 18 GiB used, 282 GiB / 300 GiB avail
pgs: 544 active+clean
Verify the data in the PGs