Creating Ceph pools on specified OSDs

阚亮
2023-12-01

In production there are many scenarios where a Ceph pool needs to be created on specific OSD devices, in other words, where a pool must be restricted to a particular set of OSDs. A Ceph cluster can mix disk types, for example some SSDs and some HDDs. If certain workloads need fast SSD storage while HDDs are sufficient for others, you can bind each pool to the appropriate OSD devices when it is created. Similarly, a single Ceph cluster often serves RBD block storage and CephFS at the same time; to keep them from interfering with each other's performance, the RBD pools can be pinned to one set of OSDs and the CephFS pools to another.

Method overview

  1. Modify the rule section of the cluster's CRUSH map (export the CRUSH map to a file, edit it, then import it back for it to take effect). Each rule selects a bucket path, which ultimately decides which OSD devices are used.
  2. Set the pool's crush_rule so that the pool uses the desired rule from the CRUSH map (for example: ceph osd pool set <pool> crush_rule <rule-name>). A condensed sketch of the whole flow follows this list; the detailed steps are in the sections below.
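
Condensed, the whole flow looks roughly like this (pool and rule names here are placeholders; each step is explained in detail below):

ceph osd getcrushmap -o ./crushmap            # export the compiled CRUSH map
crushtool -d crushmap -o decrushmap           # decompile it into editable text
vim decrushmap                                # add the new buckets and rules
crushtool -c decrushmap -o newcrushmap        # recompile
ceph osd setcrushmap -i newcrushmap           # inject it back into the cluster
ceph osd pool set <pool> crush_rule <rule>    # point each pool at the new rule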

Scenario

Existing environment:

Host         Disks
test-ceph01  /dev/sdb, /dev/sdc
test-ceph02  /dev/sdb, /dev/sdc
test-ceph03  /dev/sdb, /dev/sdc

# ceph osd tree
ID CLASS WEIGHT  TYPE NAME            STATUS REWEIGHT PRI-AFF 
-1       0.29214 root default                                 
-7       0.09738     host test-ceph01                         
 0   hdd 0.04869         osd.0            up  1.00000 1.00000 
 4   hdd 0.04869         osd.4            up  1.00000 1.00000 
-5       0.09738     host test-ceph02                         
 2   hdd 0.04869         osd.2            up  1.00000 1.00000 
 5   hdd 0.04869         osd.5            up  1.00000 1.00000 
-3       0.09738     host test-ceph03                         
 1   hdd 0.04869         osd.1            up  1.00000 1.00000 
 3   hdd 0.04869         osd.3            up  1.00000 1.00000 

We want the RBD pools stored on /dev/sdb and the CephFS pools stored on /dev/sdc.
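
If you are not sure which OSD id sits on which physical disk, you can check each OSD's metadata or list the LVM volumes on each host (a quick sketch; the exact metadata field names vary a little between releases and store backends):

ceph osd metadata 0 | grep -E '"devices"|"dev_node"'    # backing device of osd.0
ceph-volume lvm list                                     # run on each host to map OSDs to disks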

First, take a look at the default rule:

ceph osd crush rule dump
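
On a fresh cluster this typically returns only the default replicated_rule. As a rough sketch (field names and values can differ between Ceph releases), the entry looks something like:

[
    {
        "rule_id": 0,
        "rule_name": "replicated_rule",
        "ruleset": 0,
        "type": 1,
        "min_size": 1,
        "max_size": 10,
        "steps": [
            { "op": "take", "item": -1, "item_name": "default" },
            { "op": "chooseleaf_firstn", "num": 0, "type": "host" },
            { "op": "emit" }
        ]
    }
]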

Reconfiguring the CRUSH map

1. Get the CRUSH map

ceph osd getcrushmap -o ./crushmap

2. Decompile the CRUSH map

crushtool -d crushmap -o decrushmap

This generates a file named decrushmap in the current directory.

3. Edit the CRUSH map

vim decrushmap

...
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
...
# buckets
host test-ceph03 {
	id -3		# do not change unnecessarily
	id -4 class hdd		# do not change unnecessarily
	# weight 0.097
	alg straw2
	hash 0	# rjenkins1
	item osd.1 weight 0.049
	item osd.3 weight 0.049
}
host test-ceph02 {
	id -5		# do not change unnecessarily
	id -6 class hdd		# do not change unnecessarily
	# weight 0.097
	alg straw2
	hash 0	# rjenkins1
	item osd.2 weight 0.049
	item osd.5 weight 0.049
}
host test-ceph01 {
	id -7		# do not change unnecessarily
	id -8 class hdd		# do not change unnecessarily
	# weight 0.097
	alg straw2
	hash 0	# rjenkins1
	item osd.0 weight 0.049
	item osd.4 weight 0.049
}
root default {
	id -1		# do not change unnecessarily
	id -2 class hdd		# do not change unnecessarily
	# weight 0.292
	alg straw2
	hash 0	# rjenkins1
	item test-ceph03 weight 0.097
	item test-ceph02 weight 0.097
	item test-ceph01 weight 0.097
}

-------------------------------------------
##### Add 2 new buckets
root rbd-disk {
    id -9        # do not change unnecessarily; continue from the last bucket id above (-8), i.e. the next unused negative id
    # weight 3.000
    alg straw
    hash 0  # rjenkins1
    item osd.0 weight 1.000    # pick the OSD ids from the ceph osd tree output above
    item osd.1 weight 1.000
    item osd.2 weight 1.000
}
root fs-disk {
    id -10       # do not change unnecessarily
    # weight 3.000
    alg straw
    hash 0  # rjenkins1
    item osd.3 weight 1.000
    item osd.4 weight 1.000
    item osd.5 weight 1.000
}
-------------------------------------------

# rules
rule replicated_rule {
	id 0
	type replicated
	min_size 1
	max_size 10
	step take default
	step chooseleaf firstn 0 type host
	step emit
}

-------------------------------------------
##### Add 2 new rules
rule rbd-disk {
    id 1                      # likewise, the previous rule id + 1
    type replicated
    min_size 1
    max_size 10
    step take rbd-disk
    step chooseleaf firstn 0 type osd
    step emit
}
rule fs-disk {
    id 2
    type replicated
    min_size 1
    max_size 10
    step take fs-disk
    step chooseleaf firstn 0 type osd
    step emit
}
-------------------------------------------
# end crush map

4. Compile the CRUSH map

crushtool -c decrushmap -o newcrushmap
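
Before injecting the new map, you can optionally dry-run it with crushtool's test mode and confirm that each new rule only ever maps to the intended OSDs (the rule ids below assume the ids chosen above):

crushtool -i newcrushmap --test --show-mappings --rule 1 --num-rep 3 --min-x 0 --max-x 9    # rbd-disk
crushtool -i newcrushmap --test --show-mappings --rule 2 --num-rep 3 --min-x 0 --max-x 9    # fs-disk

Every mapping printed should contain only OSDs 0-2 for rule 1 and only OSDs 3-5 for rule 2.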

5. Inject the CRUSH map

ceph osd setcrushmap -i newcrushmap
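
After injecting the map, a quick check confirms that the new buckets and rules are in place:

ceph osd crush rule ls    # should list replicated_rule, rbd-disk and fs-disk
ceph osd tree             # the rbd-disk and fs-disk roots should now appear alongside default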

Creating the pools

1. Create the RBD pools

ceph osd pool create images 64 64
ceph osd pool create volumes 64 64
ceph osd pool create backups 64 64
ceph osd pool create vms 64 64
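
On Luminous and later releases you will usually also want to tag each pool with the application that uses it, otherwise the cluster reports a health warning about pools without an enabled application. For the RBD pools that would look like:

ceph osd pool application enable images rbd
ceph osd pool application enable volumes rbd
ceph osd pool application enable backups rbd
ceph osd pool application enable vms rbd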

2. Create the CephFS pools

ceph osd pool create cephfs_data 64 64
ceph osd pool create cephfs_metadata 64 64
ceph fs new cephfs cephfs_metadata cephfs_data

You can check the result:
# ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]

Changing the pools' CRUSH rules

As the ceph osd dump output below shows, all of the pools we just created use crush_rule 0, i.e. the default replicated_rule:

[root@test-ceph01 ceph-ansible]# ceph osd dump | grep images
pool 1 'images' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 14 flags hashpspool stripe_width 0
[root@test-ceph01 ceph-ansible]# ceph osd dump | grep volumes
pool 2 'volumes' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 17 flags hashpspool stripe_width 0
[root@test-ceph01 ceph-ansible]# ceph osd dump | grep backups
pool 3 'backups' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 20 flags hashpspool stripe_width 0
[root@test-ceph01 ceph-ansible]# ceph osd dump | grep vms
pool 4 'vms' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 23 flags hashpspool stripe_width 0

[root@test-ceph01 ceph-ansible]# ceph osd dump | grep cephfs_metadata
pool 6 'cephfs_metadata' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 40 flags hashpspool stripe_width 0 application cephfs
[root@test-ceph01 ceph-ansible]# ceph osd dump | grep cephfs_data
pool 5 'cephfs_data' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 40 flags hashpspool stripe_width 0 application cephfs

Now assign the corresponding rules.

1. Set the rule for the RBD pools

ceph osd pool set images crush_rule rbd-disk
ceph osd pool set volumes crush_rule rbd-disk
ceph osd pool set backups crush_rule rbd-disk
ceph osd pool set vms crush_rule rbd-disk

2. Set the rule for the CephFS pools

ceph osd pool set cephfs_data crush_rule fs-disk
ceph osd pool set cephfs_metadata crush_rule fs-disk
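
Note that changing a pool's rule remaps its placement groups, so on a pool that already holds data this triggers data movement. You can verify the assignment per pool; crush_rule should now report the new rule name:

ceph osd pool get images crush_rule
ceph osd pool get cephfs_data crush_rule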

Verification

1. Before verifying, first list what is already in the pools. Since this is a fresh environment, both commands return nothing:

rados ls -p images
rados ls -p cephfs_data

2. Put objects into the pools

rados -p images put rbd_test_data /etc/hosts
rados -p cephfs_metadata put fs_test_data /etc/hosts

# Check again
[root@test-ceph01 ceph-ansible]# rados ls -p images
rbd_test_data
[root@test-ceph01 ceph-ansible]# rados ls -p cephfs_metadata
fs_test_data

[root@test-ceph01 ceph-ansible]# ceph df detail    # USED is now non-zero, so the objects have been stored
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED     OBJECTS 
    299GiB     293GiB      6.03GiB          2.02           2 
POOLS:
    NAME                ID     QUOTA OBJECTS     QUOTA BYTES     USED     %USED     MAX AVAIL     OBJECTS     DIRTY     READ     WRITE     RAW USED 
    images              1      N/A               N/A             236B         0       46.4GiB           1         1       0B        1B         708B 
    volumes             2      N/A               N/A               0B         0       46.4GiB           0         0       0B        0B           0B 
    backups             3      N/A               N/A               0B         0       46.4GiB           0         0       0B        0B           0B 
    vms                 4      N/A               N/A               0B         0       46.4GiB           0         0       0B        0B           0B 
    cephfs_data         5      N/A               N/A               0B         0       46.4GiB           0         0       0B        0B           0B 
    cephfs_metadata     6      N/A               N/A             236B         0       46.4GiB 

3. Check object placement

[root@test-ceph01 ceph-ansible]# ceph osd map images rbd_test_data
osdmap e44 pool 'images' (1) object 'rbd_test_data' -> pg 1.e1aabc8 (1.8) -> up ([1,2,0], p1) acting ([1,2,0], p1)
[root@test-ceph01 ceph-ansible]# ceph osd map cephfs_metadata fs_test_data
osdmap e44 pool 'cephfs_metadata' (6) object 'fs_test_data' -> pg 6.4a32357a (6.3a) -> up ([3,5,4], p3) acting ([3,5,4], p3)

As expected, the object rbd_test_data is on osd.0, osd.1 and osd.2, while fs_test_data is on osd.3, osd.4 and osd.5.
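
If you want to check the mapping for every placement group rather than a single object, you can list the PGs of each pool and look at their acting sets (output columns vary a bit between releases):

ceph pg ls-by-pool images
ceph pg ls-by-pool cephfs_metadata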
