
Installing and Deploying a Rook-Ceph Storage Cluster on a Kubernetes 1.24 Cluster

缪晋
2023-12-01

I. Initialize the Base Environment
1. Host environment information

OS                                 Hostname   IP address     Notes
AlmaLinux release 8.6 (Sky Tiger)  master01   192.168.3.31   k8s master node, VIP 192.168.3.30
AlmaLinux release 8.6 (Sky Tiger)  master02   192.168.3.32   k8s master node, VIP 192.168.3.30
AlmaLinux release 8.6 (Sky Tiger)  master03   192.168.3.33   k8s master node, VIP 192.168.3.30
AlmaLinux release 8.6 (Sky Tiger)  node01     192.168.3.41   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2
AlmaLinux release 8.6 (Sky Tiger)  node02     192.168.3.42   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2
AlmaLinux release 8.6 (Sky Tiger)  node03     192.168.3.43   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2
AlmaLinux release 8.6 (Sky Tiger)  node04     192.168.3.44   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2
AlmaLinux release 8.6 (Sky Tiger)  node05     192.168.3.45   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2

2. Deploy the k8s cluster
See: https://blog.csdn.net/lic95/article/details/125044136

II. Deploy Rook Ceph
1. Rook Ceph reference documentation
https://rook.github.io/docs/rook/latest/Getting-Started/quickstart/#deploy-the-rook-operator

2. Verify the disks on the worker nodes; in this setup the 5 worker nodes provide 10 data disks in total

[root@node01 ~]# lsblk -f
NAME               FSTYPE      LABEL UUID                                   MOUNTPOINT
nvme0n1                                                                     
├─nvme0n1p1        xfs               4f22cfd0-c208-4a72-a2d5-82ee32d7f956   /boot
└─nvme0n1p2        LVM2_member       2o3Cz0-u0vm-D81w-hysk-LwSv-cLGg-5YyA5c 
  ├─almalinux-root xfs               919eb2ea-14db-4105-b7fd-af85b1ec2dfd   /
  └─almalinux-swap swap              d246b8f0-1ee4-425b-9a37-d8d9b2781403   
nvme0n2                                                                     
nvme0n3                                                                     
[root@node01 ~]# 

# If the FSTYPE field is not empty, the disk cannot be consumed by a Rook OSD; clean it up manually first
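
If a data disk still carries an old filesystem or LVM signature, one way to wipe it is sketched below; the device name /dev/nvme0n2 is only an example matching the lsblk output above, and the commands destroy all data on that disk (sgdisk comes from the gdisk package).

# Minimal wipe sketch so Rook can claim the disk (DANGEROUS: erases the disk)
DISK=/dev/nvme0n2                                   # adjust to the disk reported by lsblk
sgdisk --zap-all "$DISK"                            # clear GPT/MBR partition tables
wipefs --all "$DISK"                                # remove filesystem/LVM signatures
dd if=/dev/zero of="$DISK" bs=1M count=100 oflag=direct,dsync   # zero the first 100 MiB
partprobe "$DISK"                                   # let the kernel re-read the partition table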

3. Deploy the certificate manager (cert-manager)

# Download the yaml file
wget https://github.com/cert-manager/cert-manager/releases/download/v1.8.0/cert-manager.yaml

# Start the pods
kubectl apply -f cert-manager.yaml

# Check the startup status
[root@master01 ~]# kubectl get pods -n cert-manager
NAME                                       READY   STATUS    RESTARTS   AGE
cert-manager-6868fddcb4-kcvpp              1/1     Running   0          42s
cert-manager-cainjector-6d6bbc7965-f5trt   1/1     Running   0          42s
cert-manager-webhook-59f66d6c7b-wsw6f      1/1     Running   0          42s
[root@master01 ~]# 
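
Optionally, instead of watching the pods by hand, you can block until the cert-manager deployments report Available; a small sketch, with an arbitrary 300s timeout:

# Wait until all cert-manager deployments are Available (timeout value is arbitrary)
kubectl -n cert-manager wait deployment --all --for=condition=Available --timeout=300s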

4. Install the lvm2 package, which Ceph needs to provision the OSD disks

yum install -y lvm2
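
lvm2 must be present on every node that will provide OSD disks. A minimal sketch that installs it on all workers over SSH, assuming passwordless root SSH to the node IPs from the host table above:

# Install lvm2 on every worker node (assumes root SSH access to each node)
for ip in 192.168.3.41 192.168.3.42 192.168.3.43 192.168.3.44 192.168.3.45; do
  ssh root@"$ip" "yum install -y lvm2"
done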

5. Deploy Rook Ceph

# Clone the specified release branch on the master node
git clone --single-branch --branch v1.9.4 https://github.com/rook/rook.git

# Deploy the Rook operator
cd rook/deploy/examples
kubectl create -f crds.yaml -f common.yaml -f operator.yaml
kubectl create -f cluster.yaml

# Before continuing, verify that rook-ceph-operator is in the "Running" state:
# Note: some images are hosted on k8s.gcr.io and may be unreachable without a proxy; check the pod events/logs and pull them onto each node manually if needed
kubectl get pod -n rook-ceph
[root@master01 ~]# kubectl get pods -n rook-ceph
NAME                                               READY   STATUS      RESTARTS   AGE
csi-cephfsplugin-4bbbh                             3/3     Running     0          55m
csi-cephfsplugin-9zsjn                             3/3     Running     0          55m
csi-cephfsplugin-provisioner-5c6c4c7785-dlrfh      6/6     Running     0          55m
csi-cephfsplugin-provisioner-5c6c4c7785-fs6nz      6/6     Running     0          55m
csi-cephfsplugin-tvlxt                             3/3     Running     0          55m
csi-cephfsplugin-vj7s9                             3/3     Running     0          55m
csi-cephfsplugin-xg92l                             3/3     Running     0          55m
csi-rbdplugin-9s64s                                3/3     Running     0          55m
csi-rbdplugin-gvkbw                                3/3     Running     0          55m
csi-rbdplugin-provisioner-7c756d9bd7-9b9sm         6/6     Running     0          55m
csi-rbdplugin-provisioner-7c756d9bd7-cdlfd         6/6     Running     0          55m
csi-rbdplugin-rdtxb                                3/3     Running     0          55m
csi-rbdplugin-s9t2r                                3/3     Running     0          55m
csi-rbdplugin-x2ldf                                3/3     Running     0          55m
rook-ceph-crashcollector-node01-5c65c4845d-wtqgz   1/1     Running     0          51m
rook-ceph-crashcollector-node02-64fd8d97f7-w9mlv   1/1     Running     0          50m
rook-ceph-crashcollector-node03-675b749756-b9gjq   1/1     Running     0          49m
rook-ceph-crashcollector-node04-7dcb76b499-lc4td   1/1     Running     0          51m
rook-ceph-crashcollector-node05-79b4c99f86-sfvvf   1/1     Running     0          51m
rook-ceph-mgr-a-7dc64d847f-kzf26                   2/2     Running     0          51m
rook-ceph-mgr-b-5dc59949ff-fwkl4                   2/2     Running     0          51m
rook-ceph-mon-a-779dc5cd57-wlkhx                   1/1     Running     0          55m
rook-ceph-mon-b-b9bdf6486-t48ks                    1/1     Running     0          54m
rook-ceph-mon-c-776f7674b6-r29zr                   1/1     Running     0          51m
rook-ceph-operator-74c6447d5b-gmlmx                1/1     Running     0          58m
rook-ceph-osd-0-7d746b7b59-7zn58                   1/1     Running     0          51m
rook-ceph-osd-1-698b49669-5plgq                    1/1     Running     0          51m
rook-ceph-osd-2-777bb8bfc9-4zm56                   1/1     Running     0          51m
rook-ceph-osd-3-7568df5fd4-lgh25                   1/1     Running     0          51m
rook-ceph-osd-4-6fd6747d6-bxtxx                    1/1     Running     0          51m
rook-ceph-osd-5-868d874bc4-jpxjc                   1/1     Running     0          51m
rook-ceph-osd-6-d7d46949-fgxb2                     1/1     Running     0          50m
rook-ceph-osd-7-6bc688dcf6-t84g6                   1/1     Running     0          50m
rook-ceph-osd-8-6fb5cdb988-fcnd5                   1/1     Running     0          49m
rook-ceph-osd-9-7c595fd74d-khxdl                   1/1     Running     0          49m
rook-ceph-osd-prepare-node01-s6z6d                 0/1     Completed   0          49m
rook-ceph-osd-prepare-node02-z6s5z                 0/1     Completed   0          49m
rook-ceph-osd-prepare-node03-9sjtl                 0/1     Completed   0          49m
rook-ceph-osd-prepare-node04-7bglr                 0/1     Completed   0          49m
rook-ceph-osd-prepare-node05-6rkgc                 0/1     Completed   0          49m
rook-ceph-tools-68f89f79f9-jqcg8                   1/1     Running     0          52m
[root@master01 ~]# 
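
If any pods sit in ImagePullBackOff because of the blocked registries mentioned above, one way to see which images the namespace expects, so they can be pre-pulled onto each node, is the following sketch:

# List every image referenced by pods in the rook-ceph namespace
kubectl -n rook-ceph get pods -o jsonpath='{..image}' | tr ' ' '\n' | sort -u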

6. Deploy the Rook Toolbox

# Start the rook-ceph-tools pod (run from the root of the cloned rook repository)
kubectl create -f deploy/examples/toolbox.yaml

# Wait for the toolbox pod to pull its image and reach the Running state
[root@master01 ~]# kubectl -n rook-ceph rollout status deploy/rook-ceph-tools
deployment "rook-ceph-tools" successfully rolled out
[root@master01 ~]# 

7. Check the cluster status

# Connect to the toolbox
kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash

# Inspect the cluster
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ ceph -s
  cluster:
    id:     da0ab8bb-e42f-42b1-b134-eb1d58888828
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum a,b,c (age 99m)
    mgr: a(active, since 98m), standbys: b
    osd: 10 osds: 10 up (since 97m), 10 in (since 97m)
 
  data:
    pools:   1 pools, 1 pgs
    objects: 0 objects, 0 B
    usage:   51 MiB used, 100 GiB / 100 GiB avail
    pgs:     1 active+clean
 
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ 

[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ ceph osd status
ID  HOST     USED  AVAIL  WR OPS  WR DATA  RD OPS  RD DATA  STATE      
 0  node01  5200k  9.99G      0        0       0        0   exists,up  
 1  node04  5260k  9.99G      0        0       0        0   exists,up  
 2  node05  5200k  9.99G      0        0       0        0   exists,up  
 3  node01  5264k  9.99G      0        0       0        0   exists,up  
 4  node04  5200k  9.99G      0        0       0        0   exists,up  
 5  node05  5264k  9.99G      0        0       0        0   exists,up  
 6  node02  5136k  9.99G      0        0       0        0   exists,up  
 7  node02  5136k  9.99G      0        0       0        0   exists,up  
 8  node03  5072k  9.99G      0        0       0        0   exists,up  
 9  node03  5072k  9.99G      0        0       0        0   exists,up  
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ 

[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ ceph df
--- RAW STORAGE ---
CLASS     SIZE    AVAIL    USED  RAW USED  %RAW USED
ssd    100 GiB  100 GiB  51 MiB    51 MiB       0.05
TOTAL  100 GiB  100 GiB  51 MiB    51 MiB       0.05
 
--- POOLS ---
POOL                   ID  PGS  STORED  OBJECTS  USED  %USED  MAX AVAIL
device_health_metrics   1    1     0 B        0   0 B      0     32 GiB
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ 

[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ rados df
POOL_NAME              USED  OBJECTS  CLONES  COPIES  MISSING_ON_PRIMARY  UNFOUND  DEGRADED  RD_OPS   RD  WR_OPS   WR  USED COMPR  UNDER COMPR
device_health_metrics   0 B        0       0       0                   0        0         0       0  0 B       0  0 B         0 B          0 B

total_objects    0
total_used       51 MiB
total_avail      100 GiB
total_space      100 GiB
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ 

8. Collect operation logs for debugging

kubectl create -f deploy/examples/toolbox-job.yaml
kubectl -n rook-ceph logs -l job-name=rook-ceph-toolbox-job

III. Dashboard configuration

# Expose the Ceph Dashboard externally
[root@master01 examples]# kubectl apply -f dashboard-external-https.yaml
service/rook-ceph-mgr-dashboard-external-https created

# Get the dashboard admin password
[root@master01 examples]# kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{['data']['password']}" | base64 -d

# The password printed:
}=1:6:@C>:NP!KVGId;r

# Check the newly created NodePort
[root@master01 examples]# kubectl get svc -n rook-ceph
NAME                                     TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)             AGE
csi-cephfsplugin-metrics                 ClusterIP   172.18.4.87      <none>        8080/TCP,8081/TCP   78m
csi-rbdplugin-metrics                    ClusterIP   172.18.187.253   <none>        8080/TCP,8081/TCP   78m
rook-ceph-admission-controller           ClusterIP   172.18.49.28     <none>        443/TCP             83m
rook-ceph-mgr                            ClusterIP   172.18.41.24     <none>        9283/TCP            80m
rook-ceph-mgr-dashboard                  ClusterIP   172.18.239.24    <none>        8443/TCP            80m
rook-ceph-mgr-dashboard-external-https   NodePort    172.18.66.56     <none>        8443:30044/TCP      11m
rook-ceph-mon-a                          ClusterIP   172.18.26.25     <none>        6789/TCP,3300/TCP   82m
rook-ceph-mon-b                          ClusterIP   172.18.147.238   <none>        6789/TCP,3300/TCP   80m
rook-ceph-mon-c                          ClusterIP   172.18.244.12    <none>        6789/TCP,3300/TCP   80m

# Access in a browser (log in as user admin with the password above):
https://192.168.3.41:30044
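
If the NodePort is not reachable from your workstation, forwarding the dashboard service through kubectl is an alternative (a sketch; then open https://localhost:8443):

# Alternative access: forward the dashboard service to the local machine
kubectl -n rook-ceph port-forward svc/rook-ceph-mgr-dashboard 8443:8443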

IV. Deploy RBD and CephFS storage support

# RBD:
# Create an RBD pool named replicapool together with the rook-ceph-block StorageClass
[root@master01 examples]# kubectl apply -f csi/rbd/storageclass.yaml 
cephblockpool.ceph.rook.io/replicapool created
storageclass.storage.k8s.io/rook-ceph-block created

# CephFS:
[root@master01 examples]# kubectl apply -f filesystem.yaml
cephfilesystem.ceph.rook.io/myfs created
[root@master01 examples]# kubectl apply -f csi/cephfs/storageclass.yaml
storageclass.storage.k8s.io/rook-cephfs created
[root@master01 examples]# 

# Check the StorageClasses
[root@master01 examples]# kubectl get sc
NAME              PROVISIONER                     RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
rook-ceph-block   rook-ceph.rbd.csi.ceph.com      Delete          Immediate           true                   109s
rook-cephfs       rook-ceph.cephfs.csi.ceph.com   Delete          Immediate           true                   55s
[root@master01 examples]# 
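
Before building anything on top of these StorageClasses, a quick smoke test of dynamic provisioning can save debugging later. The sketch below creates and removes a throwaway PVC; the name test-rbd-pvc is purely illustrative.

# Smoke test: a 1Gi PVC against rook-ceph-block should reach the Bound state within seconds
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-rbd-pvc
spec:
  storageClassName: rook-ceph-block
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
EOF
kubectl get pvc test-rbd-pvc
kubectl delete pvc test-rbd-pvc   # clean up after verifying it bound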

V. Test: deploy a Redis cluster backed by Ceph storage
1. Redis is a stateful application
  When Redis runs as pods in k8s, each pod caches different data and pod IPs can change at any time, so deploying a redis-cluster with an ordinary Deployment plus Service runs into many problems; a StatefulSet combined with a headless Service is used instead.

2. Data persistence
  Although Redis is an in-memory cache, it still relies on disk to persist its data, so that cached data can be recovered when the service restarts after a failure.

3. Headless Service
  A headless Service is a Service with no cluster IP. Correspondingly, in the k8s DNS mapping a headless Service resolves not to a cluster IP but to the list of IPs of all the pods it selects.

4. StatefulSet
  A StatefulSet is the k8s resource designed specifically for stateful applications. Broadly speaking it can be seen as a variant of Deployment/RC with the following properties:

  • Every pod managed by a StatefulSet has a unique, stable identity and network name generated from an ordinal, rather than the random names and IPs of a Deployment (for a StatefulSet named redis the pods are redis-0, redis-1, ...)
  • Pod start and stop ordering is strictly controlled: operating on the Nth pod only happens after the previous N-1 are done
  • Pods in a StatefulSet use stable persistent storage, and the corresponding PV is not destroyed when the pod is deleted
  • A StatefulSet must be used together with a headless Service; it adds another layer on top of the headless Service's DNS mapping, giving each pod its own DNS name of the form (pod name).(headless service name), which inside the cluster expands to (pod name).(headless service name).(namespace).svc.cluster.local, the form used later in this article

5. Generate the YAML configuration files
  The Redis configuration file is mounted from a ConfigMap. The chage-pod-ip.sh script handles the case where a redis-cluster pod is rebuilt and gets a new pod IP: it replaces the old pod IP with the new one in /data/nodes.conf, otherwise the cluster breaks. Create the configuration files:

mkdir -p redis-cluster
cd redis-cluster

[root@master01 redis-cluster]# ll
total 8
-rw-r--r-- 1 root root 2374 Jun  2 20:48 redis-cluster-configmap.yaml
-rw-r--r-- 1 root root 1942 Jun  2 20:49 redis-cluster.yaml
[root@master01 redis-cluster]# 

[root@master01 redis-cluster]# cat redis-cluster-configmap.yaml 
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-cluster
  namespace: redis-cluster
data:
  chage-pod-ip.sh: |
    #!/bin/sh
    CLUSTER_CONFIG="/data/nodes.conf"
    if [ -f ${CLUSTER_CONFIG} ]; then
      if [ -z "${POD_IP}" ]; then
        echo "Unable to determine Pod IP address!"
        exit 1
      fi
      echo "Updating my IP to ${POD_IP} in ${CLUSTER_CONFIG}"
      # rewrite the IP on the "myself" line of nodes.conf to this pod's current IP
      sed -i.bak -e '/myself/ s/[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}/'${POD_IP}'/' ${CLUSTER_CONFIG}
    fi
    exec "$@"

  redis.conf: |
    bind 0.0.0.0
    protected-mode yes
    port 6379
    tcp-backlog 2048
    timeout 0
    tcp-keepalive 300
    daemonize no
    supervised no
    pidfile /var/run/redis.pid
    loglevel notice
    logfile /data/redis.log
    databases 16
    always-show-logo yes
    stop-writes-on-bgsave-error yes
    rdbcompression yes
    rdbchecksum yes
    dbfilename dump.rdb
    dir /data
    masterauth demo@2022
    replica-serve-stale-data yes
    replica-read-only no
    repl-diskless-sync no
    repl-diskless-sync-delay 5
    repl-disable-tcp-nodelay no
    replica-priority 100
    requirepass demo@2022
    maxclients 32768
    maxmemory-policy allkeys-lru
    lazyfree-lazy-eviction no
    lazyfree-lazy-expire no
    lazyfree-lazy-server-del no
    replica-lazy-flush no
    appendonly yes
    appendfilename "appendonly.aof"
    appendfsync everysec
    no-appendfsync-on-rewrite no
    auto-aof-rewrite-percentage 100
    auto-aof-rewrite-min-size 64mb
    aof-load-truncated yes
    aof-use-rdb-preamble yes
    lua-time-limit 5000
    cluster-enabled yes
    cluster-config-file /data/nodes.conf
    cluster-node-timeout 15000
    slowlog-log-slower-than 10000
    slowlog-max-len 128
    latency-monitor-threshold 0
    notify-keyspace-events ""
    hash-max-ziplist-entries 512
    hash-max-ziplist-value 64
    list-max-ziplist-size -2
    list-compress-depth 0
    set-max-intset-entries 512
    zset-max-ziplist-entries 128
    zset-max-ziplist-value 64
    hll-sparse-max-bytes 3000
    stream-node-max-bytes 4096
    stream-node-max-entries 100
    activerehashing yes
    client-output-buffer-limit normal 0 0 0
    client-output-buffer-limit replica 256mb 64mb 60
    client-output-buffer-limit pubsub 32mb 8mb 60
    hz 10
    dynamic-hz yes
    aof-rewrite-incremental-fsync yes
    rdb-save-incremental-fsync yes


[root@master01 redis-cluster]# cat redis-cluster.yaml 
---
apiVersion: v1
kind: Service
metadata:
  namespace: redis-cluster
  name: redis-cluster
spec:
  clusterIP: None
  ports:
  - port: 6379
    targetPort: 6379
    name: client
  - port: 16379
    targetPort: 16379
    name: gossip
  selector:
    app: redis-cluster
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  namespace: redis-cluster
  name: redis-cluster
spec:
  serviceName: redis-cluster
  replicas: 6
  selector:
    matchLabels:
      app: redis-cluster
  template:
    metadata:
      labels:
        app: redis-cluster
    spec:
      terminationGracePeriodSeconds: 20
      # pod anti-affinity: prefer spreading the redis pods across different nodes
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: app
                  operator: In
                  values:
                  - redis-cluster
              topologyKey: kubernetes.io/hostname
      containers:
      - name: redis
        image: redis:5.0.13
        ports:
        - containerPort: 6379
          name: client
        - containerPort: 16379
          name: gossip
        command: ["/etc/redis/chage-pod-ip.sh", "redis-server", "/etc/redis/redis.conf"]
        env:
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        volumeMounts:
        - name: conf
          mountPath: /etc/redis/
          readOnly: false
        - name: data
          mountPath: /data
          readOnly: false
      volumes:
      - name: conf
        configMap:
          name: redis-cluster
          defaultMode: 0755
  # volumeClaimTemplates: dynamically provision a PV for each pod from the CephFS StorageClass
  volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      storageClassName: "rook-cephfs"
      accessModes:
        - ReadWriteMany
      resources:
        requests:
          storage: 10Gi

6. Deploy

[root@master01 redis-cluster]# kubectl create ns redis-cluster
namespace/redis-cluster created
[root@master01 redis-cluster]# kubectl apply -f redis-cluster-configmap.yaml
configmap/redis-cluster created
[root@master01 redis-cluster]# kubectl apply -f redis-cluster.yaml 
service/redis-cluster created
statefulset.apps/redis-cluster created
[root@master01 redis-cluster]# 
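
To wait for all six replicas instead of repeatedly re-running kubectl get, the rollout status of the StatefulSet can be watched (a sketch):

# Block until the StatefulSet reports all 6 replicas ready
kubectl -n redis-cluster rollout status statefulset/redis-cluster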

# Check the deployment status
[root@master01 redis-cluster]# kubectl get pod -n redis-cluster -o wide
NAME              READY   STATUS    RESTARTS   AGE   IP               NODE     NOMINATED NODE   READINESS GATES
redis-cluster-0   1/1     Running   0          62s   10.244.140.92    node02   <none>           <none>
redis-cluster-1   1/1     Running   0          58s   10.244.196.149   node01   <none>           <none>
redis-cluster-2   1/1     Running   0          50s   10.244.114.21    node05   <none>           <none>
redis-cluster-3   1/1     Running   0          40s   10.244.186.215   node03   <none>           <none>
redis-cluster-4   1/1     Running   0          32s   10.244.248.215   node04   <none>           <none>
redis-cluster-5   1/1     Running   0          22s   10.244.140.93    node02   <none>           <none>

[root@master01 redis-cluster]# kubectl get svc -n redis-cluster
NAME            TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)              AGE
redis-cluster   ClusterIP   None         <none>        6379/TCP,16379/TCP   94s

# Check the PVCs and PVs
[root@master01 redis-cluster]# kubectl get pvc,pv -n redis-cluster
NAME                                         STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
persistentvolumeclaim/data-redis-cluster-0   Bound    pvc-78309c22-463c-48c9-8e9f-00ed32fec2e6   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-1   Bound    pvc-6839daf7-53ed-42cf-961c-3a4aa403327f   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-2   Bound    pvc-58e79d62-415e-4bc1-9e2f-0572f9144c12   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-3   Bound    pvc-0dc7f552-4fd1-4f7a-831c-e30b2b11a27f   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-4   Bound    pvc-12532ea4-2347-4f7f-b2f5-26b5dd949a86   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-5   Bound    pvc-28fb6439-752e-461a-9600-13883a7bdd74   10Gi       RWX            rook-cephfs    75s

NAME                                                        CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                                STORAGECLASS   REASON   AGE
persistentvolume/pvc-0dc7f552-4fd1-4f7a-831c-e30b2b11a27f   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-3   rook-cephfs             16m
persistentvolume/pvc-12532ea4-2347-4f7f-b2f5-26b5dd949a86   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-4   rook-cephfs             16m
persistentvolume/pvc-28fb6439-752e-461a-9600-13883a7bdd74   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-5   rook-cephfs             75s
persistentvolume/pvc-58e79d62-415e-4bc1-9e2f-0572f9144c12   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-2   rook-cephfs             16m
persistentvolume/pvc-6839daf7-53ed-42cf-961c-3a4aa403327f   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-1   rook-cephfs             16m
persistentvolume/pvc-78309c22-463c-48c9-8e9f-00ed32fec2e6   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-0   rook-cephfs             16m
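
To confirm on the Ceph side that these volumes landed on the CephFS pools, usage can be re-checked from the toolbox (a sketch; the pool names come from filesystem.yaml and may differ in your cluster):

# Check CephFS status and per-pool usage from the toolbox after the PVCs bind
kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph fs status
kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph df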

7. Create the Redis cluster

# Get the pod IPs
[root@master01 redis-cluster]# kubectl get pod -n redis-cluster -o wide | awk '{print $6}'
IP
10.244.140.92
10.244.196.149
10.244.114.21
10.244.186.215
10.244.248.215
10.244.140.93

# Exec into a redis container
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- bash

# Create the cluster; type "yes" at the prompt to finish
redis-cli -a demo@2022 --cluster create \
10.244.140.92:6379 \
10.244.196.149:6379 \
10.244.114.21:6379 \
10.244.186.215:6379 \
10.244.248.215:6379 \
10.244.140.93:6379 \
--cluster-replicas 1
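
Copying the six IPs by hand is error-prone; a variant that builds the node list from the live pod IPs is sketched below, assuming the pods carry the app=redis-cluster label defined in the StatefulSet:

# Build the node list from the current pod IPs and create the cluster in one step
NODES=$(kubectl get pods -n redis-cluster -l app=redis-cluster \
  -o jsonpath='{range .items[*]}{.status.podIP}:6379 {end}')
kubectl exec -it redis-cluster-0 -n redis-cluster -- \
  redis-cli -a demo@2022 --cluster create $NODES --cluster-replicas 1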

8. Verify the cluster

# The cluster state is healthy
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- bash
root@redis-cluster-0:/data# redis-cli -c -h redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local -a 'demo@2022'
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.

redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:3
cluster_stats_messages_ping_sent:111
cluster_stats_messages_pong_sent:102
cluster_stats_messages_meet_sent:1
cluster_stats_messages_sent:214
cluster_stats_messages_ping_received:102
cluster_stats_messages_pong_received:112
cluster_stats_messages_received:214
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 

redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:3
cluster_stats_messages_ping_sent:170
cluster_stats_messages_pong_sent:161
cluster_stats_messages_meet_sent:1
cluster_stats_messages_sent:332
cluster_stats_messages_ping_received:161
cluster_stats_messages_pong_received:171
cluster_stats_messages_received:332
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 

redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> cluster nodes
147ce6d4a6ece2a69c69ba62d9dcb0cc3fcd3252 10.244.114.21:6379@16379 myself,master - 0 1654177346000 3 connected 10923-16383
e805b85e338356615b7ad896f882d43e79281f47 10.244.186.215:6379@16379 slave 147ce6d4a6ece2a69c69ba62d9dcb0cc3fcd3252 0 1654177345000 4 connected
b98047a17cf7fcd144c94abac0e2576bafe9bb30 10.244.196.149:6379@16379 master - 0 1654177345674 2 connected 5461-10922
cebfdfbc97ef43d94d59cf5a87845c9b993d9954 10.244.140.92:6379@16379 master - 0 1654177343000 1 connected 0-5460
313081321f48ccae93f3a67bc43e2d6b0eae93a6 10.244.140.93:6379@16379 slave b98047a17cf7fcd144c94abac0e2576bafe9bb30 0 1654177346678 6 connected
94fbbe644f27519b348bfa6909d9bf44e680da20 10.244.248.215:6379@16379 slave cebfdfbc97ef43d94d59cf5a87845c9b993d9954 0 1654177345000 5 connected
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 
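
A quick functional check of the assembled cluster (a sketch; the key name test-key is arbitrary): write a key through one pod and read it back, with -c following any MOVED redirect to the shard that owns the slot.

# Write and read a key through the cluster; -c follows cluster redirects
kubectl exec -it redis-cluster-0 -n redis-cluster -- redis-cli -c -a demo@2022 set test-key hello
kubectl exec -it redis-cluster-0 -n redis-cluster -- redis-cli -c -a demo@2022 get test-key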

9. Failure test
  Delete any one pod (here the pod named redis-cluster-1): k8s automatically recreates a pod with the same name (redis-cluster-1) and rebinds the original PVC and PV, and the pod IP recorded in /data/nodes.conf is rewritten to the new pod IP by the chage-pod-ip.sh script.

# Check the pods
[root@master01 redis-cluster]# kubectl get pods -n  redis-cluster -o wide
NAME              READY   STATUS    RESTARTS   AGE     IP               NODE     NOMINATED NODE   READINESS GATES
redis-cluster-0   1/1     Running   0          10m     10.244.140.92    node02   <none>           <none>
redis-cluster-1   1/1     Running   0          21s     10.244.196.151   node01   <none>           <none>
redis-cluster-2   1/1     Running   0          10m     10.244.114.21    node05   <none>           <none>
redis-cluster-3   1/1     Running   0          9m59s   10.244.186.215   node03   <none>           <none>
redis-cluster-4   1/1     Running   0          9m51s   10.244.248.215   node04   <none>           <none>
redis-cluster-5   1/1     Running   0          9m41s   10.244.140.93    node02   <none>           <none>

# Delete the redis-cluster-1 pod
[root@master01 redis-cluster]# kubectl delete pod redis-cluster-1 -n redis-cluster
pod "redis-cluster-1" deleted

# The pod is being recreated
[root@master01 redis-cluster]# kubectl get pods -n redis-cluster -o wide
NAME              READY   STATUS              RESTARTS   AGE     IP               NODE     NOMINATED NODE   READINESS GATES
redis-cluster-0   1/1     Running             0          10m     10.244.140.92    node02   <none>           <none>
redis-cluster-1   0/1     ContainerCreating   0          2s      <none>           node01   <none>           <none>
redis-cluster-2   1/1     Running             0          10m     10.244.114.21    node05   <none>           <none>
redis-cluster-3   1/1     Running             0          10m     10.244.186.215   node03   <none>           <none>
redis-cluster-4   1/1     Running             0          10m     10.244.248.215   node04   <none>           <none>
redis-cluster-5   1/1     Running             0          9m54s   10.244.140.93    node02   <none>           <none>

# The pod has been recreated and its IP changed from 10.244.196.151 to 10.244.196.152. Because pod anti-affinity is configured as a preference, the scheduler spreads the six redis pods across nodes where possible (with only five workers, two pods still share node02).
[root@master01 redis-cluster]# kubectl get pods -n  redis-cluster -o wide
NAME              READY   STATUS    RESTARTS   AGE     IP               NODE     NOMINATED NODE   READINESS GATES
redis-cluster-0   1/1     Running   0          10m     10.244.140.92    node02   <none>           <none>
redis-cluster-1   1/1     Running   0          4s      10.244.196.152   node01   <none>           <none>
redis-cluster-2   1/1     Running   0          10m     10.244.114.21    node05   <none>           <none>
redis-cluster-3   1/1     Running   0          10m     10.244.186.215   node03   <none>           <none>
redis-cluster-4   1/1     Running   0          10m     10.244.248.215   node04   <none>           <none>
redis-cluster-5   1/1     Running   0          9m56s   10.244.140.93    node02   <none>           <none>
[root@master01 redis-cluster]# 

# Check the cluster configuration files
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- ls
appendonly.aof  dump.rdb  nodes.conf  nodes.conf.bak  redis.log
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- cat nodes.conf
e805b85e338356615b7ad896f882d43e79281f47 10.244.186.215:6379@16379 slave 147ce6d4a6ece2a69c69ba62d9dcb0cc3fcd3252 0 1654177555790 4 connected
94fbbe644f27519b348bfa6909d9bf44e680da20 10.244.248.215:6379@16379 slave cebfdfbc97ef43d94d59cf5a87845c9b993d9954 0 1654177556797 5 connected
cebfdfbc97ef43d94d59cf5a87845c9b993d9954 10.244.140.92:6379@16379 myself,master - 0 1654177554000 1 connected 0-5460
313081321f48ccae93f3a67bc43e2d6b0eae93a6 10.244.140.93:6379@16379 slave b98047a17cf7fcd144c94abac0e2576bafe9bb30 0 1654177555000 6 connected
147ce6d4a6ece2a69c69ba62d9dcb0cc3fcd3252 10.244.114.21:6379@16379 master - 0 1654177555000 3 connected 10923-16383
b98047a17cf7fcd144c94abac0e2576bafe9bb30 10.244.196.152:6379@16379 master - 1654177555088 1654177553000 2 disconnected 5461-10922
vars currentEpoch 6 lastVoteEpoch 0
[root@master01 redis-cluster]# 

# Exec into the cluster again and check: the cluster state has returned to normal
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- bash

root@redis-cluster-0:~# redis-cli -c -h redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local -a 'demo@2022'
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:3
cluster_stats_messages_ping_sent:1039
cluster_stats_messages_pong_sent:986
cluster_stats_messages_meet_sent:1
cluster_stats_messages_sent:2026
cluster_stats_messages_ping_received:986
cluster_stats_messages_pong_received:1034
cluster_stats_messages_received:2020
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 

VI. Summary
  Deploying Ceph on the k8s nodes via Rook is convenient, but because Rook automates the entire deployment and runs every Ceph service as a pod, maintaining the Ceph cluster afterwards is relatively hard. I personally do not recommend Rook-deployed Ceph in production; a standalone Ceph cluster is easier to maintain. Some of the k8s nodes can still be reused for an independently deployed Ceph cluster, which keeps the two clusters easy to maintain separately.
