一、使用Rook v1.10.11在k8s集群部署ceph存储集群

1、使用Rook官方提供的示例部署组件清单(manifests)部署一个Ceph集群

#在master01节点使用git将部署组件清单示例下载到本地
[root@master01 2]# git clone --single-branch --branch v1.10.11 https://github.com/rook/rook.git

#进入到本地部署组件清单示例目录
[root@master01 2]# cd rook/deploy/examples/

#执行以下命令将Rook和Ceph相关CRD资源和通用资源创建到K8S集群(其中psp.yaml是K8S集群的Pod安全策略PodSecurityPolicy资源文件;PSP在k8s 1.25版本中已被移除,因此这里将其作为可选资源文件,不再创建)
[root@master01 examples]# kubectl create -f crds.yaml -f common.yaml -f filesystem.yaml

2、修改rook/deploy/examples/operator.yaml文件

[root@master01 2]# cd rook/deploy/examples/
[root@master01 examples]# vim operator.yaml
...
...
#110-115行将国外镜像替换为国内镜像,并取消注释
110   ROOK_CSI_CEPH_IMAGE: "registry.cn-hangzhou.aliyuncs.com/abroad_images/cephcsi:v3.7.2"
111   ROOK_CSI_REGISTRAR_IMAGE: "registry.aliyuncs.com/google_containers/csi-node-driver-registrar:v2.7.0"
112   ROOK_CSI_RESIZER_IMAGE: "registry.aliyuncs.com/google_containers/csi-resizer:v1.7.0"
113   ROOK_CSI_PROVISIONER_IMAGE: "registry.aliyuncs.com/google_containers/csi-provisioner:v3.4.0"
114   ROOK_CSI_SNAPSHOTTER_IMAGE: "registry.aliyuncs.com/google_containers/csi-snapshotter:v6.2.1"
115   ROOK_CSI_ATTACHER_IMAGE: "registry.aliyuncs.com/google_containers/csi-attacher:v4.1.0"
...
...
#打开CephCSI 提供者的节点(node)亲和性(去掉前面的注释即可,会同时作用于CephFS和RBD提供者,如果要分开这两者的调度,可以继续打开后面专用的节点亲和性)
181   CSI_PROVISIONER_NODE_AFFINITY: "role=storage-node; storage=rook-ceph"
...
...
#打开CephCSI 插件(plugin)的节点(node)亲和性(去掉前面的注释即可,会同时作用于CephFS和RBD插件,如果要分开这两者的调度,可以继续打开后面专用的节点亲和性)
193   CSI_PLUGIN_NODE_AFFINITY: "role=storage-node; storage=rook-ceph"
#如果CephFS和RBD提供者的调度亲和性要分开,则在上面的基础上修改206行和223行打开它们专用的开关(去除下面两行前端的#即可),这里设置成不分开,所以仍然保持注释
...
...
206   # CSI_RBD_PROVISIONER_NODE_AFFINITY: "role=rbd-node"
...
...
223   # CSI_CEPHFS_PROVISIONER_NODE_AFFINITY: "role=cephfs-node"
...
...
#483行开启裸设备自动发现开关,生产环境一般都会将裸设备自动发现开关设为true(方便后面追加设备)
483   ROOK_ENABLE_DISCOVERY_DAEMON: "true"
...
...
#530行修改rook镜像为国内镜像
530           image: registry.cn-hangzhou.aliyuncs.com/abroad_images/ceph:v1.10.11
...
...
572             - name: DISCOVER_AGENT_NODE_AFFINITY
573               value: "role=storage-node; storage=rook-ceph"
...
...

3、修改完后,根据如上的节点标签亲和性设置,为三个工作节点打上对应的标签:

[root@master01 ~]# kubectl label node master03 node01 node02 role=storage-node
[root@master01 ~]# kubectl label node master03 node01 node02 storage=rook-ceph

4、确认修改完成后,在master节点上执行以下命令进行Rook Ceph Operator的部署

[root@master01 ~]# cd /root/2/rook/deploy/examples/
[root@master01 examples]# kubectl apply -f operator.yaml
[root@master01 examples]# kubectl get pods -n rook-ceph
NAME                                  READY   STATUS    RESTARTS   AGE
rook-ceph-operator-5cc8dd7b7b-4b6mb   1/1     Running   0          5m4s
rook-discover-s52hh                   1/1     Running   0          3m34s
rook-discover-tqpx7                   1/1     Running   0          3m34s
rook-discover-vbvwd                   1/1     Running   0          3m34s

5、确保rook-ceph-operator相关Pod都运行正常的情况下,修改 rook/deploy/examples/cluster.yaml文件

[root@master01 ~]# cd /root/2/rook/deploy/examples/
[root@master01 examples]# vim cluster.yaml
#修改24行镜像为国内镜像
 24     image: registry.cn-hangzhou.aliyuncs.com/abroad_images/ceph:v17.2.5
...
...
#如果已经安装Prometheus,可以设置开启
 74   monitoring:
 75     # requires Prometheus to be pre-installed
 76     enabled: false
...
...
#打开节点亲和性调度和污点容忍
147   placement:
148     all:
149       nodeAffinity:
150         requiredDuringSchedulingIgnoredDuringExecution:
151           nodeSelectorTerms:
152           - matchExpressions:
153             - key: role
154               operator: In
155               values:
156               - storage-node
...
...
#在196行处加入资源限制,建议内存设置4G以上.这里一定要修改一下集群osd的资源限制,否则osd的内存使用率会无限增长,这块需要根据自己环境设置,如果设置过高会导致rook-ceph-osd-0起不来
196   resources:
197       osd:
198         limits:
199           cpu: "2"
200           memory: "4000Mi"
201         requests:
202           cpu: "2"
203           memory: "4000Mi"
#将useAllNodes和useAllDevices设为false,不让集群自动使用所有节点和所有设备,改为在下面手动指定节点和磁盘
233   storage: # cluster level storage configuration and selection
234     useAllNodes: false
235     useAllDevices: false
...
...
#将存储设置为我们三个工作节点新加的sdb裸盘,可以通过lsblk查看磁盘信息;注意name必须与kubectl get nodes显示的节点名一致,不能设置成IP地址
246     nodes:
247     - name: "master03"
248       devices:
249       - name: "sdb"
250     - name: "node01"
251       devices:
252       - name: "sdb"
253     - name: "node02"
254       devices:
255       - name: "sdb"

cluster.yaml完整配置文件

[root@master01 examples]# egrep -v "#|^$" cluster.yaml
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
spec:
  cephVersion:
    image: registry.cn-hangzhou.aliyuncs.com/abroad_images/ceph:v17.2.5
    allowUnsupported: false
  dataDirHostPath: /var/lib/rook
  skipUpgradeChecks: false
  continueUpgradeAfterChecksEvenIfNotHealthy: false
  waitTimeoutForHealthyOSDInMinutes: 10
  mon:
    count: 3
    allowMultiplePerNode: false
  mgr:
    count: 2
    allowMultiplePerNode: false
    modules:
      - name: pg_autoscaler
        enabled: true
  dashboard:
    enabled: true
    ssl: true
  monitoring:
    enabled: false
  network:
    connections:
      encryption:
        enabled: false
      compression:
        enabled: false
  crashCollector:
    disable: false
  logCollector:
    enabled: true
  cleanupPolicy:
    confirmation: ""
    sanitizeDisks:
      method: quick
      dataSource: zero
      iteration: 1
    allowUninstallWithVolumes: false
  placement:
    all:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
          - matchExpressions:
            - key: role
              operator: In
              values:
              - storage-node
  annotations:
  labels:
  resources:
    osd:
      limits:
        cpu: "2"
        memory: "4000Mi"
      requests:
        cpu: "2"
        memory: "4000Mi"
  removeOSDsIfOutAndSafeToRemove: false
  priorityClassNames:
    mon: system-node-critical
    osd: system-node-critical
    mgr: system-cluster-critical
  storage:
    useAllNodes: false
    useAllDevices: false
    config:
    nodes:
    - name: "master03"
      devices:
      - name: "sdb"
    - name: "node01"
      devices:
      - name: "sdb"
    - name: "node02"
      devices:
      - name: "sdb"
    onlyApplyOSDPlacement: false
  disruptionManagement:
    managePodBudgets: true
    osdMaintenanceTimeout: 30
    pgHealthCheckTimeout: 0
    manageMachineDisruptionBudgets: false
    machineDisruptionBudgetNamespace: openshift-machine-api
  healthCheck:
    daemonHealth:
      mon:
        disabled: false
        interval: 45s
      osd:
        disabled: false
        interval: 60s
      status:
        disabled: false
        interval: 60s
    livenessProbe:
      mon:
        disabled: false
      mgr:
        disabled: false
      osd:
        disabled: false
    startupProbe:
      mon:
        disabled: false
      mgr:
        disabled: false
      osd:
        disabled: false

6、确认修改完成后,在master节点上执行以下命令进行Rook Ceph的部署

[root@master01 examples]# kubectl apply -f  cluster.yaml

#使用以下命令监控Ceph Cluster相关Pod的部署情况(rook-ceph为默认部署命名空间)
[root@master01 examples]# watch kubectl get pods -n rook-ceph

7、查看最终Pod的状态

[root@master01 examples]# kubectl get pods -n rook-ceph
NAME                                                 READY   STATUS      RESTARTS   AGE
csi-cephfsplugin-ch4f5                               2/2     Running     0          5m35s
csi-cephfsplugin-jsfzw                               2/2     Running     0          5m35s
csi-cephfsplugin-provisioner-64dcb9bb79-4chvb        5/5     Running     0          5m35s
csi-cephfsplugin-provisioner-64dcb9bb79-t5dr5        5/5     Running     0          5m35s
csi-cephfsplugin-tjzs6                               2/2     Running     0          5m35s
csi-rbdplugin-72n7k                                  2/2     Running     0          5m35s
csi-rbdplugin-hnc2v                                  2/2     Running     0          5m35s
csi-rbdplugin-kgbxl                                  2/2     Running     0          5m35s
csi-rbdplugin-provisioner-56b987dd9b-fbpqw           5/5     Running     0          5m35s
csi-rbdplugin-provisioner-56b987dd9b-x2bx2           5/5     Running     0          5m35s
rook-ceph-crashcollector-master01-7b6c875d97-xm24c   1/1     Running     0          90s
rook-ceph-crashcollector-master02-6fd8c9c69b-r7xck   1/1     Running     0          91s
rook-ceph-crashcollector-master03-79c5c959f7-zj67v   1/1     Running     0          104s
rook-ceph-crashcollector-node01-756f867fcc-xf7xc     1/1     Running     0          104s
rook-ceph-crashcollector-node02-5659d55f44-xww6g     1/1     Running     0          112s
rook-ceph-mds-myfs-a-78b54d49d5-l8gq6                1/2     Running     0          91s
rook-ceph-mds-myfs-b-59bc8c9c76-mctvs                1/2     Running     0          90s
rook-ceph-mgr-a-6489f446bf-xxchn                     3/3     Running     0          2m12s
rook-ceph-mgr-b-6b5b9c484d-l6dhj                     3/3     Running     0          2m12s
rook-ceph-mon-a-5c776579c6-2h9wk                     2/2     Running     0          4m15s
rook-ceph-mon-b-567d5bb859-6rt9j                     2/2     Running     0          2m32s
rook-ceph-mon-c-75d55c9657-ktdjn                     2/2     Running     0          2m22s
rook-ceph-operator-5cc8dd7b7b-4svpd                  1/1     Running     0          8m55s
rook-ceph-osd-0-5bc94c64f5-pds45                     2/2     Running     0          104s
rook-ceph-osd-1-674c4cf9c-lg6gm                      2/2     Running     0          104s
rook-ceph-osd-2-6b77fcfc4c-mcq7d                     2/2     Running     0          102s
rook-ceph-osd-prepare-master03-mrwld                 0/1     Completed   0          80s
rook-ceph-osd-prepare-node01-pdf92                   0/1     Completed   0          77s
rook-ceph-osd-prepare-node02-pwww8                   0/1     Completed   0          73s
rook-discover-4z7kv                                  1/1     Running     0          7m26s
rook-discover-5wz46                                  1/1     Running     0          7m26s
rook-discover-8xr2b                                  1/1     Running     0          7m26s

以上是所有组件 pod 完成后的状态。以 rook-ceph-osd-prepare 开头的 pod 是在各节点上检测并准备 OSD 磁盘的 Job,执行完毕后处于 Completed 状态属于正常现象;自动感知新挂载硬盘则由 rook-discover 守护进程(ROOK_ENABLE_DISCOVERY_DAEMON)负责,由于我们前面手动指定了节点和磁盘,新增磁盘仍需在 cluster.yaml 中追加配置。osd-0、osd-1、osd-2 容器必须存在且正常,如果上述 pod 均正常运行成功,则视为集群安装成功。