一、数据持久化

1.1 prometheus数据持久化

默认Prometheus和Grafana不做数据持久化,那么服务重启以后配置的Dashboard、 账号密码、监控数据等信息将会丢失,所以做数据持久化也是很有必要的。

原始的数据是以 emptyDir 形式存放在pod里面,生命周期与pod相同,出现问题时,容 器重启,监控相关的数据就全部消失了。

# 末尾新增持久化存储,yaml
  retention: 3d         #加这个参数,表示prometheus数据保留的天数,默认会是1天
  storage:
    volumeClaimTemplate:
      spec:
        storageClassName: nfs-storage
        resources:
          requests:
            storage: 50Gi

# 完整配置文件
[root@master01 ~]# vim /root/7/kube-prometheus/manifests/prometheus-prometheus.yaml
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  labels:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 2.54.1
  name: k8s
  namespace: monitoring
spec:
  alerting:
    alertmanagers:
    - apiVersion: v2
      name: alertmanager-main
      namespace: monitoring
      port: web
  enableFeatures: []
  externalLabels: {}
  image: registry.cn-hangzhou.aliyuncs.com/github_images1024/prometheus:v2.54.1
  nodeSelector:
    kubernetes.io/os: linux
  podMetadata:
    labels:
      app.kubernetes.io/component: prometheus
      app.kubernetes.io/instance: k8s
      app.kubernetes.io/name: prometheus
      app.kubernetes.io/part-of: kube-prometheus
      app.kubernetes.io/version: 2.54.1
  podMonitorNamespaceSelector: {}
  podMonitorSelector: {}
  probeNamespaceSelector: {}
  probeSelector: {}
  replicas: 2
  resources:
    requests:
      memory: 400Mi
  ruleNamespaceSelector: {}
  ruleSelector: {}
  scrapeConfigNamespaceSelector: {}
  scrapeConfigSelector: {}
  securityContext:
    fsGroup: 2000
    runAsNonRoot: true
    runAsUser: 1000
  serviceAccountName: prometheus-k8s
  serviceMonitorNamespaceSelector: {}
  serviceMonitorSelector: {}
  version: 2.54.1

# 新增持久化存储,yaml 末尾添加
  retention: 3d         #加这个参数,表示prometheus数据保留的天数,默认会是1天
  storage:
    volumeClaimTemplate:
      spec:
        storageClassName: nfs-storage
        resources:
          requests:
            storage: 50Gi

# 应用
[root@master01 ~]# cd /root/7
[root@master01 7]# kaf kube-prometheus/manifests/prometheus-prometheus.yaml

# 验证,新增两个pvc提供给prometheus做持久化
[root@master01 7]# kg pvc -n monitoring
NAME                                 STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
prometheus-k8s-db-prometheus-k8s-0   Bound    pvc-6fb3de47-fe60-4204-8336-3d81801c7b23   50Gi       RWO            nfs-storage    34s
prometheus-k8s-db-prometheus-k8s-1   Bound    pvc-2572056e-af1a-4797-8b51-fe380f10d927   50Gi       RWO            nfs-storage    34s

1.2 grafana数据持久化

先手动创建grafana的持久化PVC:

[root@master01 ~]# cd /root/7/kube-prometheus/manifests
[root@matser manifests]# vim grafana-pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana-pvc
  namespace: monitoring
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 5Gi
  storageClassName: nfs-storage

# 应用
[root@matser manifests]# kaf grafana-pvc.yaml

# 验证
[root@master01 manifests]# kg pvc -n monitoring | grep grafana
grafana-pvc                          Bound    pvc-1292acfe-5a12-4b7f-89ff-3ef6090f2861   5Gi        RWX            nfs-storage    32s

再修改grafana的deployment的yaml文件

[root@master01 ~]# cd /root/7/kube-prometheus/manifests
[root@matser manifests]# vim grafana-deployment.yaml
...
...
# 注释掉第160行和第161行
160       - emptyDir: {}
161         name: grafana-storage
# 在第161行下面新增如下内容信息
      - name: grafana-storage
        persistentVolumeClaim:
          claimName: grafana-pvc

# 完整配置文件
[root@matser manifests]# cat grafana-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 11.2.0
  name: grafana
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: grafana
      app.kubernetes.io/name: grafana
      app.kubernetes.io/part-of: kube-prometheus
  template:
    metadata:
      annotations:
        checksum/grafana-config: c4d088078bb55176e3910a42b41ecc08
        checksum/grafana-dashboardproviders: b66e063b0e9d7b9e152e066f0ab965ee
        checksum/grafana-datasources: 495c78a90b81354c8feeece92f6f5466
      labels:
        app.kubernetes.io/component: grafana
        app.kubernetes.io/name: grafana
        app.kubernetes.io/part-of: kube-prometheus
        app.kubernetes.io/version: 11.2.0
    spec:
      automountServiceAccountToken: false
      containers:
      - env: []
        image: registry.cn-hangzhou.aliyuncs.com/github_images1024/grafana:11.2.0
        name: grafana
        ports:
        - containerPort: 3000
          name: http
        readinessProbe:
          httpGet:
            path: /api/health
            port: http
        resources:
          limits:
            cpu: 200m
            memory: 200Mi
          requests:
            cpu: 100m
            memory: 100Mi
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          readOnlyRootFilesystem: true
          seccompProfile:
            type: RuntimeDefault
        volumeMounts:
        - mountPath: /var/lib/grafana
          name: grafana-storage
          readOnly: false
        - mountPath: /etc/grafana/provisioning/datasources
          name: grafana-datasources
          readOnly: false
        - mountPath: /etc/grafana/provisioning/dashboards
          name: grafana-dashboards
          readOnly: false
        - mountPath: /tmp
          name: tmp-plugins
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/alertmanager-overview
          name: grafana-dashboard-alertmanager-overview
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/apiserver
          name: grafana-dashboard-apiserver
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/cluster-total
          name: grafana-dashboard-cluster-total
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/controller-manager
          name: grafana-dashboard-controller-manager
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/grafana-overview
          name: grafana-dashboard-grafana-overview
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
          name: grafana-dashboard-k8s-resources-cluster
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-multicluster
          name: grafana-dashboard-k8s-resources-multicluster
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace
          name: grafana-dashboard-k8s-resources-namespace
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-node
          name: grafana-dashboard-k8s-resources-node
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod
          name: grafana-dashboard-k8s-resources-pod
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workload
          name: grafana-dashboard-k8s-resources-workload
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace
          name: grafana-dashboard-k8s-resources-workloads-namespace
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/kubelet
          name: grafana-dashboard-kubelet
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
          name: grafana-dashboard-namespace-by-pod
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
          name: grafana-dashboard-namespace-by-workload
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
          name: grafana-dashboard-node-cluster-rsrc-use
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
          name: grafana-dashboard-node-rsrc-use
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/nodes-darwin
          name: grafana-dashboard-nodes-darwin
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/nodes
          name: grafana-dashboard-nodes
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
          name: grafana-dashboard-persistentvolumesusage
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/pod-total
          name: grafana-dashboard-pod-total
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
          name: grafana-dashboard-prometheus-remote-write
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/prometheus
          name: grafana-dashboard-prometheus
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/proxy
          name: grafana-dashboard-proxy
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/scheduler
          name: grafana-dashboard-scheduler
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/workload-total
          name: grafana-dashboard-workload-total
          readOnly: false
        - mountPath: /etc/grafana
          name: grafana-config
          readOnly: false
      nodeSelector:
        kubernetes.io/os: linux
      securityContext:
        fsGroup: 65534
        runAsGroup: 65534
        runAsNonRoot: true
        runAsUser: 65534
      serviceAccountName: grafana
      volumes:
      #- emptyDir: {}
      #  name: grafana-storage
      - name: grafana-storage
        persistentVolumeClaim:
          claimName: grafana-pvc
      - name: grafana-datasources
        secret:
          secretName: grafana-datasources
      - configMap:
          name: grafana-dashboards
        name: grafana-dashboards
      - emptyDir:
          medium: Memory
        name: tmp-plugins
      - configMap:
          name: grafana-dashboard-alertmanager-overview
        name: grafana-dashboard-alertmanager-overview
      - configMap:
          name: grafana-dashboard-apiserver
        name: grafana-dashboard-apiserver
      - configMap:
          name: grafana-dashboard-cluster-total
        name: grafana-dashboard-cluster-total
      - configMap:
          name: grafana-dashboard-controller-manager
        name: grafana-dashboard-controller-manager
      - configMap:
          name: grafana-dashboard-grafana-overview
        name: grafana-dashboard-grafana-overview
      - configMap:
          name: grafana-dashboard-k8s-resources-cluster
        name: grafana-dashboard-k8s-resources-cluster
      - configMap:
          name: grafana-dashboard-k8s-resources-multicluster
        name: grafana-dashboard-k8s-resources-multicluster
      - configMap:
          name: grafana-dashboard-k8s-resources-namespace
        name: grafana-dashboard-k8s-resources-namespace
      - configMap:
          name: grafana-dashboard-k8s-resources-node
        name: grafana-dashboard-k8s-resources-node
      - configMap:
          name: grafana-dashboard-k8s-resources-pod
        name: grafana-dashboard-k8s-resources-pod
      - configMap:
          name: grafana-dashboard-k8s-resources-workload
        name: grafana-dashboard-k8s-resources-workload
      - configMap:
          name: grafana-dashboard-k8s-resources-workloads-namespace
        name: grafana-dashboard-k8s-resources-workloads-namespace
      - configMap:
          name: grafana-dashboard-kubelet
        name: grafana-dashboard-kubelet
      - configMap:
          name: grafana-dashboard-namespace-by-pod
        name: grafana-dashboard-namespace-by-pod
      - configMap:
          name: grafana-dashboard-namespace-by-workload
        name: grafana-dashboard-namespace-by-workload
      - configMap:
          name: grafana-dashboard-node-cluster-rsrc-use
        name: grafana-dashboard-node-cluster-rsrc-use
      - configMap:
          name: grafana-dashboard-node-rsrc-use
        name: grafana-dashboard-node-rsrc-use
      - configMap:
          name: grafana-dashboard-nodes-darwin
        name: grafana-dashboard-nodes-darwin
      - configMap:
          name: grafana-dashboard-nodes
        name: grafana-dashboard-nodes
      - configMap:
          name: grafana-dashboard-persistentvolumesusage
        name: grafana-dashboard-persistentvolumesusage
      - configMap:
          name: grafana-dashboard-pod-total
        name: grafana-dashboard-pod-total
      - configMap:
          name: grafana-dashboard-prometheus-remote-write
        name: grafana-dashboard-prometheus-remote-write
      - configMap:
          name: grafana-dashboard-prometheus
        name: grafana-dashboard-prometheus
      - configMap:
          name: grafana-dashboard-proxy
        name: grafana-dashboard-proxy
      - configMap:
          name: grafana-dashboard-scheduler
        name: grafana-dashboard-scheduler
      - configMap:
          name: grafana-dashboard-workload-total
        name: grafana-dashboard-workload-total
      - name: grafana-config
        secret:
          secretName: grafana-config

为了固定grafana的登录密码,添加下面环境变量

# 在第41行下面添加环境变量(行号是基于在上面操作完成的基础上)
        env:
        - name: GF_SECURITY_ADMIN_USER
          value: admin
        - name: GF_SECURITY_ADMIN_PASSWORD
          value: admin

# 完整配置文件
[root@master01 manifests]# cat grafana-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 11.2.0
  name: grafana
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: grafana
      app.kubernetes.io/name: grafana
      app.kubernetes.io/part-of: kube-prometheus
  template:
    metadata:
      annotations:
        checksum/grafana-config: c4d088078bb55176e3910a42b41ecc08
        checksum/grafana-dashboardproviders: b66e063b0e9d7b9e152e066f0ab965ee
        checksum/grafana-datasources: 495c78a90b81354c8feeece92f6f5466
      labels:
        app.kubernetes.io/component: grafana
        app.kubernetes.io/name: grafana
        app.kubernetes.io/part-of: kube-prometheus
        app.kubernetes.io/version: 11.2.0
    spec:
      automountServiceAccountToken: false
      containers:
      - env: []
        image: registry.cn-hangzhou.aliyuncs.com/github_images1024/grafana:11.2.0
        name: grafana
        ports:
        - containerPort: 3000
          name: http
        readinessProbe:
          httpGet:
            path: /api/health
            port: http
        env:
        - name: GF_SECURITY_ADMIN_USER
          value: admin
        - name: GF_SECURITY_ADMIN_PASSWORD
          value: admin
        resources:
          limits:
            cpu: 200m
            memory: 200Mi
          requests:
            cpu: 100m
            memory: 100Mi
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          readOnlyRootFilesystem: true
          seccompProfile:
            type: RuntimeDefault
        volumeMounts:
        - mountPath: /var/lib/grafana
          name: grafana-storage
          readOnly: false
        - mountPath: /etc/grafana/provisioning/datasources
          name: grafana-datasources
          readOnly: false
        - mountPath: /etc/grafana/provisioning/dashboards
          name: grafana-dashboards
          readOnly: false
        - mountPath: /tmp
          name: tmp-plugins
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/alertmanager-overview
          name: grafana-dashboard-alertmanager-overview
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/apiserver
          name: grafana-dashboard-apiserver
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/cluster-total
          name: grafana-dashboard-cluster-total
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/controller-manager
          name: grafana-dashboard-controller-manager
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/grafana-overview
          name: grafana-dashboard-grafana-overview
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
          name: grafana-dashboard-k8s-resources-cluster
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-multicluster
          name: grafana-dashboard-k8s-resources-multicluster
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace
          name: grafana-dashboard-k8s-resources-namespace
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-node
          name: grafana-dashboard-k8s-resources-node
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod
          name: grafana-dashboard-k8s-resources-pod
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workload
          name: grafana-dashboard-k8s-resources-workload
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace
          name: grafana-dashboard-k8s-resources-workloads-namespace
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/kubelet
          name: grafana-dashboard-kubelet
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
          name: grafana-dashboard-namespace-by-pod
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
          name: grafana-dashboard-namespace-by-workload
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
          name: grafana-dashboard-node-cluster-rsrc-use
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
          name: grafana-dashboard-node-rsrc-use
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/nodes-darwin
          name: grafana-dashboard-nodes-darwin
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/nodes
          name: grafana-dashboard-nodes
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
          name: grafana-dashboard-persistentvolumesusage
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/pod-total
          name: grafana-dashboard-pod-total
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
          name: grafana-dashboard-prometheus-remote-write
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/prometheus
          name: grafana-dashboard-prometheus
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/proxy
          name: grafana-dashboard-proxy
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/scheduler
          name: grafana-dashboard-scheduler
          readOnly: false
        - mountPath: /grafana-dashboard-definitions/0/workload-total
          name: grafana-dashboard-workload-total
          readOnly: false
        - mountPath: /etc/grafana
          name: grafana-config
          readOnly: false
      nodeSelector:
        kubernetes.io/os: linux
      securityContext:
        fsGroup: 65534
        runAsGroup: 65534
        runAsNonRoot: true
        runAsUser: 65534
      serviceAccountName: grafana
      volumes:
      #- emptyDir: {}
      #  name: grafana-storage
      - name: grafana-storage
        persistentVolumeClaim:
          claimName: grafana-pvc
      - name: grafana-datasources
        secret:
          secretName: grafana-datasources
      - configMap:
          name: grafana-dashboards
        name: grafana-dashboards
      - emptyDir:
          medium: Memory
        name: tmp-plugins
      - configMap:
          name: grafana-dashboard-alertmanager-overview
        name: grafana-dashboard-alertmanager-overview
      - configMap:
          name: grafana-dashboard-apiserver
        name: grafana-dashboard-apiserver
      - configMap:
          name: grafana-dashboard-cluster-total
        name: grafana-dashboard-cluster-total
      - configMap:
          name: grafana-dashboard-controller-manager
        name: grafana-dashboard-controller-manager
      - configMap:
          name: grafana-dashboard-grafana-overview
        name: grafana-dashboard-grafana-overview
      - configMap:
          name: grafana-dashboard-k8s-resources-cluster
        name: grafana-dashboard-k8s-resources-cluster
      - configMap:
          name: grafana-dashboard-k8s-resources-multicluster
        name: grafana-dashboard-k8s-resources-multicluster
      - configMap:
          name: grafana-dashboard-k8s-resources-namespace
        name: grafana-dashboard-k8s-resources-namespace
      - configMap:
          name: grafana-dashboard-k8s-resources-node
        name: grafana-dashboard-k8s-resources-node
      - configMap:
          name: grafana-dashboard-k8s-resources-pod
        name: grafana-dashboard-k8s-resources-pod
      - configMap:
          name: grafana-dashboard-k8s-resources-workload
        name: grafana-dashboard-k8s-resources-workload
      - configMap:
          name: grafana-dashboard-k8s-resources-workloads-namespace
        name: grafana-dashboard-k8s-resources-workloads-namespace
      - configMap:
          name: grafana-dashboard-kubelet
        name: grafana-dashboard-kubelet
      - configMap:
          name: grafana-dashboard-namespace-by-pod
        name: grafana-dashboard-namespace-by-pod
      - configMap:
          name: grafana-dashboard-namespace-by-workload
        name: grafana-dashboard-namespace-by-workload
      - configMap:
          name: grafana-dashboard-node-cluster-rsrc-use
        name: grafana-dashboard-node-cluster-rsrc-use
      - configMap:
          name: grafana-dashboard-node-rsrc-use
        name: grafana-dashboard-node-rsrc-use
      - configMap:
          name: grafana-dashboard-nodes-darwin
        name: grafana-dashboard-nodes-darwin
      - configMap:
          name: grafana-dashboard-nodes
        name: grafana-dashboard-nodes
      - configMap:
          name: grafana-dashboard-persistentvolumesusage
        name: grafana-dashboard-persistentvolumesusage
      - configMap:
          name: grafana-dashboard-pod-total
        name: grafana-dashboard-pod-total
      - configMap:
          name: grafana-dashboard-prometheus-remote-write
        name: grafana-dashboard-prometheus-remote-write
      - configMap:
          name: grafana-dashboard-prometheus
        name: grafana-dashboard-prometheus
      - configMap:
          name: grafana-dashboard-proxy
        name: grafana-dashboard-proxy
      - configMap:
          name: grafana-dashboard-scheduler
        name: grafana-dashboard-scheduler
      - configMap:
          name: grafana-dashboard-workload-total
        name: grafana-dashboard-workload-total
      - name: grafana-config
        secret:
          secretName: grafana-config

修改完成上面内容后重新应用

[root@master01 manifests]# kaf grafana-deployment.yaml

# 验证
[root@master01 manifests]# kgp -n monitoring | grep grafana
grafana-7ff776777c-wrs8d               1/1     Running   0          58s

二、优化配置

grafana-kubernetes-app 插件

# 进入grafana的pod下载插件
[root@master01 manifests]# kubectl exec -it $(kubectl get pod -n monitoring -l app.kubernetes.io/name=grafana \
-o jsonpath='{.items[*].metadata.name}') -n monitoring -- sh

/usr/share/grafana $ grafana-cli plugins install grafana-piechart-panel
/usr/share/grafana $ grafana-cli plugins install camptocamp-prometheus-alertmanager-datasource
/usr/share/grafana $ grafana-cli plugins install grafana-kubernetes-app

# 插件生效
[root@master01 manifests]# kubectl rollout restart deploy grafana -n monitoring

grafana dashboard 时区默认为UTC,比北京时间慢了8小时,很不便于日常监控查看,这里可以修改

[root@master01 ~]# cd /root/7/kube-prometheus/manifests
[root@master01 manifests]# sed -i 's/UTC/UTC+8/g'  grafana-dashboardDefinitions.yaml
[root@master01 manifests]# sed -i 's/utc/UTC+8/g'  grafana-dashboardDefinitions.yaml
[root@master01 manifests]# kubectl apply -f grafana-dashboardDefinitions.yaml

三、如何修改alert rule?

3.1 方式一:在线修改rule规则

# edit
[root@master01 manifests]# kubectl edit cm  prometheus-k8s-rulefiles-0  -n monitoring

3.2 方式二:修改配置文件方式

[root@master01 ~]# cd /root/7/kube-prometheus/manifests
# 自己根据需要修改配置文件
[root@master01 manifests]# vim prometheus-prometheusRule.yaml

#重新应用
[root@master01 manifests]# kubectl apply prometheus-prometheusRule.yaml

四、AlterManager报警配置

这里给出精简版本,详细可以参考 kube-prometheus/manifests/alertmanager-secret.yaml

cat << EOF > alertmanager-prometheusAlert.yaml
apiVersion: v1
kind: Secret
metadata:
  labels:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 0.24.0
  name: alertmanager-main
  namespace: monitoring
stringData:
  alertmanager.yaml: |-
    global:
      resolve_timeout: 5m
    route:
      group_by: ['env','instance','type','group','job','alertname','cluster']
      group_wait: 10s
      group_interval: 2m
      repeat_interval: 10m
      receiver: 'webhook'
    receivers:
    - name: 'webhook'
      webhook_configs:
      - url: 'http://prometheus-alert-center.monitor.svc:8080/prometheusalert?type=wx&tpl=prometheus-wx&wxurqq.com/cgi-bin/webhook/send?key=9d8866d6-ab55-48f3-8336-786325667640&at=ZhangDaDan,ZHDYA'
        send_resolved: true
type: Opaque
EOF

应用

kubectl apply -f alertmanager-prometheusAlert.yaml