一、scheduler

[root@tiaoban prometheus]# kubectl describe pod -n kube-system kube-scheduler-master01
Name:                 kube-scheduler-master01
Namespace:            kube-system
...
Labels:               component=kube-scheduler
                      tier=control-plane
...

由上可知,匹配pod对象,lable标签为component=kube-scheduler即可scheduler和controller-manager一样,默认监听0端口,需要注释

所有Master节点修改 /etc/kubernetes/manifests/kube-scheduler.yaml

# cat /etc/kubernetes/manifests/kube-scheduler.yaml
...
  - command:
    - --bind-address=0.0.0.0 # 端口改为0.0.0.0
    #- --port=0 # 注释0端口
...

# 查看暴露的端口
[root@master01 7]# netstat -lntup | grep kube-schedul
tcp6       0      0 :::10259                :::*                    LISTEN      115708/kube-schedul
  • 编写prometheus配置文件,需要注意的是,他默认匹配到的是80端口,需要手动指定为10259端口,同时指定token,否则会提示 server returned HTTP status 400 Bad Request;
    ########## kube-scheduler 监控配置 ##########
    - job_name: 'kube-scheduler'
      kubernetes_sd_configs:
        - role: pod
      scheme: https
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        - source_labels: [__meta_kubernetes_pod_label_component]
          regex: kube-scheduler
          action: keep
        - source_labels: [__meta_kubernetes_pod_ip]
          regex: (.+)
          target_label: __address__
          replacement: "${1}:10259"
        - source_labels: []
          regex: .*
          target_label: __scheme__
          replacement: https
        - source_labels: [__meta_kubernetes_endpoints_name]
          action: replace
          target_label: endpoint
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: service
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace

# 完整配置文件
[root@master01 7]# cat  prometheus-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitor
data:
  prometheus.yml: |
    global:
      scrape_interval:     15s
      evaluation_interval: 15s
      external_labels:
        cluster: "kubernetes"

    ############ 数据采集job ###################

    scrape_configs:
    ########## prometheus 监控配置 ##########
    - job_name: prometheus
      static_configs:
      - targets: ['127.0.0.1:9090']
        labels:
          instance: prometheus

    ########## kube-apiserver 监控配置 ##########
    - job_name: kube-apiserver
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
        action: keep
        regex: default;kubernetes
      - source_labels: [__meta_kubernetes_endpoints_name]
        action: replace
        target_label: endpoint
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: service
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

    ########## kube-controller-manager 监控配置 ##########
    - job_name: 'kube-controller-manager'
      # 使用 Kubernetes Pod 发现机制
      kubernetes_sd_configs:
        - role: pod
      # 强制使用 HTTPS 协议
      scheme: https
      # TLS 配置(测试环境跳过验证)
      tls_config:
        insecure_skip_verify: true
      # 使用 ServiceAccount 的 Token 认证
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        # 保留标签为 component=kube-controller-manager 的 Pod
        - source_labels: [__meta_kubernetes_pod_label_component]
          regex: kube-controller-manager
          action: keep
        # 重写目标地址为 Pod IP + 10257 端口
        - source_labels: [__meta_kubernetes_pod_ip]
          regex: (.+)
          target_label: __address__
          replacement: "${1}:10257"
        # 强制使用 HTTPS 协议(冗余但明确)
        - source_labels: []
          regex: .*
          target_label: __scheme__
          replacement: https
        # 附加元数据标签
        - source_labels: [__meta_kubernetes_endpoints_name]
          action: replace
          target_label: endpoint
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: service
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace

    ########## kube-scheduler 监控配置 ##########
    - job_name: 'kube-scheduler'
      kubernetes_sd_configs:
        - role: pod
      scheme: https
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        - source_labels: [__meta_kubernetes_pod_label_component]
          regex: kube-scheduler
          action: keep
        - source_labels: [__meta_kubernetes_pod_ip]
          regex: (.+)
          target_label: __address__
          replacement: "${1}:10259"
        - source_labels: []
          regex: .*
          target_label: __scheme__
          replacement: https
        - source_labels: [__meta_kubernetes_endpoints_name]
          action: replace
          target_label: endpoint
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: service
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace

    ############ 指定告警规则文件路径位置 ###################
    rule_files:
    - /etc/prometheus/rules/*.rules

# 配置生效
[root@master01 7]# kaf prometheus-config.yaml

# 手动热加载
[root@master01 7]# curl -XPOST http://prometheus.zhang-qing.com/-/reload

二、kube-state-metrics

  • 编写prometheus配置文件,需要注意的是,他默认匹配到的是8080和801两个端口,需要手动指定为8080端口;
#添加如下配置
    ########## kube-state-metrics 监控配置 ##########
    - job_name: kube-state-metrics
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_name]
        regex: kube-state-metrics
        action: keep
      - source_labels: [__meta_kubernetes_pod_ip]
        regex: (.+)
        target_label: __address__
        replacement: ${1}:8080
      - source_labels: [__meta_kubernetes_endpoints_name]
        action: replace
        target_label: endpoint
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: service
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

#完整配置文件如下
[root@master01 7]# cat prometheus-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitor
data:
  prometheus.yml: |
    global:
      scrape_interval:     15s
      evaluation_interval: 15s
      external_labels:
        cluster: "kubernetes"

    ############ 数据采集job ###################

    scrape_configs:
    ########## prometheus 监控配置 ##########
    - job_name: prometheus
      static_configs:
      - targets: ['127.0.0.1:9090']
        labels:
          instance: prometheus

    ########## kube-apiserver 监控配置 ##########
    - job_name: kube-apiserver
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
        action: keep
        regex: default;kubernetes
      - source_labels: [__meta_kubernetes_endpoints_name]
        action: replace
        target_label: endpoint
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: service
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

    ########## kube-controller-manager 监控配置 ##########
    - job_name: 'kube-controller-manager'
      # 使用 Kubernetes Pod 发现机制
      kubernetes_sd_configs:
        - role: pod
      # 强制使用 HTTPS 协议
      scheme: https
      # TLS 配置(测试环境跳过验证)
      tls_config:
        insecure_skip_verify: true
      # 使用 ServiceAccount 的 Token 认证
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        # 保留标签为 component=kube-controller-manager 的 Pod
        - source_labels: [__meta_kubernetes_pod_label_component]
          regex: kube-controller-manager
          action: keep
        # 重写目标地址为 Pod IP + 10257 端口
        - source_labels: [__meta_kubernetes_pod_ip]
          regex: (.+)
          target_label: __address__
          replacement: "${1}:10257"
        # 强制使用 HTTPS 协议(冗余但明确)
        - source_labels: []
          regex: .*
          target_label: __scheme__
          replacement: https
        # 附加元数据标签
        - source_labels: [__meta_kubernetes_endpoints_name]
          action: replace
          target_label: endpoint
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: service
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace

    ########## kube-scheduler 监控配置 ##########
    - job_name: 'kube-scheduler'
      kubernetes_sd_configs:
        - role: pod
      scheme: https
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        - source_labels: [__meta_kubernetes_pod_label_component]
          regex: kube-scheduler
          action: keep
        - source_labels: [__meta_kubernetes_pod_ip]
          regex: (.+)
          target_label: __address__
          replacement: "${1}:10259"
        - source_labels: []
          regex: .*
          target_label: __scheme__
          replacement: https
        - source_labels: [__meta_kubernetes_endpoints_name]
          action: replace
          target_label: endpoint
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: service
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace

    ########## kube-state-metrics 监控配置 ##########
    - job_name: kube-state-metrics
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_name]
        regex: kube-state-metrics
        action: keep
      - source_labels: [__meta_kubernetes_pod_ip]
        regex: (.+)
        target_label: __address__
        replacement: ${1}:8080
      - source_labels: [__meta_kubernetes_endpoints_name]
        action: replace
        target_label: endpoint
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: service
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

    ############ 指定告警规则文件路径位置 ###################
    rule_files:
    - /etc/prometheus/rules/*.rules

# 配置生效
[root@master01 7]# kaf prometheus-config.yaml

# 手动热加载
[root@master01 7]# curl -XPOST http://prometheus.zhang-qing.com/-/reload

三、coredns

  • 编写prometheus配置文件,需要注意的是,他默认匹配到的是53端口,需要手动指定为9153端口
    ########## coredns 监控配置 ##########
    - job_name: coredns
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels:
          - __meta_kubernetes_service_label_k8s_app
        regex: kube-dns
        action: keep
      - source_labels: [__meta_kubernetes_pod_ip]
        regex: (.+)
        target_label: __address__
        replacement: ${1}:9153
      - source_labels: [__meta_kubernetes_endpoints_name]
        action: replace
        target_label: endpoint
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: service
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

#完整配置文件如下
[root@master01 7]# cat  prometheus-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitor
data:
  prometheus.yml: |
    global:
      scrape_interval:     15s
      evaluation_interval: 15s
      external_labels:
        cluster: "kubernetes"

    ############ 数据采集job ###################

    scrape_configs:
    ########## prometheus 监控配置 ##########
    - job_name: prometheus
      static_configs:
      - targets: ['127.0.0.1:9090']
        labels:
          instance: prometheus

    ########## kube-apiserver 监控配置 ##########
    - job_name: kube-apiserver
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
        action: keep
        regex: default;kubernetes
      - source_labels: [__meta_kubernetes_endpoints_name]
        action: replace
        target_label: endpoint
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: service
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

    ########## kube-controller-manager 监控配置 ##########
    - job_name: 'kube-controller-manager'
      # 使用 Kubernetes Pod 发现机制
      kubernetes_sd_configs:
        - role: pod
      # 强制使用 HTTPS 协议
      scheme: https
      # TLS 配置(测试环境跳过验证)
      tls_config:
        insecure_skip_verify: true
      # 使用 ServiceAccount 的 Token 认证
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        # 保留标签为 component=kube-controller-manager 的 Pod
        - source_labels: [__meta_kubernetes_pod_label_component]
          regex: kube-controller-manager
          action: keep
        # 重写目标地址为 Pod IP + 10257 端口
        - source_labels: [__meta_kubernetes_pod_ip]
          regex: (.+)
          target_label: __address__
          replacement: "${1}:10257"
        # 强制使用 HTTPS 协议(冗余但明确)
        - source_labels: []
          regex: .*
          target_label: __scheme__
          replacement: https
        # 附加元数据标签
        - source_labels: [__meta_kubernetes_endpoints_name]
          action: replace
          target_label: endpoint
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: service
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace

    ########## kube-scheduler 监控配置 ##########
    - job_name: 'kube-scheduler'
      kubernetes_sd_configs:
        - role: pod
      scheme: https
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
        - source_labels: [__meta_kubernetes_pod_label_component]
          regex: kube-scheduler
          action: keep
        - source_labels: [__meta_kubernetes_pod_ip]
          regex: (.+)
          target_label: __address__
          replacement: "${1}:10259"
        - source_labels: []
          regex: .*
          target_label: __scheme__
          replacement: https
        - source_labels: [__meta_kubernetes_endpoints_name]
          action: replace
          target_label: endpoint
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: service
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace

    ########## kube-state-metrics 监控配置 ##########
    - job_name: kube-state-metrics
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_name]
        regex: kube-state-metrics
        action: keep
      - source_labels: [__meta_kubernetes_pod_ip]
        regex: (.+)
        target_label: __address__
        replacement: ${1}:8080
      - source_labels: [__meta_kubernetes_endpoints_name]
        action: replace
        target_label: endpoint
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: service
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

    ########## coredns 监控配置 ##########
    - job_name: coredns
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels:
          - __meta_kubernetes_service_label_k8s_app
        regex: kube-dns
        action: keep
      - source_labels: [__meta_kubernetes_pod_ip]
        regex: (.+)
        target_label: __address__
        replacement: ${1}:9153
      - source_labels: [__meta_kubernetes_endpoints_name]
        action: replace
        target_label: endpoint
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: service
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

# 配置生效
[root@master01 7]# kaf prometheus-config.yaml

# 手动热加载
[root@master01 7]# curl -XPOST http://prometheus.zhang-qing.com/-/reload