在k8s集群的master1节点操作
# Create the namespace that will hold the monitoring components.
kubectl create ns monitor-sa
# Make sure the working directory exists before entering it (no-op if it already does).
mkdir -p /root/k8s/monitor
cd /root/k8s/monitor
# Create the node-exporter manifest file.
vi /root/k8s/monitor/node-export.yaml
内容
# node-exporter DaemonSet: runs one node-exporter pod per node in the
# monitor-sa namespace, listening on port 9100.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  # Name of the DaemonSet
  name: node-exporter
  # Namespace the DaemonSet is created in
  namespace: monitor-sa
  # Labels attached to the DaemonSet object itself
  labels:
    name: node-exporter
spec:
  # Selector for the pods this DaemonSet manages; must match the template labels
  selector:
    matchLabels:
      name: node-exporter
  # Pod template
  template:
    metadata:
      labels:
        name: node-exporter
    # Pod specification
    spec:
      # Share the host's PID namespace
      hostPID: true
      # Share the host's IPC namespace
      hostIPC: true
      # Share the host's network namespace
      hostNetwork: true
      containers:
        - name: node-exporter
          image: prom/node-exporter:v0.16.0
          ports:
            - containerPort: 9100
          resources:
            requests:
              # 150 millicores (same quantity as 0.15 CPU, canonical form)
              cpu: 150m
          securityContext:
            # Run privileged so the container has real root privileges
            privileged: true
          args:
            # Read /proc and /sys from the hostPath mounts declared below
            - --path.procfs
            - /host/proc
            - --path.sysfs
            - /host/sys
            - --collector.filesystem.ignored-mount-points
            # The regex is passed verbatim (no shell is involved), so it must
            # not carry an extra pair of double quotes: literal quote
            # characters would become part of the pattern and nothing would
            # ever match.
            - '^/(sys|proc|dev|host|etc)($|/)'
          # Mount the host directories declared under "volumes"
          volumeMounts:
            - name: dev
              mountPath: /host/dev
            - name: proc
              mountPath: /host/proc
            - name: sys
              mountPath: /host/sys
            - name: rootfs
              mountPath: /rootfs
      # Tolerate the master NoSchedule taint so master nodes also get a pod
      tolerations:
        - key: "node-role.kubernetes.io/master"
          operator: "Exists"
          effect: "NoSchedule"
      # hostPath volumes exposing host directories to the container
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: dev
          hostPath:
            path: /dev
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /
# Create or update the DaemonSet from the manifest file.
kubectl apply --filename=/root/k8s/monitor/node-export.yaml
# List the pods in the monitor-sa namespace.
kubectl get pods --namespace=monitor-sa
# Fetch the metrics endpoint on port 9100.
curl http://192.168.187.154:9100/metrics
# HELP node_cpu_seconds_total Seconds the cpus spent in each mode.
# TYPE node_cpu_seconds_total counter
node_cpu_seconds_total{cpu="0",mode="idle"} 56136.98
# HELP node_load1 1m load average.
# TYPE node_load1 gauge
node_load1 0.58
# HELP node_cpu_seconds_total Seconds the cpus spent in each mode.
# TYPE node_cpu_seconds_total counter
node_cpu_seconds_total{cpu="0",mode="idle"} 56136.98
# HELP:当前指标的含义
上面表示:在每种模式下,node节点的cpu花费的时间,以s为单位
# TYPE:当前指标的数据类型
上面表示:counter类型
总结:
cpu0 在 idle(空闲)模式下累计花费的时间为 56136.98 秒
指标类型:counter
# HELP node_load1 1m load average.
# TYPE node_load1 gauge
node_load1 0.58
# HELP:当前指标的含义
上面表示:当前主机在最近1分钟以内的平均负载
# TYPE:当前指标的数据类型
上面表示:gauge类型
总结:
当前主机在最近1分钟以内的平均负载是 0.58
指标类型:gauge类型
counter(计数器):只增不减的累计型指标(进程重启时会归零)
gauge(仪表盘):表示瞬时值的指标,数值可增可减