二、Docker-compose 安装 Prometheus
1、【方式一】手动创建 docker-compose 和 配置文件
创建alertmanager的配置文件 - config.yml
新建grafana的配置文件 - config.monitoring
新建prometheus的配置文件 - prometheus.yml
localhost
ip:192.168.11.61
2核4g
Ubuntu 20.04
docker 版本23.0.1
docker-compose版本1.29.2
-
- #创建文件夹,父级目录也被自动创建
- sudo mkdir -p /etc/docker
- sudo tee /etc/docker/daemon.json <<-'EOF'
- {
- "registry-mirrors": ["https://n14or9zx.mirror.aliyuncs.com",
- "https://mirror.ccs.tencentyun.com",
- "http://registry.docker-cn.com",
- "http://docker.mirrors.ustc.edu.cn",
- "http://hub-mirror.c.163.com"],
- "insecure-registries": [
- "registry.docker-cn.com",
- "docker.mirrors.ustc.edu.cn"
- ],
- "log-driver": "json-file",
- "log-opts": {
- "max-size": "500m"
- }
- }
- EOF
- #设置下载镜像源地址(get.docker.com 安装脚本会读取 DOWNLOAD_URL 环境变量)
- export DOWNLOAD_URL="http://mirrors.163.com/docker-ce"
- curl -fsSL https://get.docker.com/ | sh
- docker -v
- 或
- systemctl status docker
- # 下载
- curl -L https://get.daocloud.io/docker/compose/releases/download/1.29.2/docker-compose-`uname -s`-`uname -m` > /usr/local/bin/docker-compose
-
- # 设置文件具备执行权限
- chmod +x /usr/local/bin/docker-compose
-
- # 查看版本
- docker-compose --version
- #切换到root用户
- sudo -i
- mkdir /data/docker-prometheus -p
- mkdir /data/docker-prometheus/{grafana,prometheus,alertmanager} -p
- cd /data/docker-prometheus/
- global:
- #163服务器
- smtp_smarthost: 'smtp.163.com:465'
- #发邮件的邮箱
- smtp_from: 'cdring@163.com'
- #发邮件的邮箱用户名,也就是你的邮箱
- smtp_auth_username: 'cdring@163.com'
- #发邮件的邮箱密码
- smtp_auth_password: 'your-password'
- #是否强制使用STARTTLS;465端口为隐式TLS(SMTPS),因此此处设为false
- smtp_require_tls: false
-
- route:
- group_by: ['alertname']
- # 当收到告警的时候,等待group_wait配置的时间,看是否还有告警,如果有就一起发出去
- group_wait: 10s
- # 如果上次告警信息发送成功,此时又来了一个新的告警数据,则需要等待group_interval配置的时间才可以发送出去
- group_interval: 10s
- # 如果上次告警信息发送成功,且问题没有解决,则等待 repeat_interval配置的时间再次发送告警数据
- repeat_interval: 10m
- # 全局报警组,这个参数是必选的
- receiver: email
-
- receivers:
- - name: 'email'
- #收邮件的邮箱
- email_configs:
- - to: 'cdring@163.com'
- inhibit_rules:
- - source_match:
- severity: 'critical'
- target_match:
- severity: 'warning'
- equal: ['alertname', 'dev', 'instance']
- # admin登录密码为password
- GF_SECURITY_ADMIN_PASSWORD=password
- GF_USERS_ALLOW_SIGN_UP=false
- # 全局配置
- global:
- scrape_interval: 15s # 将搜刮间隔设置为每15秒一次。默认是每1分钟一次。
- evaluation_interval: 15s # 每15秒评估一次规则。默认是每1分钟一次。
-
- # Alertmanager 配置
- alerting:
- alertmanagers:
- - static_configs:
- - targets: ['alertmanager:9093']
-
- # 报警(触发器)配置
- rule_files:
- - "alert.yml"
-
- # 搜刮配置
- scrape_configs:
- - job_name: 'prometheus'
- # 覆盖全局默认值,每15秒从该作业中刮取一次目标
- scrape_interval: 15s
- static_configs:
- - targets: ['localhost:9090']
- - job_name: 'alertmanager'
- scrape_interval: 15s
- static_configs:
- - targets: ['alertmanager:9093']
- - job_name: 'cadvisor'
- scrape_interval: 15s
- static_configs:
- - targets: ['cadvisor:8080']
- labels:
- instance: Prometheus服务器
-
- - job_name: 'node-exporter'
- scrape_interval: 15s
- static_configs:
- - targets: ['node_exporter:9100']
- labels:
- instance: Prometheus服务器
- groups:
- - name: Prometheus alert
- rules:
- # 对任何实例超过30秒无法联系的情况发出警报
- - alert: 服务告警
- expr: up == 0
- for: 30s
- labels:
- severity: critical
- annotations:
- summary: "服务异常,实例:{{ $labels.instance }}"
- description: "{{ $labels.job }} 服务已关闭"
- version: '3.3'
-
- # 存储卷
- volumes:
- prometheus_data: {}
- grafana_data: {}
-
- networks:
- monitoring:
- driver: bridge
-
- services:
- prometheus:
- image: prom/prometheus:v2.37.6
- container_name: prometheus
- restart: always
- volumes:
- - /etc/localtime:/etc/localtime:ro # 本地时区挂载在镜像中
- - ./prometheus/:/etc/prometheus/
- - prometheus_data:/prometheus # 数据存储位置
- command:
- - '--config.file=/etc/prometheus/prometheus.yml'
- - '--storage.tsdb.path=/prometheus'
- - '--web.console.libraries=/usr/share/prometheus/console_libraries' # 控制台库
- - '--web.console.templates=/usr/share/prometheus/consoles' # 控制台模板
- #热加载配置
- - '--web.enable-lifecycle'
- #api配置
- #- '--web.enable-admin-api'
- #历史数据最大保留时间,默认15天
- - '--storage.tsdb.retention.time=30d'
- networks:
- - monitoring
- links:
- - alertmanager
- - cadvisor
- - node_exporter
- expose:
- - '9090'
- ports:
- - 9090:9090
- depends_on:
- - cadvisor # 先启动cadvisor容器,再启动prometheus(仅保证启动顺序,不等待服务就绪)
-
- alertmanager:
- image: prom/alertmanager:v0.25.0
- container_name: alertmanager
- restart: always
- volumes:
- - /etc/localtime:/etc/localtime:ro
- - ./alertmanager/:/etc/alertmanager/
- command:
- - '--config.file=/etc/alertmanager/config.yml'
- - '--storage.path=/alertmanager'
- networks:
- - monitoring
- expose:
- - '9093'
- ports:
- - 9093:9093
-
- # 监控容器
- cadvisor:
- image: google/cadvisor:latest
- container_name: cadvisor
- restart: always
- volumes:
- - /etc/localtime:/etc/localtime:ro
- - /:/rootfs:ro
- - /var/run:/var/run:rw
- - /sys:/sys:ro
- - /var/lib/docker/:/var/lib/docker:ro
- networks:
- - monitoring
- expose:
- - '8080'
-
- node_exporter:
- image: prom/node-exporter:v1.5.0
- container_name: node-exporter
- restart: always
- volumes:
- - /etc/localtime:/etc/localtime:ro
- - /proc:/host/proc:ro
- - /sys:/host/sys:ro
- - /:/rootfs:ro
- command:
- - '--path.procfs=/host/proc'
- - '--path.sysfs=/host/sys'
- - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc|rootfs/var/lib/docker)($$|/)'
- networks:
- - monitoring
- ports:
- - '9100:9100'
-
- grafana:
- image: grafana/grafana:9.4.3
- container_name: grafana
- restart: always
- volumes:
- - /etc/localtime:/etc/localtime:ro
- - grafana_data:/var/lib/grafana
- - ./grafana/provisioning/:/etc/grafana/provisioning/
- env_file:
- - ./grafana/config.monitoring
- networks:
- - monitoring
- links:
- - prometheus
- ports:
- - 3000:3000
- depends_on:
- - prometheus
- mkdir /mnt/docker/
- cd /mnt/docker/
- git clone https://gitee.com/linge365/docker-prometheus.git
- cd docker-prometheus
- cd /data/docker-prometheus
- docker-compose up -d
docker ps
ss -lntp|egrep "3000|9090|9100|9093"
| 应用 | 访问地址 | 账号密码 |
| prometheus | http://xxx.116.6.228:9090 | 无 |
| grafana | http://xxx.116.6.228:3000 | admin/password |
| alertmanager | http://xxx.116.6.228:9093 | 无 |
| node-exporter | http://xxx.116.6.228:9100/metrics | 无 |











