Prometheus 二进制安装
下载解压二进制程序
mkdir -p /apps && cd /apps
# wget https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz
wget https://github.com/prometheus/prometheus/releases/download/v2.35.0/prometheus-2.35.0.linux-amd64.tar.gz
tar xf prometheus-2.35.0.linux-amd64.tar.gz

# 创建软连接
ln -sv prometheus-2.35.0.linux-amd64 /apps/prometheus

# 检查配置文件 (promtool 位于 /apps/prometheus 目录下,需先进入该目录)
cd /apps/prometheus
./promtool check config prometheus.yml
创建 prometheus service 启动脚本
vim /etc/systemd/system/prometheus.service
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
Restart=on-failure
WorkingDirectory=/apps/prometheus/
ExecStart=/apps/prometheus/prometheus --config.file=/apps/prometheus/prometheus.yml

[Install]
WantedBy=multi-user.target
启动 prometheus 服务
systemctl daemon-reload
systemctl restart prometheus
systemctl enable prometheus
node exporter 二进制安装
下载解压二进制程序
cd /apps
wget https://github.com/prometheus/node_exporter/releases/download/v1.3.1/node_exporter-1.3.1.linux-amd64.tar.gz
tar xf node_exporter-1.3.1.linux-amd64.tar.gz

# 创建软连接
ln -sv /apps/node_exporter-1.3.1.linux-amd64 /apps/node_exporter
创建 node-exporter service 启动脚本
vim /etc/systemd/system/node-exporter.service
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
ExecStart=/apps/node_exporter/node_exporter

[Install]
WantedBy=multi-user.target
启动 node exporter 服务
systemctl daemon-reload
systemctl restart node-exporter
systemctl enable node-exporter.service
添加node节点数据收集
vim /apps/prometheus/prometheus.yml
  - job_name: 'prometheus-node'
    static_configs:
      - targets: ['192.168.15.100:9100']
重启服务
systemctl restart prometheus.service
Alertmanager 二进制安装
下载解压二进制程序
cd /apps
wget https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz
tar xf alertmanager-0.24.0.linux-amd64.tar.gz

# 创建软连接
ln -sv /apps/alertmanager-0.24.0.linux-amd64 /apps/alertmanager
创建 alertmanager service 启动脚本
vim /etc/systemd/system/alertmanager.service
[Unit]
Description=alertmanager
Documentation=https://github.com/prometheus/alertmanager
After=network.target

[Service]
Type=simple
User=root
ExecStart=/apps/alertmanager/alertmanager --config.file=/apps/alertmanager/alertmanager.yml
Restart=on-failure

[Install]
WantedBy=multi-user.target
编辑配置文件 (邮件)
vim /apps/alertmanager/alertmanager.yml
global:
  resolve_timeout: 5m
  smtp_from: 'xxxx@qq.com'
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_auth_username: 'xxxx@qq.com'
  smtp_auth_password: 'uuxxxxdvnxzbiaf'
  smtp_require_tls: false
  smtp_hello: '@qq.com'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 2m
  repeat_interval: 5m
  receiver: 'web.hook'

receivers:
  - name: 'web.hook'
    email_configs:
      - to: 'xxxx@qq.com'

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
编辑配置文件 (企业微信)
vim /apps/alertmanager/alertmanager.yml
global:
  resolve_timeout: 5m

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 60m
  receiver: 'wechat'

receivers:
  - name: 'wechat'
    webhook_configs:
      - url: 'http://172.20.254.138:9080/wechatbot'
        send_resolved: true

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
启动 Alertmanager服务
systemctl daemon-reload
systemctl start alertmanager
systemctl enable alertmanager
安装 Adapter 适配器
git clone https://github.com/lckei/prometheus-wechatbot-webhook.git
编辑 Dockerfile
cat Dockerfile
FROM alpine:latest
LABEL maintainer="kei"
ENV VERSION 1.0

WORKDIR /apps
ADD src/app /apps/app
RUN chmod +x /apps/app
ADD src/wechatbot2.tmpl /apps/wechatbot.tmpl
EXPOSE 9080
CMD ["/apps/app"]
编辑告警模版文件
{{ define "wechatbot.url.api" }}https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=2730f396-a070-4618-aedb-d290ad132ffc{{end}}
{{- if gt (len .Alerts.Firing) 0 -}}
{{- range $index, $alert := .Alerts -}}
{{- if eq $index 0 }}
==========异常告警==========
告警类型: {{ $alert.Labels.alertname }}
告警级别: {{ $alert.Labels.severity }}
告警详情: {{ $alert.Annotations.description}};{{$alert.Annotations.summary}}
故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
{{- if gt (len $alert.Labels.instance) 0 }}
实例信息: {{ $alert.Labels.instance }}
{{- end }}
{{- if gt (len $alert.Labels.namespace) 0 }}
命名空间: {{ $alert.Labels.namespace }}
{{- end }}
{{- if gt (len $alert.Labels.node) 0 }}
节点信息: {{ $alert.Labels.node }}
{{- end }}
{{- if gt (len $alert.Labels.pod) 0 }}
实例名称: {{ $alert.Labels.pod }}
{{- end }}
============END============
{{- end }}
{{- end }}
{{- end }}
{{- if gt (len .Alerts.Resolved) 0 -}}
{{- range $index, $alert := .Alerts -}}
{{- if eq $index 0 }}
==========异常恢复==========
告警类型: {{ $alert.Labels.alertname }}
告警级别: {{ $alert.Labels.severity }}
告警详情: {{ $alert.Annotations.description}};{{$alert.Annotations.summary}}
故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
恢复时间: {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
{{- if gt (len $alert.Labels.instance) 0 }}
实例信息: {{ $alert.Labels.instance }}
{{- end }}
{{- if gt (len $alert.Labels.namespace) 0 }}
命名空间: {{ $alert.Labels.namespace }}
{{- end }}
{{- if gt (len $alert.Labels.node) 0 }}
节点信息: {{ $alert.Labels.node }}
{{- end }}
{{- if gt (len $alert.Labels.pod) 0 }}
实例名称: {{ $alert.Labels.pod }}
{{- end }}
============END============
{{- end }}
{{- end }}
{{- end }}
构建 Adapter
docker build -t wechatbot:v1 .
运行 Adapter
# 注意: -v 挂载宿主机文件时必须使用绝对路径,相对路径会被 Docker 当作卷名而报错
docker run -d --name wechatbot --restart=always \
  -v /etc/localtime:/etc/localtime \
  -v "$(pwd)/src/wechatbot.tmpl":/apps/wechatbot.tmpl \
  -p 9080:9080 wechatbot:v1
rpm 安装 grafana
安装 mysql
# 下载mysql源安装包
wget http://dev.mysql.com/get/mysql57-community-release-el7-8.noarch.rpm

# 安装mysql源
yum localinstall mysql57-community-release-el7-8.noarch.rpm -y

# 检查mysql源是否安装成功
yum repolist enabled | grep "mysql.*-community.*"

# 安装MySQL (5.7需绕过验证)
yum install mysql-community-server -y --nogpgcheck

# 3、启动MySQL服务
systemctl start mysqld

# 查看MySQL的启动状态
systemctl status mysqld

# 4、开机启动
systemctl enable mysqld
systemctl daemon-reload

# 5、修改root本地登录密码
# mysql安装完成之后,在/var/log/mysqld.log文件中给root生成了一个默认密码。通过下面的方式找到root默认密码,然后登录mysql进行修改:
grep 'temporary password' /var/log/mysqld.log
mysql -uroot -p

# mysql5.7默认安装了密码安全检查插件(validate_password),默认密码检查策略要求密码必须包含:大小写字母、数字和特殊符号,
# 并且长度不能少于8位。否则会提示ERROR 1819 (HY000): Your password does not satisfy the current policy requirements错误

# 如果不需要密码策略,在my.cnf文件中添加如下配置禁用即可:
# 配置默认编码为utf8
# 关闭客户端dns反解
echo -e "validate_password = off\ncharacter_set_server=utf8\ninit_connect='SET NAMES utf8'\nskip-name-resolve\n" >> /etc/my.cnf
systemctl restart mysqld

mysql -uroot -p
# 使用临时密码登录后,必须先执行 ALTER USER 重置密码,才能执行其他语句
alter user 'root'@'localhost' identified by '123123';
grant all privileges on *.* to root@'%' identified by '123123' with grant option;
flush privileges;
下载安装包
cd /apps
# 地址 https://grafana.com/grafana/download
# 以下三个版本按需选择其中一个安装即可
# 下载grafana7 (下载到当前目录,与后面的 yum install 路径保持一致)
wget https://dl.grafana.com/oss/release/grafana-7.5.1-1.x86_64.rpm
yum install grafana-7.5.1-1.x86_64.rpm

# 下载grafana8
wget https://dl.grafana.com/oss/release/grafana-8.5.1-1.x86_64.rpm
yum install -y grafana-8.5.1-1.x86_64.rpm

# 下载grafana10 (建议安装10及以上的,UI更漂亮~)
wget https://dl.grafana.com/oss/release/grafana-10.2.1-1.x86_64.rpm
yum install -y grafana-10.2.1-1.x86_64.rpm
在 mysql 中创建数据库
CREATE DATABASE IF NOT EXISTS grafana DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
修改配置文件
vim /etc/grafana/grafana.ini

# 以下配置项位于 [database] 段; password 需与前面为 mysql root 用户设置的密码 (123123) 保持一致
type = mysql
host = 127.0.0.1:3306
name = grafana
user = root
password = 123123
启动服务
systemctl start grafana-server
systemctl enable grafana-server
systemctl status grafana-server

# 查看日志
tail -f /var/log/grafana/grafana.log