一、下载文件
# Download every release archive used in this guide into the current
# directory. wget fetches the URLs one after another, in the order given.
wget \
  https://github.com/prometheus/prometheus/releases/download/v2.37.0/prometheus-2.37.0.linux-amd64.tar.gz \
  https://releases.hashicorp.com/consul/1.12.3/consul_1.12.3_linux_amd64.zip \
  https://github.com/prometheus/consul_exporter/releases/download/v0.8.0/consul_exporter-0.8.0.linux-amd64.tar.gz \
  https://github.com/prometheus/node_exporter/releases/download/v1.3.1/node_exporter-1.3.1.linux-amd64.tar.gz \
  https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz \
  https://dl.grafana.com/enterprise/release/grafana-enterprise-9.0.5.linux-amd64.tar.gz
二、解压文件
# Install the editor and the unzip tool, then create the target directories.
yum install -y vim unzip
mkdir -p /app /app/consul/conf.d

# Consul is delivered as a zip; the extracted `consul` binary is moved into
# /app/consul/.
unzip consul_1.12.3_linux_amd64.zip
mv consul /app/consul/

# These four tarballs share one naming scheme: <name>-<version>.linux-amd64.
# Each is unpacked and its directory moved to /app/<name>.
for pkg in prometheus-2.37.0 consul_exporter-0.8.0 node_exporter-1.3.1 alertmanager-0.24.0; do
  tar -zxvf "${pkg}.linux-amd64.tar.gz"
  mv -f "${pkg}.linux-amd64" "/app/${pkg%-*}"
done

# The Grafana enterprise tarball unpacks to grafana-9.0.5, which keeps its
# versioned directory name under /app.
tar -zxvf grafana-enterprise-9.0.5.linux-amd64.tar.gz
mv -f grafana-9.0.5 /app/grafana-9.0.5
三、修改相关配置
1、Prometheus 告警规则(存放在 alertmanager 目录下)
# Create the rule directory and the four (initially empty) rule files in it.
mkdir -p /app/alertmanager/rule_files
touch /app/alertmanager/rule_files/{alertmanager,consul_exporter,node_exporter,prometheus}.rules
# Write the liveness alert rule for the "alertmanager" scrape job.
# - The delimiter is quoted ('EOF') so the shell does not expand $labels;
#   the template {{ $labels.instance }} must reach the file unchanged.
# - '>' overwrites the file, so repeating this step cannot append a second
#   "groups:" document.
# - The YAML nesting that the rule file format requires is restored.
mkdir -p /app/alertmanager/rule_files
cat > /app/alertmanager/rule_files/alertmanager.rules << 'EOF'
groups:
  - name: alertmanager 实例存活规则
    rules:
      - alert: alertmanager 实例存活告警 # 告警规则的名称(alertname)
        # 这里的job就是prometheus.yml里的job名称
        expr: up{job="alertmanager"} == 0
        for: 15s
        labels:
          severity: Disaster
          team: node
        annotations:
          summary: "alertmanager 服务节点失联告警"
          description: "alertmanager 服务节点 {{ $labels.instance }} 断联已超过 30 秒!"
EOF
# Write the liveness alert rule for the "consul_exporter" scrape job.
# - The delimiter is quoted ('EOF') so the shell does not expand $labels;
#   the template {{ $labels.instance }} must reach the file unchanged.
# - '>' overwrites the file, so repeating this step cannot append a second
#   "groups:" document.
# - The YAML nesting that the rule file format requires is restored.
mkdir -p /app/alertmanager/rule_files
cat > /app/alertmanager/rule_files/consul_exporter.rules << 'EOF'
groups:
  - name: consul_exporter 实例存活规则
    rules:
      - alert: consul_exporter 实例存活告警 # 告警规则的名称(alertname)
        # 这里的job就是prometheus.yml里的job名称
        expr: up{job="consul_exporter"} == 0
        for: 15s
        labels:
          severity: Disaster
          team: node
        annotations:
          summary: "consul_exporter 服务节点失联告警"
          description: "consul_exporter 服务节点 {{ $labels.instance }} 断联已超过 30 秒!"
EOF
# Write the liveness alert rule for the "node_exporter" scrape job.
# - The delimiter is quoted ('EOF') so the shell does not expand $labels;
#   the template {{ $labels.instance }} must reach the file unchanged.
# - '>' overwrites the file, so repeating this step cannot append a second
#   "groups:" document.
# - The YAML nesting that the rule file format requires is restored.
mkdir -p /app/alertmanager/rule_files
cat > /app/alertmanager/rule_files/node_exporter.rules << 'EOF'
groups:
  - name: node_exporter 实例存活规则
    rules:
      - alert: node_exporter 实例存活告警 # 告警规则的名称(alertname)
        # 这里的job就是prometheus.yml里的job名称
        expr: up{job="node_exporter"} == 0
        for: 15s
        labels:
          severity: Disaster
          team: node
        annotations:
          summary: "node_exporter 服务节点失联告警"
          description: "node_exporter 服务节点 {{ $labels.instance }} 断联已超过 30 秒!"
EOF
# Write the liveness alert rule for the "prometheus" scrape job.
# - The delimiter is quoted ('EOF') so the shell does not expand $labels;
#   the template {{ $labels.instance }} must reach the file unchanged.
# - '>' overwrites the file, so repeating this step cannot append a second
#   "groups:" document.
# - The YAML nesting that the rule file format requires is restored.
mkdir -p /app/alertmanager/rule_files
cat > /app/alertmanager/rule_files/prometheus.rules << 'EOF'
groups:
  - name: prometheus 实例存活规则
    rules:
      - alert: prometheus 实例存活告警 # 告警规则的名称(alertname)
        # 这里的job就是prometheus.yml里的job名称
        expr: up{job="prometheus"} == 0
        for: 15s
        labels:
          severity: Disaster
          team: node
        annotations:
          summary: "prometheus 服务节点失联告警"
          description: "prometheus 服务节点 {{ $labels.instance }} 断联已超过 30 秒!"
EOF
2、Prometheus 配置
# Open the Prometheus configuration in vim and replace its contents with the
# YAML that follows.
vim /app/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Scrape targets every 15 seconds. The default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is left at the global default (10 seconds).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            # Address (host:port) of the Alertmanager that receives alerts;
            # only needed when an Alertmanager is deployed.
            - localhost:9093

# Load rules once and periodically evaluate them according to the global
# "evaluation_interval".
rule_files:
  - "/app/alertmanager/rule_files/*.rules"
  - "/app/alertmanager/rule_files/*.groups"
  - "/app/alertmanager/rule_files/*.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape configurations. The first job scrapes Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any time series
  # scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  # Further jobs can be added; each needs a job name and the target's
  # IP/host and port.
  - job_name: "consul_exporter"
    static_configs:
      - targets: ["localhost:9107"]

  - job_name: "node_exporter"
    static_configs:
      - targets: ["localhost:9100"]

  - job_name: "alertmanager"
    static_configs:
      - targets: ["localhost:9093"]
3、consul 配置
# Make sure the Consul configuration directory exists (no-op when it is
# already there).
mkdir -p /app/consul/conf.d
# Open the service definition file in vim and paste the JSON that follows.
vim /app/consul/conf.d/services.json
{
  "datacenter": "dc1",
  "log_level": "INFO",
  "node_name": "consul-dev",
  "ui": true,
  "services": [
    {
      "ID": "alertmanager",
      "Name": "alertmanager",
      "Tags": ["primary", "v1"],
      "address": "localhost",
      "port": 9093,
      "check": {
        "http": "http://localhost:9093/metrics",
        "interval": "10s"
      }
    },
    {
      "ID": "node_exporter",
      "Name": "node_exporter",
      "Tags": ["primary", "v1"],
      "address": "localhost",
      "port": 9100,
      "check": {
        "http": "http://localhost:9100/metrics",
        "interval": "10s"
      }
    },
    {
      "ID": "consul_exporter",
      "Name": "consul_exporter",
      "Tags": ["primary", "v1"],
      "address": "localhost",
      "port": 9107,
      "check": {
        "http": "http://localhost:9107/metrics",
        "interval": "10s"
      }
    },
    {
      "ID": "prometheus",
      "Name": "prometheus",
      "Tags": ["primary", "v1"],
      "address": "localhost",
      "port": 9090,
      "check": {
        "http": "http://localhost:9090/metrics",
        "interval": "10s"
      }
    }
  ]
}
四、创建启动、停止脚本
1、启动脚本
# Create the start script in vim and paste the script body that follows.
vim /app/start.sh
# Executable by everyone but writable only by the owner; mode 777 would let
# any user rewrite the script.
chmod 755 /app/start.sh
#!/bin/bash
#
# Starts Prometheus, Consul, consul_exporter, node_exporter, Alertmanager and
# Grafana in the background, then prints every 3 seconds whether each of them
# is still running. The status loop never exits.
#
# The shebang is bash (not sh) because the script uses arrays and `echo -e`.
#
# Environment:
#   HOST  Value written in place of every "localhost" in prometheus.yml and
#         services.json. When unset or empty it is exported as "localhost";
#         when it is "localhost" the files are left untouched.

# Process names checked by the status loop.
readonly -a SERVICES=(
  prometheus consul consul_exporter node_exporter alertmanager grafana-server
)

# Files in which "localhost" is replaced by $HOST.
readonly -a HOST_CONFIG_FILES=(
  /app/prometheus/prometheus.yml
  /app/consul/conf.d/services.json
)

# Prints the current local time as "YYYY-mm-dd HH:MM:SS".
timestamp() {
  date "+%Y-%m-%d %H:%M:%S"
}

# Replaces every "localhost" in HOST_CONFIG_FILES with the given host.
# Arguments: $1 - replacement host
replace_localhost() {
  local host=$1
  local escaped file
  # '/' and '&' are special on the replacement side of sed's s command;
  # escape them so the host value is always inserted literally.
  escaped=$(printf '%s' "$host" | sed -e 's/[\/&]/\\&/g')
  for file in "${HOST_CONFIG_FILES[@]}"; do
    sed -i "s/localhost/${escaped}/g" "$file"
  done
}

# Runs a command in the background under nohup, sending stdout and stderr to
# a log file.
# Arguments: $1 - log file, remaining - command and its arguments
launch() {
  local log_file=$1
  shift
  nohup "$@" >"$log_file" 2>&1 &
}

# Prints one coloured status line per service.
# pgrep -x matches the exact process name, so neither the check itself nor a
# process that merely carries the name in its arguments is counted.
report_status() {
  local name
  for name in "${SERVICES[@]}"; do
    if pgrep -x "$name" >/dev/null; then
      echo -e "$(timestamp) \e[32m The $name is started! \e[0m"
    else
      echo -e "$(timestamp) \e[31m The $name is stop! \e[0m"
    fi
  done
}

main() {
  local host_string
  if [[ -n "${HOST:-}" ]]; then
    host_string=$HOST
    if [[ "$host_string" == "localhost" ]]; then
      echo -e "HOST Env value is localhost!"
    else
      echo -e "HOST Env value is not localhost!"
      replace_localhost "$host_string"
    fi
  else
    export HOST=localhost
    echo -e "HOST Env not found, use default value localhost!"
    host_string="localhost"
  fi
  echo -e "HOST Env value = $host_string\n"

  launch /app/prometheus/prometheus.log \
    /app/prometheus/prometheus --log.level=info \
    --config.file=/app/prometheus/prometheus.yml
  launch /app/consul/consul.log \
    /app/consul/consul agent -dev -client=0.0.0.0 -config-dir /app/consul/conf.d
  launch /app/consul_exporter/consul_exporter.log \
    /app/consul_exporter/consul_exporter --log.level=info
  launch /app/node_exporter/node_exporter.log \
    /app/node_exporter/node_exporter --log.level=info
  launch /app/alertmanager/alertmanager.log \
    /app/alertmanager/alertmanager \
    --config.file=/app/alertmanager/alertmanager.yml --log.level=info
  launch /app/grafana-9.0.5/grafana.log \
    /app/grafana-9.0.5/bin/grafana-server \
    --config /app/grafana-9.0.5/conf/defaults.ini \
    --homepath=/app/grafana-9.0.5

  while true; do
    report_status
    echo -e "$(timestamp) \e[34m Sleep 3 seconds.... \e[0m \n"
    sleep 3s
  done
}

main "$@"
2、停止脚本
# Create the stop script in vim and paste the script body that follows.
vim /app/stop.sh
# Executable by everyone but writable only by the owner; mode 777 would let
# any user rewrite the script.
chmod 755 /app/stop.sh
#!/bin/bash
#
# Stops the monitoring services by process name.
#
# Processes are matched by their exact name (pgrep -x), so stopping "consul"
# does not also hit "consul_exporter", and unrelated commands that merely
# mention a service name in their arguments are left alone.
# Each process first receives SIGTERM; SIGKILL is sent only if it is still
# alive after GRACE_SECONDS.

# Process names to stop, in this order.
readonly -a SERVICES=(
  prometheus consul consul_exporter node_exporter alertmanager grafana-server
)

# Seconds to wait for a process to exit after SIGTERM before using SIGKILL.
readonly GRACE_SECONDS=10

# Terminates one process: SIGTERM, wait up to GRACE_SECONDS, then SIGKILL.
# Arguments: $1 - PID
# Returns:   non-zero when the initial SIGTERM could not be delivered
terminate() {
  local pid=$1
  local waited=0
  kill -TERM "$pid" || return
  # kill -0 only probes for existence; its error output is expected once the
  # process has gone.
  while kill -0 "$pid" 2>/dev/null; do
    if (( waited >= GRACE_SECONDS )); then
      kill -KILL "$pid" 2>/dev/null
      return 0
    fi
    sleep 1
    waited=$(( waited + 1 ))
  done
}

# Stops every process whose name is exactly $1, logging each PID.
# Arguments: $1 - process name
stop_service() {
  local name=$1
  local pid
  local -a pids=()
  mapfile -t pids < <(pgrep -x "$name")
  for pid in "${pids[@]}"; do
    echo -e "$(date "+%Y-%m-%d %H:%M:%S") \e[32m Kill the $name process [ $pid ] \e[0m"
    terminate "$pid"
  done
}

main() {
  local name
  for name in "${SERVICES[@]}"; do
    stop_service "$name"
  done
}

main "$@"
五、构建镜像
1、Dockerfile
# Create the Dockerfile under the name `pro` (the build command selects it
# with `-f pro`) and paste the Dockerfile content that follows.
vim pro
# Image bundling Prometheus, Consul, consul_exporter, node_exporter,
# Alertmanager and Grafana, all started by /app/start.sh.
#
# NOTE(review): the COPY sources assume the build context is the /app
# directory prepared earlier in this guide (prometheus/, consul/, start.sh,
# ...) - confirm before building from another directory.

# Base image
FROM centos:7

# Maintainer (LABEL replaces the deprecated MAINTAINER instruction)
LABEL maintainer="wxhntmy"

RUN yum -y install vim net-tools \
    && yum clean all

# COPY is used instead of ADD: these are plain local files and directories,
# so ADD's archive-extraction and URL features are not needed.
COPY prometheus /app/prometheus
COPY consul /app/consul/
# The guide creates the Consul service definition as
# consul/conf.d/services.json; the previous source name "server.json" does
# not exist. Copying it explicitly makes the build fail if the file is missing.
COPY consul/conf.d/services.json /app/consul/conf.d/
COPY consul_exporter /app/consul_exporter
COPY node_exporter /app/node_exporter
COPY alertmanager /app/alertmanager
COPY grafana-9.0.5 /app/grafana-9.0.5
COPY start.sh stop.sh /app/

# Pre-create the log files written by start.sh and make both scripts
# executable. Mode 755 keeps them writable by the owner only.
RUN touch /app/prometheus/prometheus.log \
          /app/consul/consul.log \
          /app/consul_exporter/consul_exporter.log \
          /app/node_exporter/node_exporter.log \
          /app/alertmanager/alertmanager.log \
          /app/grafana-9.0.5/grafana.log \
    && chmod 755 /app/start.sh /app/stop.sh

WORKDIR /app

# Default value for the HOST variable read by start.sh.
ENV HOST=localhost

# 9090 Prometheus, 9107 consul_exporter, 9100 node_exporter and
# 9093 Alertmanager match the ports in services.json.
# 8500 and 3000 are presumably Consul and Grafana - confirm.
EXPOSE 9090 8500 9107 9100 9093 3000

CMD ["/bin/bash", "-c", "/app/start.sh"]
2、构建镜像
# Build the image from the Dockerfile named `pro`, using the current
# directory as the build context, and tag it prometheus-grafana-all:1.0.0.
docker build -f pro -t prometheus-grafana-all:1.0.0 .
六、运行容器
# Start a detached container named `pro`, pass the HOST environment variable
# into it and publish the six service ports on the same host port numbers.
docker run -itd --name pro -e HOST=192.168.102.129 \
-p 9090:9090 -p 8500:8500 -p 9100:9100 -p 9107:9107 -p 9093:9093 -p 3000:3000 \
prometheus-grafana-all:1.0.0
# List all containers, including stopped ones.
docker ps -a
# Follow the log output of the `pro` container.
docker logs -f pro
# Open an interactive bash shell inside the `pro` container.
docker exec -it pro /bin/bash