process-exporter主要用来做进程监控,比如某个服务的进程数、消耗了多少CPU、内存、IO资源等。
process-exporter [options] -config.path filename.yml
要选择需要监控的进程并对其分组,可以提供命令行参数,也可以使用 yaml 配置文件。推荐通过 -config.path 指定配置文件。
-config.path yaml 文件的常规格式是顶级 process_names 部分,其中包含名称匹配器列表:
process_names:
  - matcher1
  - matcher2
  ...
  - matcherN
deb/rpm 软件包附带的默认配置为:
process_names:
  - name: "{{.Comm}}"
    cmdline:
    - '.+'
一个进程仅可能属于一个组:即使匹配多个,也只会归属于第一个匹配的 groupname 组。
process_names 中的每一项都提供了识别和命名进程的方法。可选的 name 标签定义了用于命名匹配进程的模板;如果未指定,则 name 默认为 {{.ExeBase}}。
可用的模板变量:
{{.Comm}} 包含原始可执行文件的基本名称,即 /proc/<pid>/stat 中的第 2 个字段
{{.ExeBase}} 包含可执行文件的基本名称
{{.ExeFull}} 包含可执行文件的标准路径
{{.Username}} 包含有效用户的用户名
{{.Matches}} 包含所有由于应用cmdline正则表达式而产生的匹配项
{{.PID}} 包含进程的 PID。请注意,使用 PID 意味着该组将仅包含一个进程
{{.StartTime}} 包含进程的启动时间。与 PID 结合使用时,这很有用,因为 PID 会随着时间的推移而被重用
不建议使用 PID 或 StartTime:这并不会得到想要的结果,并且可能会导致 prometheus 遇到麻烦——metrics 基数过高。
process_exporter 配置参考:https://github.com/ncabatoff/process-exporter/
touch process.sh && chmod +x process.sh
#!/bin/bash
# Installs and controls process_exporter.
# The settings below are read by the functions defined further down.

# Release version of process-exporter to download.
PROCESS_EXPORTER_VER="0.7.10"

# Directory the release tarball is unpacked into.
PROCESS_EXPORTER_DIR="/app/process_exporter"

# Directory where the downloaded tarball is kept.
INSTALL_PACKAGE="/app/install"

# File that receives the exporter's stdout and stderr.
LOG_FILE="/tmp/process_exporter.log"

# Account the exporter runs as.
PROCESS_EXPORTER_USER="prometheus"

# TCP port the exporter listens on.
PROCESS_EXPORTER_PORT="9256"
#######################################
# Report the outcome of the command that ran immediately before this call.
# It reads $? on entry, so it must be invoked directly after the command
# being checked, with nothing in between.
# Arguments:
#   $1 - message printed when the previous command failed
#   $2 - message printed when the previous command succeeded
# Outputs:
#   The selected message on stdout. Backslash escapes in the message
#   (for example the \033[..m colour codes the callers pass) are interpreted.
# Returns:
#   0 after printing the success message; exits the script with status 1
#   after printing the failure message.
#######################################
check_err() {
  # Capture the caller's status first; any other command would overwrite $?.
  local -r status=$?

  if (( status != 0 )); then
    # %b expands the escape sequences that a plain bash `echo` prints literally.
    printf '%b\n' "$1"
    exit 1
  fi

  printf '%b\n' "$2"
}
#######################################
# Install process-exporter if needed and start it in the background as
# PROCESS_EXPORTER_USER, then open the listening port in firewalld when
# firewalld is active.
# Globals (read):
#   PROCESS_EXPORTER_VER, PROCESS_EXPORTER_DIR, INSTALL_PACKAGE, LOG_FILE,
#   PROCESS_EXPORTER_USER, PROCESS_EXPORTER_PORT
# Outputs:
#   Progress messages on stdout.
# Returns:
#   Exits the script with status 1 when the port is already in use, when a
#   dependency install, the download or the extraction fails, or when the
#   port does not come up within 60 polls spaced 2 seconds apart.
#######################################
start() {
  local -r archive_name="process-exporter-${PROCESS_EXPORTER_VER}.linux-amd64.tar.gz"
  local -r tarball="${INSTALL_PACKAGE}/${archive_name}"
  local -r url="https://github.com/ncabatoff/process-exporter/releases/download/v${PROCESS_EXPORTER_VER}/${archive_name}"
  local -r binary="${PROCESS_EXPORTER_DIR}/process-exporter"
  local -r config="${PROCESS_EXPORTER_DIR}/process-exporter.yaml"
  # Trailing whitespace class makes the match exact (":9256" must not hit ":92560").
  local -r port_pattern=":${PROCESS_EXPORTER_PORT}[[:space:]]"
  local attempts_left=60

  mkdir -p /app

  # Install wget when it is missing.
  if ! command -v wget > /dev/null 2>&1; then
    yum install wget -y
    check_err "\033[31myum安装依赖包 wget 失败,请检查\033[0m" "\033[36myum安装依赖包 wget 成功\033[0m"
  fi

  # Install net-tools (provides netstat) when it is missing.
  if ! command -v netstat > /dev/null 2>&1; then
    yum install net-tools -y
    check_err "\033[31myum安装依赖包 net-tools 失败,请检查\033[0m" "\033[36myum安装依赖包 net-tools 成功\033[0m"
  fi

  if netstat -tln | grep -q "${port_pattern}"; then
    echo "PROCESS_EXPORTER 已启动,请勿重复启动应用!"
    exit 1
  fi
  echo "正在启动 PROCESS_EXPORTER....."

  # Download to a temporary name and rename on success: `wget -O` creates the
  # output file even when the transfer fails, which would otherwise make every
  # later run believe the tarball is already present.
  mkdir -p "${INSTALL_PACKAGE}"
  if [[ ! -s "${tarball}" ]]; then
    if ! wget -O "${tarball}.part" "${url}"; then
      rm -f -- "${tarball}.part"
      echo "ERROR: 下载 ${url} 失败,请检查!"
      exit 1
    fi
    mv -f -- "${tarball}.part" "${tarball}"
  fi

  # Decide on the presence of the binary rather than "directory not empty",
  # so a half-finished earlier install is repaired instead of skipped.
  mkdir -p "${PROCESS_EXPORTER_DIR}"
  if [[ -x "${binary}" ]]; then
    echo "目录 $PROCESS_EXPORTER_DIR 不为空,不再重复解压压缩包"
  elif ! tar -xvzf "${tarball}" --strip-components 1 -C "${PROCESS_EXPORTER_DIR}"; then
    echo "ERROR: 解压 ${tarball} 失败,请检查!"
    exit 1
  fi

  # Exact-name lookups; a substring grep of /etc/group or /etc/passwd would
  # also match unrelated entries that merely contain the name.
  if ! getent group "${PROCESS_EXPORTER_USER}" > /dev/null; then
    groupadd -g 10002 "${PROCESS_EXPORTER_USER}"
  fi
  if ! getent passwd "${PROCESS_EXPORTER_USER}" > /dev/null; then
    useradd -u 10002 -g "${PROCESS_EXPORTER_USER}" \
      -d "/home/${PROCESS_EXPORTER_USER}" -m "${PROCESS_EXPORTER_USER}"
  fi

  # Write the default configuration only when none exists, so edits made by
  # the operator survive a restart.
  if [[ ! -f "${config}" ]]; then
    printf '%s\n' \
      'process_names:' \
      '  - name: "{{.Matches}}"' \
      '    cmdline:' \
      "    - 'process-exporter'" \
      > "${config}"
  fi

  chown -R "${PROCESS_EXPORTER_USER}:${PROCESS_EXPORTER_USER}" "${PROCESS_EXPORTER_DIR}"
  [[ -f "${LOG_FILE}" ]] || touch "${LOG_FILE}"
  chown -R "${PROCESS_EXPORTER_USER}:${PROCESS_EXPORTER_USER}" "${LOG_FILE}"

  # Launch as the service user. The command is fed to the login shell on
  # stdin; all variables are expanded here, before the user switch.
  su - "${PROCESS_EXPORTER_USER}" <<< "nohup ${binary} --config.path=${config} >> ${LOG_FILE} 2>&1 &"

  # Poll from this shell (not inside the su session) so that a timeout really
  # aborts the script instead of only ending the inner shell.
  until netstat -tln | grep -q "${port_pattern}"; do
    if (( attempts_left <= 0 )); then
      echo "ERROR: 启动${PROCESS_EXPORTER_PORT}端口超时,请检查!"
      exit 1
    fi
    echo "等待监听端口启动..."
    sleep 2
    attempts_left=$(( attempts_left - 1 ))
  done
  echo "PROCESS_EXPORTER 启动成功!"

  # Open the port only when firewalld is actually running.
  if systemctl is-active --quiet firewalld; then
    firewall-cmd --add-port="${PROCESS_EXPORTER_PORT}/tcp" --permanent
    firewall-cmd --reload
  fi
}
#######################################
# Stop the running process-exporter and wait for its port to close.
# Sends SIGTERM first and falls back to SIGKILL if the port is still open
# after the first wait period.
# Globals (read):
#   PROCESS_EXPORTER_DIR, PROCESS_EXPORTER_USER, PROCESS_EXPORTER_PORT
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 once the port is closed. Exits the script with status 1 when nothing is
#   listening on the port (via check_err) or when the port never closes.
#######################################
stop() {
  local -r binary="${PROCESS_EXPORTER_DIR}/process-exporter"
  # Trailing whitespace class makes the match exact (":9256" must not hit ":92560").
  local -r port_pattern=":${PROCESS_EXPORTER_PORT}[[:space:]]"
  local sig pid attempts_left

  netstat -tln | grep -q "${port_pattern}"
  check_err "PROCESS_EXPORTER 未启动!" "PROCESS_EXPORTER 正在运行,开始停止应用....."

  for sig in TERM KILL; do
    # pgrep yields one PID per line and never matches itself; -u restricts the
    # search to the service account's processes.
    while IFS= read -r pid; do
      echo "Kill the PROCESS_EXPORTER process [ ${pid} ]"
      kill -s "${sig}" "${pid}"
    done < <(pgrep -u "${PROCESS_EXPORTER_USER}" -f "${binary}")

    # Up to 30 polls, 2 seconds apart, per signal.
    attempts_left=30
    while (( attempts_left > 0 )) && netstat -tln | grep -q "${port_pattern}"; do
      echo "等待监听端口关闭..."
      sleep 2
      attempts_left=$(( attempts_left - 1 ))
    done

    if ! netstat -tln | grep -q "${port_pattern}"; then
      echo "关闭 PROCESS_EXPORTER 成功!"
      return 0
    fi
  done

  echo "ERROR: 关闭${PROCESS_EXPORTER_PORT}端口超时,请检查!"
  exit 1
}
#######################################
# Stop the exporter if it is running, then start it.
# stop terminates the shell with `exit 1` when nothing is listening, so it is
# run in a subshell: that exit is contained and start still runs.
# Returns:
#   Whatever start returns (start may itself exit the script).
#######################################
restart() {
  ( stop ) || true
  start
}
#######################################
# List the exporter's processes by running a ps/grep pipeline as
# PROCESS_EXPORTER_USER. The grep and su helper processes are filtered out.
# Globals (read): PROCESS_EXPORTER_DIR, PROCESS_EXPORTER_USER
# Outputs: whatever the pipeline prints.
#######################################
status() {
  local binary_path="$PROCESS_EXPORTER_DIR/process-exporter"
  local listing_cmd="ps aux | grep $binary_path"
  listing_cmd+=" | grep -v \"grep\""
  listing_cmd+=" | grep -v \"su $PROCESS_EXPORTER_USER\""

  su $PROCESS_EXPORTER_USER -c "$listing_cmd"
}
#######################################
# Delete the exporter's log file and installation directory.
# Globals (read): LOG_FILE, PROCESS_EXPORTER_DIR
# Returns:
#   Status of the last rm. Aborts with an error if either variable is unset
#   or empty, so rm -rf is never handed an empty path.
#######################################
remove() {
  # Quoted so paths containing whitespace are removed as a single operand;
  # `--` stops option parsing for paths that start with a dash.
  rm -rf -- "${LOG_FILE:?LOG_FILE is not set}"
  rm -rf -- "${PROCESS_EXPORTER_DIR:?PROCESS_EXPORTER_DIR is not set}"
  #userdel -r $PROCESS_EXPORTER_USER
}
#######################################
# Print the list of supported sub-commands, framed by a line holding a single
# space above and below.
# Outputs: the help text on stdout.
#######################################
usage() {
  printf '%s\n' \
    " " \
    "Please input start to start PROCESS_EXPORTER." \
    "Please input stop to stop PROCESS_EXPORTER." \
    "Please input status to get PROCESS_EXPORTER status." \
    "Please input restart to restart PROCESS_EXPORTER." \
    "Please input remove to remove PROCESS_EXPORTER." \
    " "
}
# Entry point: the first argument selects the action; with no argument (or an
# empty one) the action defaults to "start". Unknown actions print the help.
INPUT_ACTIVE=$1
: "${INPUT_ACTIVE:=start}"
ACTIVE=${INPUT_ACTIVE}

case "${ACTIVE}" in
  start | stop | status | restart | remove)
    # Each recognised action has a function of the same name.
    "${ACTIVE}"
    ;;
  *)
    usage
    ;;
esac
vim /app/process_exporter/process-exporter.yaml
process_names:
  - name: "{{.Matches}}"
    cmdline:
    - 'docker'
systemctl restart process_exporter

vim /app/prometheus/prometheus.yml
  - job_name: 'wxhntmy-process' #进程监控
    static_configs:
      - targets: ['localhost:9256']

vim /app/prometheus/prometheus.yml
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "/app/prometheus/*_rules.yaml"
  # - "first_rules.yml"
  # - "second_rules.yml"

vim /app/prometheus/process_exporter_rules.yaml

groups:
  - name: process
    rules:
      - alert: ProcessDockerDown
        expr: (namedprocess_namegroup_num_procs{groupname="map[:docker]"}) == 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }}: Process Docker Down"
          description: "{{ $labels.instance }}: Process Docker has been down for more than 1m"
          value: "{{ $value }}"

重启Prometheus
systemctl restart prometheus



:::info
Grafana 仪表盘的 Import ID 是:249
:::
