process-exporter主要用来做进程监控,比如某个服务的进程数、消耗了多少CPU、内存、IO资源等。
process-exporter [options] -config.path filename.yml
要选择需要监控的进程并将其分组,可以提供命令行参数,也可以使用 YAML 配置文件。推荐通过 -config.path 指定配置文件。
通过 -config.path 指定的 YAML 文件,其常规格式是一个顶级的 process_names 部分,其中包含名称匹配器(matcher)列表:
process_names:
  - matcher1
  - matcher2
  ...
  - matcherN
deb/rpm 软件包附带的默认配置为:
process_names:
  - name: "{{.Comm}}"
    cmdline:
    - '.+'
一个进程仅可能属于一个组:即使匹配多个,也只会归属于第一个匹配的 groupname 组。
process_names 中的每一项都提供了用于识别和命名进程的方法。可选的 name 标签定义了用于命名所匹配进程的模板;如果未指定,则 name 默认为 {{.ExeBase}}。
可用的模板变量:
- {{.Comm}} 包含原始可执行文件的基本名称,即 /proc/<pid>/stat 中的第 2 个字段
- {{.ExeBase}} 包含可执行文件的基本名称
- {{.ExeFull}} 包含可执行文件的完整路径
- {{.Username}} 包含有效用户的用户名
- {{.Matches}} 是一个 map,包含应用 cmdline 正则表达式后产生的所有匹配项
- {{.PID}} 包含进程的 PID。请注意,使用 PID 意味着该组将仅包含一个进程
- {{.StartTime}} 包含进程的启动时间。与 PID 结合使用时很有用,因为 PID 会随着时间的推移而被重用
不建议使用 PID 或 StartTime:这几乎不会是你想要的结果,而且很可能产生高基数(high cardinality)的 metrics,给 Prometheus 带来麻烦。
process_exporter 配置参考:https://github.com/ncabatoff/process-exporter/
touch process.sh && chmod +x process.sh
#!/bin/bash
#
# Installs and manages process_exporter: downloads the release tarball,
# unpacks it, creates the service account and a default configuration,
# and starts/stops the exporter as that account.
#
# Usage: process.sh [start|stop|status|restart|remove]
# With no argument the action defaults to "start".
# Must be run by a user that is allowed to use yum, useradd and su (root).

# Release version to download.
PROCESS_EXPORTER_VER=0.7.10
# Installation directory.
PROCESS_EXPORTER_DIR=/app/process_exporter
# Directory in which the downloaded tarball is kept.
INSTALL_PACKAGE=/app/install
# Log file the exporter's stdout/stderr is appended to.
LOG_FILE=/tmp/process_exporter.log
# Account the exporter runs as.
PROCESS_EXPORTER_USER=prometheus
# TCP port the exporter listens on.
PROCESS_EXPORTER_PORT=9256

# Paths derived from the settings above.
PROCESS_EXPORTER_BIN="${PROCESS_EXPORTER_DIR}/process-exporter"
PROCESS_EXPORTER_CONF="${PROCESS_EXPORTER_DIR}/process-exporter.yaml"
PROCESS_EXPORTER_PKG="process-exporter-${PROCESS_EXPORTER_VER}.linux-amd64.tar.gz"
PROCESS_EXPORTER_URL="https://github.com/ncabatoff/process-exporter/releases/download/v${PROCESS_EXPORTER_VER}/${PROCESS_EXPORTER_PKG}"

#######################################
# Reports on the exit status of the command that ran immediately before
# this function was called.
# Arguments: $1 - message printed on failure (the script then exits 1)
#            $2 - message printed on success
# Messages may contain backslash escapes such as \033[31m; they are
# rendered with printf %b (plain echo would print them literally).
#######################################
check_err() {
  local status=$?   # must be read before any other command runs
  if [ "$status" -ne 0 ]; then
    printf '%b\n' "$1"
    exit 1
  fi
  printf '%b\n' "$2"
}

#######################################
# Returns 0 when something is listening on PROCESS_EXPORTER_PORT.
# The trailing [[:space:]] keeps ":9256" from matching a longer port.
#######################################
is_listening() {
  netstat -tln 2> /dev/null | grep -Eq ":${PROCESS_EXPORTER_PORT}[[:space:]]"
}

#######################################
# Prints the PIDs (one per line) of exporter processes that were started
# from PROCESS_EXPORTER_BIN and are owned by PROCESS_EXPORTER_USER.
#######################################
exporter_pids() {
  pgrep -u "$PROCESS_EXPORTER_USER" -f "^${PROCESS_EXPORTER_BIN}( |\$)" 2> /dev/null
}

#######################################
# Polls the listening port every 2 seconds.
# Arguments: $1 - desired state, "up" or "down"
#            $2 - maximum number of polls
#            $3 - progress message printed before each poll
# Returns:   0 once the port is in the desired state, 1 on timeout
#######################################
wait_for_port() {
  local want=$1 tries=$2 notice=$3 state
  while (( tries > 0 )); do
    echo "$notice"
    sleep 2
    if is_listening; then state=up; else state=down; fi
    [ "$state" = "$want" ] && return 0
    tries=$(( tries - 1 ))
  done
  return 1
}

#######################################
# Installs a package with yum when the given command is missing.
# Arguments: $1 - command to look for, $2 - yum package providing it
#######################################
ensure_command() {
  local cmd=$1 pkg=$2
  command -v "$cmd" > /dev/null 2>&1 && return 0
  yum install "$pkg" -y
  check_err "\033[31myum安装依赖包 ${pkg} 失败,请检查\033[0m" "\033[36myum安装依赖包 ${pkg} 成功\033[0m"
}

#######################################
# Downloads the release tarball (if not already present) and unpacks it
# into PROCESS_EXPORTER_DIR (if that directory is still empty).
#######################################
install_package() {
  local tarball="${INSTALL_PACKAGE}/${PROCESS_EXPORTER_PKG}"

  mkdir -p "$INSTALL_PACKAGE" "$PROCESS_EXPORTER_DIR"

  # -s instead of -f: an empty file left behind by a failed download must
  # not be mistaken for a finished one.
  if [ ! -s "$tarball" ]; then
    # Download to a temporary name so an interrupted transfer never
    # leaves a truncated file under the final name.
    if ! wget -O "${tarball}.part" "$PROCESS_EXPORTER_URL"; then
      rm -f -- "${tarball}.part"
      echo "ERROR: 下载 ${PROCESS_EXPORTER_URL} 失败,请检查!"
      exit 1
    fi
    mv -f -- "${tarball}.part" "$tarball"
  fi

  if [ -n "$(ls -A "$PROCESS_EXPORTER_DIR")" ]; then
    echo "目录 $PROCESS_EXPORTER_DIR 不为空,不再重复解压压缩包"
  elif ! tar -xvzf "$tarball" --strip-components 1 -C "$PROCESS_EXPORTER_DIR"; then
    echo "ERROR: 解压 ${tarball} 失败,请检查!"
    exit 1
  fi

  if [ ! -x "$PROCESS_EXPORTER_BIN" ]; then
    echo "ERROR: 未找到可执行文件 ${PROCESS_EXPORTER_BIN},请检查!"
    exit 1
  fi
}

#######################################
# Creates the service group and user when they do not exist yet.
# getent/id match the exact name; grepping /etc/passwd would also match
# any line that merely contains the name.
#######################################
ensure_account() {
  getent group "$PROCESS_EXPORTER_USER" > /dev/null \
    || groupadd -g 10002 "$PROCESS_EXPORTER_USER"
  id -u "$PROCESS_EXPORTER_USER" > /dev/null 2>&1 \
    || useradd -u 10002 -g "$PROCESS_EXPORTER_USER" \
      -d "/home/${PROCESS_EXPORTER_USER}" -m "$PROCESS_EXPORTER_USER"
}

#######################################
# Writes the default configuration, but only when none exists, so that a
# configuration edited by the administrator survives start/restart.
#######################################
write_default_config() {
  [ -f "$PROCESS_EXPORTER_CONF" ] && return 0
  cat > "$PROCESS_EXPORTER_CONF" << 'EOF'
process_names:
  - name: "{{.Matches}}"
    cmdline:
    - 'process-exporter'
EOF
}

#######################################
# Installs (if necessary) and starts the exporter, waits up to 2 minutes
# for its port to come up and opens the port in firewalld when active.
# Exits 1 if the exporter is already running or fails to come up.
#######################################
start() {
  mkdir -p /app

  ensure_command wget wget
  ensure_command netstat net-tools

  if is_listening; then
    echo "PROCESS_EXPORTER 已启动,请勿重复启动应用!"
    exit 1
  fi
  echo "正在启动 PROCESS_EXPORTER....."

  install_package
  ensure_account
  write_default_config

  chown -R "${PROCESS_EXPORTER_USER}:${PROCESS_EXPORTER_USER}" "$PROCESS_EXPORTER_DIR"
  [ -f "$LOG_FILE" ] || touch "$LOG_FILE"
  chown -R "${PROCESS_EXPORTER_USER}:${PROCESS_EXPORTER_USER}" "$LOG_FILE"

  # Launch as the service account. The heredoc is unquoted on purpose:
  # the variables are expanded here, before the user switch.
  su - "$PROCESS_EXPORTER_USER" << EOF
nohup "$PROCESS_EXPORTER_BIN" --config.path="$PROCESS_EXPORTER_CONF" >> "$LOG_FILE" 2>&1 &
EOF

  # Poll every 2s, 60 times (2 minutes). This runs in the calling shell
  # so that a timeout really aborts the script.
  if ! wait_for_port up 60 "等待监听端口启动..."; then
    echo "ERROR: 启动${PROCESS_EXPORTER_PORT}端口超时,请检查!"
    exit 1
  fi
  echo "PROCESS_EXPORTER 启动成功!"

  # Open the port only when firewalld is actually running.
  if systemctl is-active --quiet firewalld 2> /dev/null; then
    firewall-cmd --add-port="${PROCESS_EXPORTER_PORT}/tcp" --permanent
    firewall-cmd --reload
  fi
}

#######################################
# Stops the exporter: SIGTERM first, SIGKILL if the port is still open
# after 10 seconds; waits up to 60 seconds in total.
# Exits 1 if the exporter is not running or cannot be stopped.
#######################################
stop() {
  local pids pid

  is_listening
  check_err "PROCESS_EXPORTER 未启动!" "PROCESS_EXPORTER 正在运行,开始停止应用....."

  pids=$(exporter_pids)
  if [ -z "$pids" ]; then
    echo "ERROR: 端口 ${PROCESS_EXPORTER_PORT} 正在被监听,但未找到用户 ${PROCESS_EXPORTER_USER} 运行的 process-exporter 进程,请检查!"
    exit 1
  fi

  for pid in $pids; do
    echo "Kill the PROCESS_EXPORTER process [ $pid ]"
    kill "$pid"
  done

  if ! wait_for_port down 5 "等待监听端口关闭..."; then
    # Graceful shutdown did not work; force it.
    for pid in $(exporter_pids); do
      kill -9 "$pid"
    done
    if ! wait_for_port down 25 "等待监听端口关闭..."; then
      echo "ERROR: 关闭${PROCESS_EXPORTER_PORT}端口超时,请检查!"
      exit 1
    fi
  fi
  echo "关闭 PROCESS_EXPORTER 成功!"
}

#######################################
# Restarts the exporter; simply starts it when it is not running.
#######################################
restart() {
  if is_listening; then
    stop
  fi
  start
}

#######################################
# Shows the running exporter processes in "ps u" format.
# Returns 1 (and says so) when none is running.
#######################################
status() {
  local pids
  pids=$(exporter_pids | paste -sd, -)
  if [ -z "$pids" ]; then
    echo "PROCESS_EXPORTER 未启动!"
    return 1
  fi
  ps u -p "$pids"
}

#######################################
# Deletes the log file and the installation directory.
# ${VAR:?} aborts instead of running "rm -rf" on an empty path.
# The service account is kept on purpose (see the commented userdel).
#######################################
remove() {
  rm -rf -- "${LOG_FILE:?}"
  rm -rf -- "${PROCESS_EXPORTER_DIR:?}"
  #userdel -r "$PROCESS_EXPORTER_USER"
}

#######################################
# Prints the list of supported actions.
#######################################
usage() {
  echo " "
  echo "Please input start to start PROCESS_EXPORTER."
  echo "Please input stop to stop PROCESS_EXPORTER."
  echo "Please input status to get PROCESS_EXPORTER status."
  echo "Please input restart to restart PROCESS_EXPORTER."
  echo "Please input remove to remove PROCESS_EXPORTER."
  echo " "
}

#######################################
# Dispatches on the first command-line argument (default: start).
#######################################
main() {
  local action=${1:-start}
  case "$action" in
    start)
      start
      ;;
    stop)
      stop
      ;;
    status)
      status
      ;;
    restart)
      restart
      ;;
    remove)
      remove
      ;;
    *)
      usage
      ;;
  esac
}

main "$@"
vim /app/process_exporter/process-exporter.yaml
process_names:
  - name: "{{.Matches}}"
    cmdline:
    - 'docker'
./process.sh restart
vim /app/prometheus/prometheus.yml
  - job_name: 'wxhntmy-process' #进程监控
    static_configs:
      - targets: ['localhost:9256']
vim /app/prometheus/prometheus.yml
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "/app/prometheus/*_rules.yaml"
  # - "first_rules.yml"
  # - "second_rules.yml"
vim /app/prometheus/process_exporter_rules.yaml
groups:
  - name: process
    rules:
      - alert: ProcessDockerDown
        expr: (namedprocess_namegroup_num_procs{groupname="map[:docker]"}) == 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }}: Process Docker Down"
          description: "{{ $labels.instance }}: Process Docker has been down for more than 1m"
          value: "{{ $value }}"
重启Prometheus
systemctl restart prometheus
:::info
Grafana 仪表盘(Dashboard)的 Import ID 是:249
:::