#!/bin/bash
#
# Service watchdog, part 1: sudo credential handling, configuration, logging
# and the helper functions used by the monitoring loop.

# ---------------------------------------------------------------------------
# Ask for sudo once and keep the cached credentials fresh.
# root is never asked for a password by sudo, so the whole step is skipped
# when the script already runs as root.
# ---------------------------------------------------------------------------
if (( EUID != 0 )); then
  sudo -v
  (
    while true; do
      sudo -n true
      sleep 60
      # Stop refreshing as soon as the main script ($$) is gone.
      kill -0 "$$" || exit
    done
  ) >/dev/null 2>&1 &   # fully detached from the script's stdout/stderr
  SUDO_KEEPALIVE_PID=$!
  # Do not leave the refresher behind when the script exits.
  trap 'kill "$SUDO_KEEPALIVE_PID" 2>/dev/null' EXIT
fi

# ---------------------------------------------------------------------------
# Configuration (each value may be overridden from the environment).
# ---------------------------------------------------------------------------
MAX_RETRIES=${MAX_RETRIES:-5}             # attempts per start / probe
RETRY_INTERVAL=${RETRY_INTERVAL:-5}       # whole seconds between attempts
MONITOR_INTERVAL=${MONITOR_INTERVAL:-60}  # seconds between monitoring passes
NET_IFACE=${NET_IFACE:-eth1}              # Ethernet interface to verify
CAN_IFACE=${CAN_IFACE:-can0}              # CAN interface to verify

# Log file name carries the date; log() refreshes DATE and LOG_FILE on every
# call so a long-running watchdog moves to a new file after midnight.
LOG_DIR=${LOG_DIR:-/ztms/the14thplan/Log/Run}
DATE=$(date +"%Y-%m-%d")
LOG_FILE="${LOG_DIR}/service_monitor_${DATE}.log"

#######################################
# Write one timestamped line to stdout and append it to the daily log file.
# Globals:   LOG_DIR (read), DATE, LOG_FILE (written)
# Arguments: $1 - level (e.g. INFO, WARN, ERROR)
#            $2 - message text
# Outputs:   "<YYYY-mm-dd HH:MM:SS> [<level>] <message>" on stdout
#######################################
log() {
  local level=$1
  local message=$2
  local timestamp
  timestamp=$(date +"%Y-%m-%d %H:%M:%S")
  # Derive the file date from the same timestamp so both always agree.
  DATE=${timestamp%% *}
  LOG_FILE="${LOG_DIR}/service_monitor_${DATE}.log"
  printf '%s [%s] %s\n' "$timestamp" "$level" "$message" | tee -a "$LOG_FILE"
}

#######################################
# Run a start command until it succeeds, with retries and a time budget.
# Globals:   MAX_RETRIES, RETRY_INTERVAL (read)
# Arguments: $1 - service name used in log messages
#            $2 - command string, executed with eval (trusted input only).
#                 A command that ends in "&" always reports success, because
#                 a background launch returns 0 immediately.
# Returns:   0 once the command exits 0; 1 after MAX_RETRIES failed attempts
#            or when more than MAX_RETRIES * RETRY_INTERVAL seconds elapsed.
#######################################
start_service() {
  local service=$1
  local command=$2
  local attempt=0
  local budget=$(( MAX_RETRIES * RETRY_INTERVAL ))
  local started=$SECONDS   # bash's built-in seconds counter; no fork needed

  while (( attempt < MAX_RETRIES )); do
    log "INFO" "尝试启动 $service 服务..."
    if eval "$command"; then
      log "INFO" "$service 服务已成功启动"
      return 0
    fi

    attempt=$(( attempt + 1 ))
    # After the final attempt there is nothing left to retry: skip the
    # "retrying" message and the pointless sleep.
    if (( attempt >= MAX_RETRIES )); then
      break
    fi

    log "WARN" "$service 服务启动失败,正在重新尝试(第 $attempt 次)..."
    sleep "$RETRY_INTERVAL"

    if (( SECONDS - started > budget )); then
      log "ERROR" "$service 服务启动超时,达到最大重试次数。"
      return 1
    fi
  done

  log "ERROR" "$service 服务启动失败,达到最大重试次数。"
  return 1
}

#######################################
# 1. Restart the database service.
# Returns: the status of start_service.
#######################################
restart_database_service() {
  log "INFO" "重启数据库服务..."
  start_service "MySQL" "sudo systemctl restart mysql"
}

# Probe: succeeds when the `ip link` output for NET_IFACE contains "UP".
_net_iface_is_up() {
  ip link show "$NET_IFACE" | grep -q 'UP'
}

# Probe: succeeds when `ip link` can show CAN_IFACE, i.e. the device exists.
_can_iface_is_present() {
  ip link show "$CAN_IFACE" >/dev/null 2>&1
}

#######################################
# Poll a probe up to MAX_RETRIES times, logging the outcome of each try.
# Globals:   MAX_RETRIES, RETRY_INTERVAL (read)
# Arguments: $1 - probe function or command name (exit 0 means ready)
#            $2 - message logged on success
#            $3 - prefix of the per-attempt warning message
#            $4 - message logged once all attempts are used up
# Returns:   0 when the probe succeeded, 1 otherwise.
#######################################
_wait_for_probe() {
  local probe=$1
  local ok_msg=$2
  local wait_prefix=$3
  local fail_msg=$4
  local attempt=0

  while (( attempt < MAX_RETRIES )); do
    if "$probe"; then
      log "INFO" "$ok_msg"
      return 0
    fi
    attempt=$(( attempt + 1 ))
    log "WARN" "${wait_prefix},等待加载(第 $attempt 次)..."
    sleep "$RETRY_INTERVAL"
  done

  log "ERROR" "$fail_msg"
  return 1
}

#######################################
# 2. Check that the Ethernet and CAN interfaces are available.
# Globals:   NET_IFACE, CAN_IFACE (read, via the probes)
# Returns:   0 when both interfaces pass their probe, 1 as soon as one
#            of them is still unavailable after MAX_RETRIES attempts.
#######################################
check_network_and_can_drivers() {
  log "INFO" "检查 CAN 与网口的硬件驱动是否已加载..."

  # Ethernet port
  _wait_for_probe _net_iface_is_up \
    "网口已正常工作" \
    "网口未正常工作" \
    "网口驱动加载失败,达到最大重试次数。" || return 1

  # CAN interface (e.g. can0)
  _wait_for_probe _can_iface_is_present \
    "CAN 接口已加载" \
    "CAN 接口未加载" \
    "CAN 接口驱动加载失败,达到最大重试次数。" || return 1

  return 0
}
# 3. Endless monitoring task, one pass per MONITOR_INTERVAL (default: 1 minute).

#######################################
# Watch the database, the data acquisition system, nginx and the backend
# jar; start whichever one is found not running. Never returns.
# Globals:   MONITOR_INTERVAL (read, default 60)
#            LAUNCH_SETTLE_SECONDS (read, default 2) - wait after a
#              background launch before verifying that the process exists
# Relies on: log and start_service being defined by the time it is called.
#######################################
monitor_services() {
  local -r daq_bin="/ztms/the14thplan/DataServicePlatform/exe/DataAcquistionSystem"
  local -r backend_cmd="java -jar /ztms/the14thplan/Website/145.jar"
  local -r settle=${LAUNCH_SETTLE_SECONDS:-2}
  local -r interval=${MONITOR_INTERVAL:-60}

  while true; do
    # 3.1 Database service: start it if it has been stopped.
    if ! systemctl is-active --quiet mysql; then
      log "INFO" "数据库服务已停止,正在启动..."
      start_service "MySQL" "sudo systemctl start mysql"
    fi

    # 3.2 Data acquisition system running in the background?
    if ! pgrep -f "$daq_bin" >/dev/null; then
      log "INFO" "数采系统未运行,正在启动..."
      # A bare "cmd &" always returns 0, so a failed launch would be logged
      # as a success. Launch, give the process a moment, then let pgrep
      # decide the status that start_service sees.
      start_service "数采系统" \
        "sudo ${daq_bin} & sleep ${settle}; pgrep -f '${daq_bin}' >/dev/null"
    fi

    # 3.3 Website services.
    # nginx: match the process name exactly. A substring match on the full
    # command line would also hit e.g. an editor or tail opened on an nginx
    # file and hide a dead server.
    if ! pgrep -x "nginx" >/dev/null; then
      log "INFO" "nginx 未运行,正在启动..."
      start_service "Nginx" "sudo systemctl start nginx"
    fi

    # Backend jar: same launch-then-verify approach as in 3.2.
    if ! pgrep -f "$backend_cmd" >/dev/null; then
      log "INFO" "后端 jar 未运行,正在启动..."
      start_service "后端 jar 服务" \
        "sudo ${backend_cmd} & sleep ${settle}; pgrep -f '${backend_cmd}' >/dev/null"
    fi

    # Wait until the next pass.
    sleep "$interval"
  done
}

#######################################
# Entry point: check the hardware interfaces, restart the database, then
# hand over to the monitoring loop.
# Exits with status 1 when either preliminary step fails.
#######################################
main() {
  log "INFO" "开始执行脚本..."

  if ! check_network_and_can_drivers; then
    log "ERROR" "硬件驱动检查失败,脚本终止。"
    exit 1
  fi

  if ! restart_database_service; then
    log "ERROR" "数据库服务重启失败,脚本终止。"
    exit 1
  fi

  monitor_services
}

main "$@"