#!/bin/bash

# Ask for sudo credentials once up front, then keep the cached credentials
# fresh for as long as this script is running.
if ! sudo -v; then
  # Best effort: individual sudo calls later in the script may still work
  # (presumably depending on the sudoers setup), so warn instead of aborting.
  printf '%s\n' "warning: 'sudo -v' failed; later sudo commands may prompt or fail" >&2
fi

# Background keep-alive loop: refresh the sudo timestamp every 60 seconds and
# exit once the parent shell ($$) no longer exists. stderr of the whole loop is
# discarded on purpose so failed refreshes and the kill -0 probe stay silent.
while true; do
  sudo -n true
  sleep 60
  kill -0 "$$" || exit
done 2>/dev/null &
# Configuration. Each tunable may be overridden from the environment; the
# defaults match the previously hard-coded values.
MAX_RETRIES=${MAX_RETRIES:-5}            # attempts per service start / interface probe
RETRY_INTERVAL=${RETRY_INTERVAL:-5}      # seconds to wait between attempts
MONITOR_INTERVAL=${MONITOR_INTERVAL:-60} # seconds between monitoring passes

# Log file name that embeds the date.
# NOTE: DATE is evaluated once at startup, so a long-running instance keeps
# appending to the file named after the day it was started.
DATE=$(date +"%Y-%m-%d")
LOG_FILE="/ztms/the14thplan/Log/Run/service_monitor_${DATE}.log"
#######################################
# Print one timestamped, level-tagged line to stdout and append the same line
# to the log file.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - level tag (callers use INFO / WARN / ERROR)
#            $2 - message text
# Outputs:   "YYYY-MM-DD HH:MM:SS [level] message" on stdout and in LOG_FILE
# Returns:   exit status of tee
#######################################
log() {
  local level=$1
  local message=$2
  local timestamp
  timestamp=$(date +"%Y-%m-%d %H:%M:%S")
  # LOG_FILE is quoted so a path containing spaces or glob characters is
  # still treated as exactly one file.
  printf '%s [%s] %s\n' "$timestamp" "$level" "$message" | tee -a "$LOG_FILE"
}
#######################################
# Run a start command, retrying on failure. Retries are bounded both by an
# attempt count and by a wall-clock budget.
# Globals:   MAX_RETRIES (read), RETRY_INTERVAL (read)
# Arguments: $1 - service name used in the log messages
#            $2 - shell command line that starts the service
# Returns:   0 as soon as the command exits with status 0;
#            1 after MAX_RETRIES failed attempts, or once more than
#            MAX_RETRIES * RETRY_INTERVAL seconds have elapsed
# NOTE: the command string is executed with eval, so it must only ever come
# from trusted code. A command line ending in '&' is put in the background
# and eval then reports status 0 immediately, so such a launch is always
# logged as successful.
#######################################
start_service() {
  local service=$1
  local command=$2
  local retries=0
  local start_time current_time
  start_time=$(date +%s)

  while (( retries < MAX_RETRIES )); do
    log "INFO" "尝试启动 $service 服务..."
    # Quoted so eval receives the command exactly as written; unquoted, the
    # string would be word-split and glob-expanded before eval re-joins it.
    if eval "$command"; then
      log "INFO" "$service 服务已成功启动"
      return 0
    fi

    retries=$((retries + 1))
    log "WARN" "$service 服务启动失败,正在重新尝试(第 $retries 次)..."

    # No further attempt follows the last failure, so do not wait for one.
    if (( retries >= MAX_RETRIES )); then
      break
    fi
    sleep "$RETRY_INTERVAL"

    # Overall time budget: give up early when slow attempts have already
    # used more time than MAX_RETRIES waits would take.
    current_time=$(date +%s)
    if (( current_time - start_time > MAX_RETRIES * RETRY_INTERVAL )); then
      log "ERROR" "$service 服务启动超时,达到最大重试次数。"
      return 1
    fi
  done

  log "ERROR" "$service 服务启动失败,达到最大重试次数。"
  return 1
}
#######################################
# 1. Restart the database service.
# Logs the action and restarts MySQL through start_service.
# Returns: the exit status of start_service
#######################################
restart_database_service() {
  log "INFO" "重启数据库服务..."
  # Last command in the function, so its status is the function's status.
  start_service "MySQL" "sudo systemctl restart mysql"
}
#######################################
# Private helper: run a probe command until it succeeds, at most MAX_RETRIES
# times, waiting RETRY_INTERVAL seconds between attempts.
# Globals:   MAX_RETRIES (read), RETRY_INTERVAL (read)
# Arguments: $1 - message logged at INFO level when the probe succeeds
#            $2 - prefix of the WARN message logged after each failed attempt
#            $3 - message logged at ERROR level when all attempts failed
#            $4... - probe command and its arguments
# Returns:   0 if the probe succeeded, 1 otherwise
#######################################
_wait_for_probe() {
  local ok_msg=$1
  local wait_msg=$2
  local fail_msg=$3
  shift 3
  local retries=0

  while (( retries < MAX_RETRIES )); do
    if "$@"; then
      log "INFO" "$ok_msg"
      return 0
    fi
    retries=$((retries + 1))
    log "WARN" "${wait_msg}(第 $retries 次)..."
    # Only wait when another attempt will actually follow.
    if (( retries < MAX_RETRIES )); then
      sleep "$RETRY_INTERVAL"
    fi
  done

  log "ERROR" "$fail_msg"
  return 1
}

# Private helper: succeeds when the `ip link show eth1` output contains "UP".
_eth1_is_up() {
  ip link show eth1 | grep -q 'UP'
}

# Private helper: succeeds when the `ip link show can0` output mentions can0.
_can0_is_present() {
  ip link show can0 | grep -q 'can0'
}

#######################################
# 2. Check that the network port and CAN interface drivers are loaded.
# Probes eth1 first and can0 second; each probe is retried up to MAX_RETRIES
# times.
# Returns: 0 when both interfaces were detected, 1 as soon as one of them
#          could not be detected within the retry limit
#######################################
check_network_and_can_drivers() {
  log "INFO" "检查 CAN 与网口的硬件驱动是否已加载..."

  # Network port
  _wait_for_probe \
    "网口已正常工作" \
    "网口未正常工作,等待加载" \
    "网口驱动加载失败,达到最大重试次数。" \
    _eth1_is_up || return 1

  # CAN interface (can0)
  _wait_for_probe \
    "CAN 接口已加载" \
    "CAN 接口未加载,等待加载" \
    "CAN 接口驱动加载失败,达到最大重试次数。" \
    _can0_is_present || return 1

  return 0
}
#######################################
# 3. Endless monitoring loop, one pass every MONITOR_INTERVAL seconds.
# Each pass checks MySQL, the data acquisition program, nginx and the backend
# jar, and hands anything that is not running to start_service.
# Globals:   MONITOR_INTERVAL (read)
# Returns:   never returns
#######################################
monitor_services() {
  local daq_bin="/ztms/the14thplan/DataServicePlatform/exe/DataAcquistionSystem"
  local backend_cmd="java -jar /ztms/the14thplan/Website/145.jar"

  while true; do
    # 3.1 Database service: start it if systemd reports it as not active.
    if ! systemctl is-active --quiet mysql; then
      log "INFO" "数据库服务已停止,正在启动..."
      start_service "MySQL" "sudo systemctl start mysql"
    fi

    # 3.2 Data acquisition program: looked up by its full path on the command
    # line and launched in the background when absent.
    if ! pgrep -f "$daq_bin" > /dev/null; then
      log "INFO" "数采系统未运行,正在启动..."
      start_service "数采系统" "sudo $daq_bin &"
    fi

    # 3.3 Web site, part 1: nginx.
    # Match the process name exactly (-x). A full-command-line match (-f) is
    # also satisfied by any unrelated process that merely mentions "nginx" in
    # its arguments, which would hide a stopped nginx.
    if ! pgrep -x nginx > /dev/null; then
      log "INFO" "nginx 未运行,正在启动..."
      start_service "Nginx" "sudo systemctl start nginx"
    fi

    # 3.3 Web site, part 2: backend jar, looked up by its command line and
    # launched in the background when absent.
    if ! pgrep -f "$backend_cmd" > /dev/null; then
      log "INFO" "后端 jar 未运行,正在启动..."
      start_service "后端 jar 服务" "sudo $backend_cmd &"
    fi

    # Wait until the next pass.
    sleep "$MONITOR_INTERVAL"
  done
}
# Main program: verify the hardware interfaces, restart the database, then
# enter the monitoring loop. The script exits with status 1 if either of the
# two preparatory steps fails.
log "INFO" "开始执行脚本..."

if ! check_network_and_can_drivers; then
  log "ERROR" "硬件驱动检查失败,脚本终止。"
  exit 1
fi

if ! restart_database_service; then
  log "ERROR" "数据库服务重启失败,脚本终止。"
  exit 1
fi

# Does not return: loops until the script is terminated.
monitor_services