#!/bin/bash
# IoT Platform health-check script
# Usage:   ./health-check.sh [host] [port] [max_wait_seconds]
# Returns: 0 = healthy, 1 = unhealthy (also used for invalid arguments or a
#          missing curl binary, so callers that only test "non-zero" keep working)
#
# Check strategy:
#   1. Prefer /actuator/health (Spring Boot Actuator).
#   2. If Actuator answers 404, fall back to checking that the TCP port is listening.
#   3. If not even the port is listening, the service is considered unhealthy.

# Only -u is enabled on purpose: probe commands are expected to fail while the
# service is still starting (so no -e), and `grep -q` may close the pipe early
# on `ss`/`netstat` (so no pipefail).
set -u

HOST=${1:-localhost}
PORT=${2:-8887}
MAX_WAIT=${3:-30}
HEALTH_URL="http://${HOST}:${PORT}/actuator/health"
INTERVAL=2

# Tolerates optional whitespace around the colon so pretty-printed JSON
# ("status" : "UP") is accepted as well as the compact form.
UP_PATTERN='"status"[[:space:]]*:[[:space:]]*"UP"'

# Reject anything that is not a plain non-negative integer before doing
# arithmetic with it.
if ! [[ "${MAX_WAIT}" =~ ^[0-9]+$ ]]; then
  printf '%s\n' "[health-check] 参数错误: max_wait_seconds 必须为非负整数 (当前值: ${MAX_WAIT})" >&2
  exit 1
fi

# Fail fast instead of looping for MAX_WAIT seconds when curl is not installed.
if ! command -v curl >/dev/null 2>&1; then
  printf '%s\n' "[health-check] 未找到 curl 命令, 无法执行健康检查" >&2
  exit 1
fi

# 10# forces base 10 so values with leading zeros (e.g. "08") are not parsed as
# octal. Always probe at least once, even when MAX_WAIT < INTERVAL.
ATTEMPTS=$((10#${MAX_WAIT} / INTERVAL))
if ((ATTEMPTS < 1)); then
  ATTEMPTS=1
fi

printf '%s\n' "[health-check] 开始检查: ${HOST}:${PORT}, 最多等待 ${MAX_WAIT} 秒"

for ((i = 1; i <= ATTEMPTS; i++)); do
  # Strategy 1: Actuator health endpoint.
  # A single request returns the body followed by a newline and the HTTP status
  # code, so the status and the body always belong to the same response.
  # curl reports 000 as the code when no HTTP response was received.
  RAW=$(curl -s -w '\n%{http_code}' --connect-timeout 2 --max-time 3 "${HEALTH_URL}" 2>/dev/null)
  RESPONSE=${RAW##*$'\n'}
  BODY=${RAW%$'\n'*}

  if [[ "${RESPONSE}" == "200" && "${BODY}" =~ ${UP_PATTERN} ]]; then
    printf '%s\n' "[health-check] Actuator 健康检查通过 (${i}/${ATTEMPTS})"
    exit 0
  fi

  # Strategy 2: Actuator answered 404 -> fall back to a port-listening check.
  # NOTE(review): ss/netstat list sockets of the machine running this script,
  # so for a remote HOST only the nc probe actually targets that host — confirm
  # whether the local fallbacks are intended for remote hosts.
  if [[ "${RESPONSE}" == "404" ]]; then
    if nc -z "${HOST}" "${PORT}" 2>/dev/null ||
      ss -tlnp 2>/dev/null | grep -q ":${PORT} " ||
      netstat -tlnp 2>/dev/null | grep -q ":${PORT} "; then
      printf '%s\n' "[health-check] 端口监听检查通过 (${i}/${ATTEMPTS}) [Actuator 未启用,使用端口降级检查]"
      exit 0
    fi
  fi

  if ((i < ATTEMPTS)); then
    printf '%s\n' "[health-check] 第 ${i}/${ATTEMPTS} 次检查未通过 (HTTP ${RESPONSE}), ${INTERVAL} 秒后重试..."
    sleep "${INTERVAL}"
  else
    # Last attempt: no retry follows, so do not announce one or waste a sleep.
    printf '%s\n' "[health-check] 第 ${i}/${ATTEMPTS} 次检查未通过 (HTTP ${RESPONSE})"
  fi
done

printf '%s\n' "[health-check] 健康检查失败: 服务未在 ${MAX_WAIT} 秒内就绪"
exit 1