#!/bin/bash
# IoT Platform health-check script.
#
# Usage:   ./health-check.sh [host] [port] [max_wait_seconds]
# Returns: 0 = healthy, 1 = unhealthy (also used for invalid arguments or a
#          missing curl binary, so callers only ever see 0 or 1)
#
# Check strategy, repeated up to max_wait_seconds / INTERVAL times:
#   1. Query /actuator/health (Spring Boot Actuator). The service is healthy
#      when the endpoint answers HTTP 200 and the body reports status UP.
#   2. If Actuator answers 404, fall back to checking that the TCP port is
#      listening.
#   3. Anything else fails the attempt; when every attempt has failed the
#      service is reported as unhealthy.

HOST=${1:-localhost}
PORT=${2:-8887}
MAX_WAIT=${3:-30}
HEALTH_URL="http://${HOST}:${PORT}/actuator/health"

readonly INTERVAL=2

# MAX_WAIT is used in arithmetic below; reject anything that is not a plain
# decimal number so that typos ("30s") or crafted values cannot break or
# abuse the arithmetic expansion.
if ! [[ "$MAX_WAIT" =~ ^[0-9]+$ ]]; then
  echo "[health-check] 错误: max_wait_seconds 必须为非负整数: ${MAX_WAIT}" >&2
  exit 1
fi

# Without curl every attempt would fail with an empty status code; fail fast.
if ! command -v curl >/dev/null 2>&1; then
  echo "[health-check] 错误: 未找到 curl 命令" >&2
  exit 1
fi

# 10# forces base 10 so values such as "08" are not parsed as invalid octal.
ATTEMPTS=$(( 10#$MAX_WAIT / INTERVAL ))
# A wait shorter than one interval must still probe the service once.
if (( ATTEMPTS < 1 )); then
  ATTEMPTS=1
fi

#######################################
# Port-level fallback probe.
# Tries nc against HOST:PORT first; if that does not succeed, looks for PORT
# in the listening-socket tables printed by ss and then netstat (both of
# which report sockets of the machine running this script).
# Globals:   HOST (read), PORT (read)
# Returns:   0 if any probe finds the port, non-zero otherwise
#######################################
port_is_listening() {
  nc -z "${HOST}" "${PORT}" 2>/dev/null \
    || ss -tlnp 2>/dev/null | grep -qF -- ":${PORT} " \
    || netstat -tlnp 2>/dev/null | grep -qF -- ":${PORT} "
}

echo "[health-check] 开始检查: ${HOST}:${PORT}, 最多等待 ${MAX_WAIT} 秒"

for (( i = 1; i <= ATTEMPTS; i++ )); do
  # Strategy 1: Actuator health endpoint.
  # A single request supplies both body and status: -w appends the HTTP code
  # on its own final line, so the two can never come from different responses.
  RESULT=$(curl -s -w '\n%{http_code}' --connect-timeout 2 --max-time 3 \
    "${HEALTH_URL}" 2>/dev/null)
  RESPONSE=${RESULT##*$'\n'}   # text after the last newline = status code
  BODY=${RESULT%$'\n'*}        # everything before it = response body

  # Allow optional whitespace around the colon so pretty-printed JSON matches.
  if [[ "$RESPONSE" == "200" ]] \
    && grep -Eq '"status"[[:space:]]*:[[:space:]]*"UP"' <<<"$BODY"; then
    echo "[health-check] Actuator 健康检查通过 (${i}/${ATTEMPTS})"
    exit 0
  fi

  # Strategy 2: Actuator answered 404, degrade to a port-listening check.
  if [[ "$RESPONSE" == "404" ]] && port_is_listening; then
    echo "[health-check] 端口监听检查通过 (${i}/${ATTEMPTS}) [Actuator 未启用,使用端口降级检查]"
    exit 0
  fi

  if (( i < ATTEMPTS )); then
    echo "[health-check] 第 ${i}/${ATTEMPTS} 次检查未通过 (HTTP ${RESPONSE}), ${INTERVAL} 秒后重试..."
    sleep "$INTERVAL"
  else
    # Last attempt: nothing left to retry, so do not sleep or promise a retry.
    echo "[health-check] 第 ${i}/${ATTEMPTS} 次检查未通过 (HTTP ${RESPONSE})"
  fi
done

echo "[health-check] 健康检查失败: 服务未在 ${MAX_WAIT} 秒内就绪"
exit 1