test-repo/scripts/web-monitor.sh

118 lines
3.5 KiB
Bash
Executable File

#!/bin/zsh
# Web Apps Monitor - Auto-restart if down
# Ports: 3000 (gantt-board), 3003 (blog-backup), 3005 (heartbeat-monitor)
# --- Configuration -----------------------------------------------------------
LOG_FILE="/Users/mattbruce/.openclaw/workspace/memory/web-monitor.log"
LOCK_FILE="/tmp/web-monitor.lock"
# Captured once, so every log line written by this run carries the same stamp.
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S %Z')

# The lock handling below already appends to the log, so make sure the log
# directory exists before the first write (best effort; errors are ignored).
mkdir -p "${LOG_FILE%/*}" 2>/dev/null

# Create the lock file atomically.
# With noclobber set, ">" refuses to overwrite an existing file, so the
# existence check and the creation are a single step. The option is set in a
# subshell so it does not leak into the rest of the script.
# Returns: 0 if this run now owns the lock, non-zero if the file already
#          exists or cannot be created.
acquire_lock() {
  ( set -o noclobber; : > "$LOCK_FILE" ) 2>/dev/null
}

# Prevent concurrent runs
if ! acquire_lock; then
  # Check if lock is stale (older than 2 minutes).
  # GNU stat (-c %Y) is tried first, then BSD/macOS stat (-f %m). If both fail
  # the mtime falls back to 0, which makes the lock look stale.
  lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0)
  lock_age=$(( $(date +%s) - lock_mtime ))
  if [ "$lock_age" -lt 120 ]; then
    echo "[$TIMESTAMP] Monitor already running, skipping..." >> "$LOG_FILE"
    exit 0
  fi

  echo "[$TIMESTAMP] Removing stale lock file" >> "$LOG_FILE"
  rm -f "$LOCK_FILE"
  if ! acquire_lock; then
    echo "[$TIMESTAMP] Could not acquire lock $LOCK_FILE, skipping..." >> "$LOG_FILE"
    exit 1
  fi
fi

# Ensure lock is removed on exit. The trap is installed only after the lock
# has been acquired, so the early exits above never delete another run's lock.
# Single quotes defer expansion until the trap fires and keep the path quoted.
trap 'rm -f "$LOCK_FILE"' EXIT

# Ensure PATH for cron (include Homebrew on macOS)
export PATH="/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$HOME/.local/bin:$PATH"

# Ensure log file exists
touch "$LOG_FILE"
# Probe an app's HTTP endpoint on localhost.
# Arguments: $1 - TCP port to probe
#            $2 - app name (accepted so call sites stay uniform; not used)
# Outputs:   the HTTP status code reported by curl, or the single word "DOWN"
#            when curl itself fails (connection refused, timeout, ...).
# Callers treat anything other than "200" as unhealthy.
check_health() {
  local port=$1
  local code

  # On failure curl still prints "000" through -w before exiting non-zero.
  # Assign the fallback instead of echoing it inside the substitution, so the
  # result is a clean "DOWN" rather than "000DOWN".
  code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "http://localhost:$port" 2>/dev/null) || code="DOWN"

  echo "$code"
}
# Restart the dev server of one app.
# Globals:   LOG_FILE, TIMESTAMP (read)
# Arguments: $1 - TCP port the app listens on
#            $2 - app name used in log messages
#            $3 - project directory to run "npm run dev" in
# Returns:   1 if the project directory does not exist, otherwise the status
#            of the final log write. The npm process is started in the
#            background and its outcome is not checked here.
restart_app() {
  local port=$1
  local name=$2
  # Deliberately NOT called "path": in zsh, `path` is the special array tied to
  # $PATH, and a local copy keeps that tie. `local path=$3` would therefore
  # replace the command search path with the project directory for the rest of
  # the function, including the backgrounded npm process.
  local app_dir=$3
  local pid

  echo "[$TIMESTAMP] ⚠️ $name (port $port) is DOWN - restarting..." >> "$LOG_FILE"

  if [ ! -d "$app_dir" ]; then
    echo "[$TIMESTAMP] ❌ $name: project directory not found: $app_dir" >> "$LOG_FILE"
    return 1
  fi

  # Kill process on that port. lsof selects by listening socket; matching the
  # command line for ":<port>" would miss a server started with "--port <port>".
  if command -v lsof > /dev/null 2>&1; then
    lsof -ti "tcp:$port" -sTCP:LISTEN 2>/dev/null | while read -r pid; do
      kill "$pid" 2>/dev/null
    done
  else
    pkill -f ":$port" 2>/dev/null
  fi
  /bin/sleep 2

  # Restart in background using full path to npm. The whole "cd && npm" list is
  # backgrounded, so the caller's working directory is left unchanged.
  cd "$app_dir" && /opt/homebrew/bin/npm run dev -- --port "$port" > /dev/null 2>&1 &

  echo "[$TIMESTAMP] 🔄 $name restarted on port $port" >> "$LOG_FILE"
}
# Check all apps
# Each monitored app is one "port:name" entry; its project directory is
# $WEB_ROOT/<name>. Apps are probed in the listed order, and the name of every
# app whose probe is not "200" is restarted and appended to RESTARTED.
WEB_ROOT="/Users/mattbruce/Documents/Projects/OpenClaw/Web"
RESTARTED=()

for app_spec in \
  "3000:gantt-board" \
  "3003:blog-backup" \
  "3005:heartbeat-monitor"; do
  app_port=${app_spec%%:*}
  app_name=${app_spec#*:}

  app_code=$(check_health "$app_port" "$app_name")
  if [ "$app_code" != "200" ]; then
    restart_app "$app_port" "$app_name" "$WEB_ROOT/$app_name"
    RESTARTED+=("$app_name")
  fi
done
# If any were restarted, wait and re-verify
if [ "${#RESTARTED[@]}" -gt 0 ]; then
  # Give the freshly started dev servers a moment to come up.
  /bin/sleep 5
  for app in "${RESTARTED[@]}"; do
    case "$app" in
      "gantt-board") port=3000 ;;
      "blog-backup") port=3003 ;;
      "heartbeat-monitor") port=3005 ;;
      *)
        # Without this arm an unknown name would be probed on whatever port
        # the previous iteration left behind.
        echo "[$TIMESTAMP] ❌ $app has no known port, cannot verify" >> "$LOG_FILE"
        continue
        ;;
    esac

    verify_code=$(check_health "$port" "$app")
    if [ "$verify_code" = "200" ]; then
      echo "[$TIMESTAMP] ✅ $app verified healthy (HTTP 200)" >> "$LOG_FILE"
    else
      echo "[$TIMESTAMP] ❌ $app still unhealthy (HTTP $verify_code)" >> "$LOG_FILE"
    fi
  done
else
  # All healthy - log periodically (every 6 runs ~ 30 min with 5-min interval)
  counter=$(cat /tmp/web-monitor-counter 2>/dev/null)
  # A missing, empty or non-numeric counter restarts from 0. Otherwise the
  # arithmetic below would fail and the bad value would be written back on
  # every run.
  case "$counter" in
    '' | *[!0-9]*) counter=0 ;;
  esac
  # 10# forces base 10 so a value with a leading zero is not read as octal.
  counter=$((10#$counter + 1))

  if [ "$counter" -ge 6 ]; then
    echo "[$TIMESTAMP] ✅ All web apps healthy (3000, 3003, 3005)" >> "$LOG_FILE"
    counter=0
  fi
  echo "$counter" > /tmp/web-monitor-counter
fi