#!/bin/zsh
#
# Web Apps Monitor - auto-restart local dev servers that stop answering.
#
# Monitored ports: 3000 (gantt-board), 3003 (blog-backup),
#                  3005 (heartbeat-monitor)
#
# Each run probes every app over HTTP, restarts any app that does not
# answer with HTTP 200, re-checks the restarted apps after a short pause and
# appends the outcome to LOG_FILE. When every app is healthy, a summary line
# is written only on every 6th run (~30 min with a 5-min interval).

# Ensure PATH for cron (include Homebrew on macOS). Done first so every
# command lookup below (stat, curl, lsof, ...) sees the same PATH.
export PATH="/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$HOME/.local/bin:$PATH"

LOG_FILE="/Users/mattbruce/.openclaw/workspace/memory/web-monitor.log"
LOCK_FILE="/tmp/web-monitor.lock"
COUNTER_FILE="/tmp/web-monitor-counter"
NPM_BIN="/opt/homebrew/bin/npm"
LOCK_STALE_SECS=120   # a lock older than this is treated as abandoned
HEALTHY_LOG_EVERY=6   # log "all healthy" once per this many healthy runs
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S %Z')

# One entry per app: "port:name:project-dir". Fields are split on ':' so
# none of them may contain a colon.
APPS=(
  "3000:gantt-board:/Users/mattbruce/Documents/Projects/OpenClaw/Web/gantt-board"
  "3003:blog-backup:/Users/mattbruce/Documents/Projects/OpenClaw/Web/blog-backup"
  "3005:heartbeat-monitor:/Users/mattbruce/Documents/Projects/OpenClaw/Web/heartbeat-monitor"
)

# Ensure the log file (and its directory) exists before anything logs to it.
mkdir -p -- "${LOG_FILE%/*}" 2>/dev/null
touch "$LOG_FILE"

# Append one line to LOG_FILE, prefixed with this run's timestamp.
# Arguments: the message text.
log_msg() {
  printf '%s\n' "[$TIMESTAMP] $*" >> "$LOG_FILE"
}

# Print the lock file's modification time in epoch seconds.
# Tries the GNU stat syntax, then the BSD/macOS syntax; prints 0 if both
# fail, which makes the lock look arbitrarily old to the caller.
lock_mtime() {
  stat -c %Y "$LOCK_FILE" 2>/dev/null ||
    stat -f %m "$LOCK_FILE" 2>/dev/null ||
    echo 0
}

# Try to create the lock file. With noclobber set, the redirection fails if
# the file already exists, so "check" and "create" are a single step rather
# than a test followed by a separate touch.
# Returns: 0 if this process now owns the lock, non-zero otherwise.
try_lock() {
  ( set -o noclobber; printf '%s\n' "$$" > "$LOCK_FILE" ) 2>/dev/null
}

# Probe an app over HTTP.
# Arguments: $1 - port to probe on localhost
#            $2 - app name (accepted for call-site readability; unused)
# Outputs:   the HTTP status code, or "DOWN" if curl failed or timed out.
check_health() {
  local port=$1
  local code
  # curl still prints its -w output (e.g. "000") when the request fails, so
  # the result is normalised here instead of appending a marker to it.
  if ! code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 \
    "http://localhost:$port" 2>/dev/null); then
    code="DOWN"
  fi
  if [[ -z "$code" || "$code" == "000" ]]; then
    code="DOWN"
  fi
  printf '%s\n' "$code"
}

# Send a signal to every process listening on a TCP port.
# Arguments: $1 - port, $2 - signal name (e.g. TERM, KILL)
signal_listeners() {
  local port=$1
  local sig=$2
  local pid
  # Read PIDs line by line: zsh does not word-split unquoted expansions, so
  # a plain `kill $pids` would pass several PIDs as one argument.
  lsof -nP -t -iTCP:"$port" -sTCP:LISTEN 2>/dev/null |
    while IFS= read -r pid; do
      if [[ -n "$pid" ]]; then
        kill "-$sig" "$pid" 2>/dev/null
      fi
    done
}

# Stop whatever is listening on the app's port and start its dev server.
# Arguments: $1 - port, $2 - app name, $3 - project directory
# Returns:   1 if the project directory does not exist, 0 otherwise (the
#            dev server is started in the background and not waited on).
restart_app() {
  local port=$1
  local name=$2
  # Deliberately not called `path`: in zsh that name is the array tied to
  # $PATH, and assigning it would replace the command search path for the
  # rest of this function and for the npm process started below.
  local app_dir=$3

  log_msg "⚠️ $name (port $port) is DOWN - restarting..."

  if [[ ! -d "$app_dir" ]]; then
    log_msg "❌ $name: project directory not found: $app_dir"
    return 1
  fi

  # Ask the current listener to exit, give it a moment, then force-kill
  # anything that is still holding the port so the new server can bind.
  signal_listeners "$port" TERM
  /bin/sleep 2
  signal_listeners "$port" KILL

  # Restart in the background using the full path to npm. The subshell keeps
  # the cd from affecting the monitor's own working directory.
  ( cd "$app_dir" && exec "$NPM_BIN" run dev -- --port "$port" ) > /dev/null 2>&1 &

  log_msg "🔄 $name restarted on port $port"
}

# --- Prevent concurrent runs -------------------------------------------------
if ! try_lock; then
  lock_age=$(( $(date +%s) - $(lock_mtime) ))
  if (( lock_age < LOCK_STALE_SECS )); then
    log_msg "Monitor already running, skipping..."
    exit 0
  fi
  log_msg "Removing stale lock file"
  rm -f -- "$LOCK_FILE"
  if ! try_lock; then
    # Another run took the lock between the removal and this retry.
    log_msg "Monitor already running, skipping..."
    exit 0
  fi
fi

# Ensure the lock is removed on exit. Set at top level (zsh scopes an EXIT
# trap set inside a function to that function) and single-quoted so the
# path is expanded, quoted, when the trap runs.
trap 'rm -f -- "$LOCK_FILE"' EXIT

# --- Check all apps ----------------------------------------------------------
RESTARTED=()
port_list=""
for entry in "${APPS[@]}"; do
  app_port=${entry%%:*}
  entry_rest=${entry#*:}
  app_name=${entry_rest%%:*}
  app_dir=${entry_rest#*:}
  port_list="${port_list:+$port_list, }$app_port"

  if [[ "$(check_health "$app_port" "$app_name")" != "200" ]]; then
    restart_app "$app_port" "$app_name" "$app_dir"
    RESTARTED+=("$app_port:$app_name")
  fi
done

if (( ${#RESTARTED[@]} > 0 )); then
  # Give the restarted servers a moment to come up, then re-verify each.
  /bin/sleep 5
  for entry in "${RESTARTED[@]}"; do
    app_port=${entry%%:*}
    app_name=${entry#*:}
    verify_code=$(check_health "$app_port" "$app_name")
    if [[ "$verify_code" == "200" ]]; then
      log_msg "✅ $app_name verified healthy (HTTP 200)"
    else
      log_msg "❌ $app_name still unhealthy (HTTP $verify_code)"
    fi
  done
else
  # All healthy - log periodically (every 6 runs ~ 30 min with 5-min interval)
  counter=0
  if [[ -f "$COUNTER_FILE" ]]; then
    counter=$(cat "$COUNTER_FILE" 2>/dev/null)
  fi
  # Treat an empty or corrupted counter file as 0 rather than failing the
  # arithmetic below.
  case "$counter" in
    '' | *[!0-9]*) counter=0 ;;
  esac
  counter=$(( 10#$counter + 1 ))   # 10# keeps leading zeros from meaning octal
  if (( counter >= HEALTHY_LOG_EVERY )); then
    log_msg "✅ All web apps healthy ($port_list)"
    counter=0
  fi
  printf '%s\n' "$counter" > "$COUNTER_FILE"
fi