118 lines
3.5 KiB
Bash
Executable File
118 lines
3.5 KiB
Bash
Executable File
#!/bin/zsh

# Web Apps Monitor - Auto-restart if down
# Ports: 3000 (gantt-board), 3003 (blog-backup), 3005 (heartbeat-monitor)
#
# This preamble sets up the log/lock locations, makes sure only one instance
# runs at a time, and prepares PATH for a cron environment.

LOG_FILE="/Users/mattbruce/.openclaw/workspace/memory/web-monitor.log"
LOCK_FILE="/tmp/web-monitor.lock"
# Captured once; every log line written during this run carries this timestamp.
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S %Z')

# Prevent concurrent runs
if [ -f "$LOCK_FILE" ]; then
  # Check if lock is stale (older than 2 minutes).
  # GNU stat (-c %Y) is tried first, then BSD/macOS stat (-f %m). If both
  # fail the mtime falls back to 0, so the lock looks ancient and is removed.
  lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0)
  lock_age=$(( $(date +%s) - lock_mtime ))

  if [ "$lock_age" -lt 120 ]; then
    echo "[$TIMESTAMP] Monitor already running, skipping..." >> "$LOG_FILE"
    exit 0
  fi

  echo "[$TIMESTAMP] Removing stale lock file" >> "$LOG_FILE"
  rm -f -- "$LOCK_FILE"
fi

# Create lock file atomically: with noclobber set, the redirection fails if
# the file already exists, so two runs that both passed the check above
# cannot both acquire the lock. noclobber is confined to the subshell.
if ! ( set -o noclobber; : > "$LOCK_FILE" ) 2>/dev/null; then
  echo "[$TIMESTAMP] Monitor already running, skipping..." >> "$LOG_FILE"
  exit 0
fi

# Ensure lock is removed on exit. Single quotes defer expansion until the
# trap fires and keep the path quoted. The trap is installed only after the
# lock is ours, so an early "already running" exit never deletes another
# run's lock.
trap 'rm -f -- "$LOCK_FILE"' EXIT

# Ensure PATH for cron (include Homebrew on macOS)
export PATH="/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$HOME/.local/bin:$PATH"

# Ensure log file exists
touch "$LOG_FILE"
|
|
|
|
# Function to check health
#
# Probes http://localhost:<port> with curl (5 second limit).
# Arguments: $1 - port to probe
#            $2 - app name (accepted so call sites stay unchanged; not used)
# Outputs:   the HTTP status code reported by curl on stdout, or the literal
#            string "DOWN" when curl itself exits non-zero.
check_health() {
  local port=$1
  local code

  # curl still emits its -w output ("000") when the request fails, so the
  # fallback must replace the captured text rather than be appended to it;
  # appending would produce "000DOWN".
  if code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "http://localhost:$port" 2>/dev/null); then
    echo "$code"
  else
    echo "DOWN"
  fi
}
|
|
|
|
# Function to restart app
#
# Logs that the app is down, tries to kill the old process, then launches
# `npm run dev` for the app in the background.
# Globals:   TIMESTAMP (read), LOG_FILE (appended to)
# Arguments: $1 - port the app should listen on
#            $2 - app name (used in log messages)
#            $3 - project directory to start the app from
# Returns:   1 if the project directory does not exist, 0 otherwise. The
#            launch is backgrounded, so 0 does not mean the app came up.
restart_app() {
  local port=$1
  local name=$2
  # Deliberately NOT named "path": under zsh, `path` is the special array tied
  # to $PATH, so `local path=...` would replace PATH inside this function and
  # in the environment handed to the child process.
  local app_dir=$3

  echo "[$TIMESTAMP] ⚠️ $name (port $port) is DOWN - restarting..." >> "$LOG_FILE"

  # Kill process on that port
  # NOTE(review): pkill -f matches command lines containing ":<port>"; a dev
  # server started with "--port <port>" may not match - confirm this pattern
  # actually hits the running process.
  pkill -f ":$port" 2>/dev/null
  /bin/sleep 2

  # Do not claim a restart when there is nothing to start.
  if [ ! -d "$app_dir" ]; then
    echo "[$TIMESTAMP] ❌ $name not restarted - directory not found: $app_dir" >> "$LOG_FILE"
    return 1
  fi

  # Restart in background using full path to npm
  # The whole `cd && npm` list is backgrounded, so the caller's working
  # directory is not changed.
  cd "$app_dir" && /opt/homebrew/bin/npm run dev -- --port "$port" > /dev/null 2>&1 &

  echo "[$TIMESTAMP] 🔄 $name restarted on port $port" >> "$LOG_FILE"
}
|
|
|
|
# Check all apps
#
# Each entry is "<port>:<name>". Every app lives in a directory named after
# itself under APPS_ROOT. An app whose health check returns anything other
# than "200" is restarted and its name is appended to RESTARTED, which the
# re-verification step reads afterwards.
APPS_ROOT="/Users/mattbruce/Documents/Projects/OpenClaw/Web"
RESTARTED=()

for app_entry in "3000:gantt-board" "3003:blog-backup" "3005:heartbeat-monitor"; do
  app_port=${app_entry%%:*}
  app_name=${app_entry#*:}

  app_code=$(check_health "$app_port" "$app_name")
  if [ "$app_code" != "200" ]; then
    restart_app "$app_port" "$app_name" "$APPS_ROOT/$app_name"
    RESTARTED+=("$app_name")
  fi
done
|
|
|
|
# If any were restarted, wait and re-verify
if [ "${#RESTARTED[@]}" -gt 0 ]; then
  # Give the freshly launched dev servers a moment before probing again.
  /bin/sleep 5

  for app in "${RESTARTED[@]}"; do
    case "$app" in
      "gantt-board") port=3000 ;;
      "blog-backup") port=3003 ;;
      "heartbeat-monitor") port=3005 ;;
      # Unknown name: skip it rather than probe a port left over from a
      # previous iteration.
      *) continue ;;
    esac

    verify_code=$(check_health "$port" "$app")
    if [ "$verify_code" = "200" ]; then
      echo "[$TIMESTAMP] ✅ $app verified healthy (HTTP 200)" >> "$LOG_FILE"
    else
      echo "[$TIMESTAMP] ❌ $app still unhealthy (HTTP $verify_code)" >> "$LOG_FILE"
    fi
  done
else
  # All healthy - log periodically (every 6 runs ~ 30 min with 5-min interval)
  counter_file="/tmp/web-monitor-counter"

  counter=0
  if [ -f "$counter_file" ]; then
    counter=$(cat "$counter_file")
  fi

  # An empty or non-numeric counter file (truncated, hand-edited, ...) is
  # treated as 0 instead of being fed to arithmetic and `[ -ge ]`.
  case "$counter" in
    '' | *[!0-9]*) counter=0 ;;
  esac

  counter=$((counter + 1))

  if [ "$counter" -ge 6 ]; then
    echo "[$TIMESTAMP] ✅ All web apps healthy (3000, 3003, 3005)" >> "$LOG_FILE"
    counter=0
  fi

  echo "$counter" > "$counter_file"
fi
|