test-repo/scripts/webapp-monitor.sh
Matt Bruce b934c9fdb3 Task #7: Root cause analysis - why websites die
- Analyzed system limits, memory usage, process status
- Identified primary suspect: Next.js dev server memory leaks
- Secondary suspects: macOS power mgmt, SSH timeout, OOM killer
- Created monitoring script for CPU/memory/file descriptors
- Documented recommendations: production builds, PM2, nohup
2026-02-18 16:04:44 -06:00

98 lines
2.3 KiB
Bash
Executable File

#!/bin/bash
# webapp-monitor: probe the local web apps and relaunch any that do not answer.
# Watched ports: 3000 (gantt-board), 3003 (blog-backup), 3005 (heartbeat-monitor)

# File that log() appends to. Its parent directory is created up front so the
# directory exists before the first message is written.
LOG_FILE="/Users/mattbruce/.openclaw/workspace/logs/webapp-monitor.log"
mkdir -p "${LOG_FILE%/*}"
# Print the current date and time as "YYYY-MM-DD HH:MM:SS TZ".
# Outputs: one line on stdout.
timestamp() {
  local fmt='+%Y-%m-%d %H:%M:%S %Z'
  date "$fmt"
}
# Write one timestamped message to stdout and append the same line to $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text (any further arguments are ignored)
# Outputs:   "[<timestamp>] <message>" on stdout and in the log file
log() {
  local message=$1
  printf '%s\n' "[$(timestamp)] $message" | tee -a "$LOG_FILE"
}
# Probe http://localhost:<port> and report whether it answered with HTTP 200.
# Arguments: $1 - TCP port to probe
# Outputs:   "up" when the reported status code contains "200", else "down"
#            (any other status, a timeout after 5 seconds, or a connection
#            failure all count as "down")
check_port() {
  local port=$1
  local timeout=5
  local code

  # -o /dev/null discards the response body, so -w leaves only the status
  # code on stdout.
  code=$(curl -s -o /dev/null -w "%{http_code}" --max-time "$timeout" "http://localhost:$port")

  case "$code" in
    *200*) echo "up" ;;
    *) echo "down" ;;
  esac
}
# Stop whatever process is listening on the given TCP port.
#
# The listeners are looked up by socket (lsof) rather than by matching
# ":<port>" against command lines: the servers are launched with
# "--port <port>", which such a pattern never matches, while unrelated
# processes that merely mention ":<port>" in their arguments would match.
#
# Arguments: $1 - TCP port whose listener(s) should be stopped
# Outputs:   one log line announcing the kill
# Returns:   0 always; having nothing to kill is not an error
kill_port() {
  local port=$1
  local sig pid
  local -a pids

  log "Killing process on port $port..."

  # Ask politely first (TERM); only listeners that are still present after
  # the grace period are sent KILL.
  for sig in TERM KILL; do
    pids=()
    # -t prints bare PIDs; -sTCP:LISTEN restricts the match to the listening
    # socket so clients connected to the port are left alone.
    while IFS= read -r pid; do
      if [[ -n "$pid" ]]; then
        pids+=("$pid")
      fi
    done < <(lsof -t -i "tcp:$port" -sTCP:LISTEN 2>/dev/null)

    # Nothing (left) listening: done, no need to wait.
    (( ${#pids[@]} > 0 )) || break

    # A PID may exit between lookup and signal; that race is harmless.
    kill -s "$sig" "${pids[@]}" 2>/dev/null || true
    sleep 2
  done

  return 0
}
# Launch one app's dev server in the background on the given port.
#
# Arguments: $1 - port handed to the dev server via "--port"
#            $2 - project directory to run "npm run dev" from
#            $3 - display name used in log messages
# Outputs:   one log line (either the restart notice or the failure reason)
# Returns:   1 if the project directory does not exist (nothing is launched),
#            0 once the launch has been handed to the background. The launch
#            itself is asynchronous, so its outcome is not reported here.
restart_app() {
  local port=$1
  local dir=$2
  local name=$3

  # Fail loudly instead of logging "Restarting" and then silently doing
  # nothing when the directory is missing.
  if [[ ! -d "$dir" ]]; then
    log "Cannot restart $name: directory $dir does not exist"
    return 1
  fi

  log "Restarting $name on port $port..."

  # The subshell confines the cd to the launched job. nohup plus the stdin
  # redirect detach the server from the invoking terminal, so it is not taken
  # down by a hangup when that terminal or SSH session goes away.
  (
    cd "$dir" || exit 1
    nohup npm run dev -- --port "$port" < /dev/null > /dev/null 2>&1
  ) &
}
# Apps under watch. Each element is one "port:project-dir:name" record; the
# loops below split a record on ':' into its three fields.
APPS=()
APPS+=("3000:/Users/mattbruce/Documents/Projects/OpenClaw/Web/gantt-board:gantt-board")
APPS+=("3003:/Users/mattbruce/Documents/Projects/OpenClaw/Web/blog-backup:blog-backup")
APPS+=("3005:/Users/mattbruce/Documents/Projects/OpenClaw/Web/heartbeat-monitor:heartbeat-monitor")
log "=== Starting web app monitor check ==="

# Pass 1: probe every configured app and remember the records that are not up.
NEEDS_RESTART=()
for app in "${APPS[@]}"; do
  IFS=':' read -r port dir name <<< "$app"
  status=$(check_port "$port")
  case "$status" in
    up)
      log "$name (port $port) is UP"
      ;;
    *)
      log "$name (port $port) is DOWN - will restart"
      NEEDS_RESTART+=("$app")
      ;;
  esac
done

if [[ ${#NEEDS_RESTART[@]} -eq 0 ]]; then
  log "All apps healthy, no action needed"
else
  # Pass 2: clear the port and relaunch each app that failed its probe.
  log "--- Restarting ${#NEEDS_RESTART[@]} app(s) ---"
  for app in "${NEEDS_RESTART[@]}"; do
    IFS=':' read -r port dir name <<< "$app"
    kill_port "$port"
    restart_app "$port" "$dir" "$name"
    sleep 1
  done

  # The launches are asynchronous, so pause before probing again.
  log "Waiting 5 seconds for apps to start..."
  sleep 5

  # Pass 3: probe only the relaunched apps and record the outcome.
  log "--- Verification ---"
  for app in "${NEEDS_RESTART[@]}"; do
    IFS=':' read -r port dir name <<< "$app"
    status=$(check_port "$port")
    case "$status" in
      up)
        log "$name (port $port) is now UP"
        ;;
      *)
        log "$name (port $port) still DOWN after restart"
        ;;
    esac
  done
fi

log "=== Monitor check complete ==="