test-repo/scripts/monitor_web_apps.sh
Matt Bruce b934c9fdb3 Task #7: Root cause analysis - why websites die
- Analyzed system limits, memory usage, process status
- Identified primary suspect: Next.js dev server memory leaks
- Secondary suspects: macOS power mgmt, SSH timeout, OOM killer
- Created monitoring script for CPU/memory/file descriptors
- Documented recommendations: production builds, PM2, nohup
2026-02-18 16:04:44 -06:00

94 lines
2.8 KiB
Bash
Executable File

#!/bin/bash
#
# Web app monitor.
#
# Probes each local web app listed in PORTS over HTTP. An app that does not
# answer with HTTP 200 has the process on its port killed and is relaunched
# from the matching entry in PATHS. Every message is printed to stdout and
# appended to LOG_FILE.

LOG_FILE="/Users/mattbruce/.openclaw/workspace/logs/app_monitor.log"

# Captured once at startup: every line logged during this run carries the
# same timestamp, including lines written after the sleeps later in the run.
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')

# tee -a creates the log file but not its parent directories, so without this
# every log write would fail on a machine where the directory does not exist.
# A failure here is reported but not fatal: monitoring still runs, and
# messages still reach stdout.
mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null ||
  echo "[$TIMESTAMP] WARNING: cannot create log directory for $LOG_FILE" >&2

echo "[$TIMESTAMP] === Starting Web App Monitor ===" | tee -a "$LOG_FILE"

# Port to project mapping. Two parallel indexed arrays are used for bash 3
# compatibility (associative arrays need bash 4+): PORTS[i] is the port of
# the project located at PATHS[i], so both arrays must stay the same length.
PORTS=(3000 3003 3005)
PATHS=(
  "/Users/mattbruce/Documents/Projects/OpenClaw/Web/gantt-board"
  "/Users/mattbruce/Documents/Projects/OpenClaw/Web/blog-backup"
  "/Users/mattbruce/Documents/Projects/OpenClaw/Web/heartbeat-monitor"
)

# Ports for which a restart was attempted during this run.
NEEDS_RESTART=()
#######################################
# Probe a local web app over HTTP and report whether it is healthy.
# Globals:   TIMESTAMP (read), LOG_FILE (appended to)
# Arguments: $1 - TCP port; the URL probed is http://localhost:<port>
# Outputs:   one status line, written to stdout and appended to LOG_FILE
# Returns:   0 if the final HTTP status is 200, 1 otherwise
#######################################
check_port() {
  local port=$1
  local url="http://localhost:$port"
  # Local so the probe result does not leak into (or clobber) the caller's
  # variables.
  local response

  # -L follows redirects, so an app that answers with a 3xx pointing at a page
  # that returns 200 is treated as healthy; %{http_code} then reports the
  # status of the final response. --max-time 5 bounds the whole request.
  # When no HTTP response is received the captured code is not 200 (curl
  # prints 000), so connection failures fall through to the DOWN branch.
  response=$(curl -s -L -o /dev/null -w "%{http_code}" --max-time 5 "$url" 2>/dev/null)

  if [[ "$response" == "200" ]]; then
    echo "[$TIMESTAMP] ✓ Port $port - HTTP 200 OK" | tee -a "$LOG_FILE"
    return 0
  fi

  echo "[$TIMESTAMP] ✗ Port $port - DOWN (response: $response)" | tee -a "$LOG_FILE"
  return 1
}
#######################################
# Stop whatever process is listening on a TCP port.
# Globals:   TIMESTAMP (read), LOG_FILE (appended to)
# Arguments: $1 - TCP port to free
# Outputs:   progress lines, written to stdout and appended to LOG_FILE
# Returns:   0 in all cases (nothing listening is not an error)
#######################################
kill_port() {
  local port=$1
  local pid
  local -a pids=()
  local -a survivors=()

  echo "[$TIMESTAMP] → Killing process on port $port..." | tee -a "$LOG_FILE"

  # Only TCP listeners are selected (-sTCP:LISTEN). A bare "lsof -i:<port>"
  # also matches processes that merely hold a client connection to the port
  # (for example a browser tab open on the app), and those must not be killed.
  # -nP skips host/port name lookups; -t prints bare PIDs, one per line.
  while IFS= read -r pid; do
    [ -n "$pid" ] && pids+=("$pid")
  done < <(lsof -nP -t -iTCP:"$port" -sTCP:LISTEN 2>/dev/null)

  if [ "${#pids[@]}" -eq 0 ]; then
    echo "[$TIMESTAMP] → No process found on port $port" | tee -a "$LOG_FILE"
    return 0
  fi

  echo "[$TIMESTAMP] → Found PIDs: ${pids[*]}" | tee -a "$LOG_FILE"

  # Ask politely first so the server can shut down cleanly.
  kill -TERM "${pids[@]}" 2>/dev/null
  sleep 2

  # kill -0 sends no signal; it only tests whether the PID still exists.
  for pid in "${pids[@]}"; do
    if kill -0 "$pid" 2>/dev/null; then
      survivors+=("$pid")
    fi
  done

  # Escalate to SIGKILL only for processes that ignored SIGTERM.
  if [ "${#survivors[@]}" -gt 0 ]; then
    echo "[$TIMESTAMP] → Still alive after SIGTERM, sending SIGKILL: ${survivors[*]}" | tee -a "$LOG_FILE"
    kill -KILL "${survivors[@]}" 2>/dev/null
    sleep 2
  fi

  echo "[$TIMESTAMP] → Killed processes on port $port" | tee -a "$LOG_FILE"
}
#######################################
# Relaunch an app's dev server in the background on the given port.
# Globals:   TIMESTAMP (read), LOG_FILE (appended to),
#            NEEDS_RESTART (port appended)
# Arguments: $1 - port passed to the dev server via "--port"
#            $2 - project directory to run "npm run dev" from
# Outputs:   progress lines, written to stdout and appended to LOG_FILE
# Returns:   0 once the launch has been started in the background,
#            1 if the project directory does not exist
#######################################
restart_app() {
  local port=$1
  local project_path=$2

  echo "[$TIMESTAMP] → Restarting app on port $port..." | tee -a "$LOG_FILE"
  echo "[$TIMESTAMP] → Path: $project_path" | tee -a "$LOG_FILE"

  # Recorded before anything can fail: the app needed a restart either way,
  # and a non-empty NEEDS_RESTART is what triggers the later verification.
  NEEDS_RESTART+=("$port")

  # Without this check a missing directory would only make the backgrounded
  # "cd" fail silently, leaving no trace in the log of why the app never
  # came back.
  if [ ! -d "$project_path" ]; then
    echo "[$TIMESTAMP] ✗ Cannot restart port $port - project directory not found: $project_path" | tee -a "$LOG_FILE"
    return 1
  fi

  # The whole "cd && nohup ..." list is backgrounded, so the cd happens in a
  # subshell and the monitor's own working directory is left unchanged.
  # The dev server's output is discarded.
  cd "$project_path" && nohup npm run dev -- --port "$port" > /dev/null 2>&1 &
}
# Pass 1: probe every configured app. A healthy app is skipped; an unhealthy
# one has its port cleared and is relaunched from the project directory that
# shares its index in PATHS.
for i in "${!PORTS[@]}"; do
  port="${PORTS[$i]}"
  path="${PATHS[$i]}"
  check_port "$port" && continue
  kill_port "$port"
  restart_app "$port" "$path"
done

# Pass 2: only when at least one restart was attempted, give the apps a
# moment to boot and then probe every configured port again.
if [ "${#NEEDS_RESTART[@]}" -eq 0 ]; then
  echo "[$TIMESTAMP] All apps healthy, no restart needed" | tee -a "$LOG_FILE"
else
  echo "[$TIMESTAMP] Waiting 5 seconds for apps to start..." | tee -a "$LOG_FILE"
  sleep 5
  echo "[$TIMESTAMP] === Post-Restart Verification ===" | tee -a "$LOG_FILE"
  for port in "${PORTS[@]}"; do
    # The pipeline binds tighter than ||, so the warning is logged only when
    # the probe fails.
    check_port "$port" ||
      echo "[$TIMESTAMP] ⚠ Port $port still not responding after restart" | tee -a "$LOG_FILE"
  done
fi

# Footer plus a blank separator line between runs in the log.
echo "[$TIMESTAMP] === Monitor Complete ===" | tee -a "$LOG_FILE"
echo "" | tee -a "$LOG_FILE"