#!/bin/zsh
#
# Disk Space Monitor
#
# Checks the usage of every mounted filesystem, queues a WARNING alert at
# WARN_THRESHOLD percent and a CRITICAL alert at CRITICAL_THRESHOLD percent,
# and queues a RECOVERED notice once usage has dropped back below
# WARN_THRESHOLD - 5. Alerts are appended to "$STATE_DIR/alerts.queue"; this
# script does not deliver them itself.
#
# The code deliberately sticks to constructs that behave the same in zsh and
# bash (no arrays, every expansion quoted, printf instead of echo).
#

readonly WORKSPACE_DIR="/Users/mattbruce/.openclaw/workspace"
readonly STATE_DIR="$WORKSPACE_DIR/scripts/security-monitors/state"
readonly LOG_FILE="$WORKSPACE_DIR/scripts/security-monitors/logs/disk-monitor.log"
readonly ALERT_STATE_FILE="$STATE_DIR/disk-alert-state"

# Thresholds (percent of capacity used)
readonly WARN_THRESHOLD=80
readonly CRITICAL_THRESHOLD=90

# Create directories
mkdir -p "${LOG_FILE%/*}" "$STATE_DIR" \
  || printf 'disk-monitor: cannot create log/state directories\n' >&2

# Print the current local time, e.g. "2024-01-31 13:45:00 PST".
timestamp() {
  date '+%Y-%m-%d %H:%M:%S %Z'
}

# Append a timestamped line to LOG_FILE.
# Arguments: $1 - message text
log() {
  printf '[%s] %s\n' "$(timestamp)" "$1" >> "$LOG_FILE"
}

# Append an alert record to the alert queue file.
# Arguments: $1 - level (CRITICAL | WARNING | RECOVERED)
#            $2 - message body (may span several lines)
send_alert() {
  local level="$1"
  local message="$2"
  printf '%s | %s | DISK | %s\n' "$(timestamp)" "$level" "$message" \
    >> "$STATE_DIR/alerts.queue"
}

# Succeed only when $1 is a non-empty string of decimal digits.
_is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# Turn `df -P` output (stdin) into "mount,NN%" lines (stdout).
# Arguments: $1 - optional ERE; raw df rows matching it are dropped
# The capacity column is located by its "NN%" shape rather than by a fixed
# position, so filesystem names and mount points containing spaces do not
# shift the result. Rows without a numeric capacity (e.g. "-") are skipped.
_parse_df_rows() {
  awk -v skip="${1:-}" '
    NR == 1 { next }                      # header row
    skip != "" && $0 ~ skip { next }
    {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^[0-9]+%$/) {
          mnt = ""
          for (j = i + 1; j <= NF; j++) {
            mnt = mnt (j > i + 1 ? " " : "") $j
          }
          if (mnt != "") print mnt "," $i
          break
        }
      }
    }
  '
}

# Print the used-capacity percentage (digits only) of one filesystem.
# Arguments: $1 - path on the filesystem to inspect
# Outputs:   the number, or an empty line when df reports nothing usable
check_disk_usage() {
  local filesystem="$1"
  local row usage
  row=$(df -P "$filesystem" 2>/dev/null | _parse_df_rows | tail -n 1)
  usage=${row##*,}
  usage=${usage%\%}
  printf '%s\n' "$usage"
}

# List every mounted filesystem as "mount,NN%", one per line, excluding
# system/special volumes. `df -P` is used because the default macOS layout
# inserts inode columns between Capacity and the mount point.
get_all_filesystems() {
  df -P 2>/dev/null | _parse_df_rows 'devfs|map |CoreSimulator|Cryptex'
}

# Decide which alert, if any, should be sent now.
# Globals:   WARN_THRESHOLD, CRITICAL_THRESHOLD (read)
# Arguments: $1 - current highest usage percentage
#            $2 - level recorded by the last alert ("" when none)
#            $3 - usage percentage recorded by the last alert
# Outputs:   CRITICAL, WARNING or RECOVERED on stdout; nothing when no alert
#            is due. Non-numeric $1/$3 are treated as 0.
_decide_alert() {
  local current="$1"
  local last_state="$2"
  local last_usage="$3"
  _is_uint "$current" || current=0
  _is_uint "$last_usage" || last_usage=0

  if (( current >= CRITICAL_THRESHOLD )); then
    # New critical condition, or at least 5 points worse than when we last
    # alerted (there is no time-based rate limit).
    if [[ "$last_state" != "CRITICAL" ]] || (( current - last_usage >= 5 )); then
      printf '%s\n' "CRITICAL"
    fi
  elif (( current >= WARN_THRESHOLD )); then
    # Warn once; stay quiet while a WARNING or CRITICAL alert is outstanding.
    if [[ "$last_state" != "WARNING" && "$last_state" != "CRITICAL" ]]; then
      printf '%s\n' "WARNING"
    fi
  elif [[ "$last_state" == "CRITICAL" || "$last_state" == "WARNING" ]]; then
    # Hysteresis: only clear once usage is 5 points below the warn threshold.
    if (( current < WARN_THRESHOLD - 5 )); then
      printf '%s\n' "RECOVERED"
    fi
  fi
  return 0
}

# Main monitoring logic: measure, decide, then either queue an alert and
# record the new state, or bump the heartbeat counter.
main() {
  local nl=$'\n'
  local max_usage=0
  local critical_fs=""

  # Check main filesystem (/) first
  local root_usage
  root_usage=$(check_disk_usage "/")
  if _is_uint "$root_usage"; then
    max_usage=$root_usage
    critical_fs="/"
  fi

  # Check all filesystems
  local fs_list row mount usage usage_num
  local details="Disk Usage Report:${nl}"
  fs_list=$(get_all_filesystems)
  while IFS= read -r row; do
    [[ -z "$row" ]] && continue
    # Split on the LAST comma so mount points containing commas survive.
    mount=${row%,*}
    usage=${row##*,}
    usage_num=${usage%\%}
    [[ -z "$mount" ]] && continue
    _is_uint "$usage_num" || continue

    details+="  ${mount}: ${usage}${nl}"
    if (( usage_num > max_usage )); then
      max_usage=$usage_num
      critical_fs="$mount"
    fi
  done <<< "$fs_list"

  # State left by the previous alert: "level|usage|timestamp"
  local last_state="" last_usage=0 last_time=""
  if [[ -f "$ALERT_STATE_FILE" ]]; then
    IFS='|' read -r last_state last_usage last_time < "$ALERT_STATE_FILE"
  fi

  local alert_level
  alert_level=$(_decide_alert "$max_usage" "$last_state" "$last_usage")

  if [[ -n "$alert_level" ]]; then
    local host_name emoji="" title="" advice=""
    host_name=$(hostname -s)

    case "$alert_level" in
      CRITICAL)
        emoji="🚨"
        title="CRITICAL: Disk Space Exhaustion Imminent"
        advice="🛑 **ACTION REQUIRED:** Free up disk space immediately!"
        ;;
      WARNING)
        emoji="⚠️"
        title="WARNING: Disk Space Running Low"
        advice="💡 **Recommendation:** Review and clean up unnecessary files."
        ;;
      RECOVERED)
        emoji="✅"
        title="RESOLVED: Disk Space Recovered"
        ;;
    esac

    local alert_msg
    alert_msg="${emoji} **${title}** ${emoji}${nl}${nl}"
    alert_msg+="**Host:** ${host_name}${nl}"
    alert_msg+="**Time:** $(timestamp)${nl}"
    alert_msg+="**Most Critical Mount:** ${critical_fs} (${max_usage}% used)${nl}${nl}"
    alert_msg+="**All Filesystems:**${nl}"
    alert_msg+="${details}${nl}"
    if [[ -n "$advice" ]]; then
      alert_msg+="${advice}${nl}${nl}"
    fi
    alert_msg+="_Detected by OpenClaw Disk Monitor_"

    send_alert "$alert_level" "$alert_msg"
    log "$alert_level alert sent for $critical_fs (${max_usage}% usage)"

    # Update state
    printf '%s|%s|%s\n' "$alert_level" "$max_usage" "$(timestamp)" \
      > "$ALERT_STATE_FILE"

    # Log to daily security log
    local daily_log
    daily_log="$WORKSPACE_DIR/memory/$(date '+%Y-%m-%d')-security.log"
    mkdir -p "${daily_log%/*}"
    printf 'DISK_%s|%s|%s|%s%%\n' \
      "$alert_level" "$(timestamp)" "$critical_fs" "$max_usage" >> "$daily_log"
  else
    # No new alert - write a heartbeat line every 6th run
    local counter_file="$STATE_DIR/disk-check-counter"
    local counter=0
    if [[ -f "$counter_file" ]]; then
      read -r counter < "$counter_file"
    fi
    _is_uint "$counter" || counter=0
    counter=$((counter + 1))

    if (( counter >= 6 )); then
      if (( max_usage >= WARN_THRESHOLD )); then
        # An alert for this condition is already outstanding.
        log "Disk check: usage still elevated, alert already sent. Max usage: ${max_usage}% on $critical_fs"
      else
        log "Disk check normal. Max usage: ${max_usage}% on $critical_fs"
      fi
      counter=0
    fi

    printf '%s\n' "$counter" > "$counter_file"
  fi
}

# Run main function
main "$@"