#!/bin/zsh
#
# Disk Space Monitor
# Raises a WARNING once any monitored filesystem reaches WARN_THRESHOLD (80%)
# and a CRITICAL alert once it reaches CRITICAL_THRESHOLD (90%).
# Alerts are appended to $STATE_DIR/alerts.queue; delivery (e.g. via Telegram)
# is presumably handled by a separate consumer of that queue -- it is not done
# in this file.
#

STATE_DIR="/Users/mattbruce/.openclaw/workspace/scripts/security-monitors/state"
LOG_FILE="/Users/mattbruce/.openclaw/workspace/scripts/security-monitors/logs/disk-monitor.log"
ALERT_STATE_FILE="$STATE_DIR/disk-alert-state"

# Thresholds (percent of capacity in use)
WARN_THRESHOLD=80
CRITICAL_THRESHOLD=90

# Create the log and state directories up front so later appends cannot fail
# on a missing parent directory.
mkdir -p "$(dirname "$LOG_FILE")" "$STATE_DIR"
# Timestamp helper
# Outputs: the current local time as "YYYY-MM-DD HH:MM:SS TZ" on stdout.
timestamp() {
  date '+%Y-%m-%d %H:%M:%S %Z'
}
# Log to file
# Appends one "[<timestamp>] <message>" line to $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
log() {
  # printf '%s' writes the message verbatim; echo would interpret backslash
  # sequences (zsh default) or a leading -n/-e in the message.
  printf '[%s] %s\n' "$(timestamp)" "$1" >> "$LOG_FILE"
}
# Send alert to queue
# Appends "<timestamp> | <level> | DISK | <message>" to $STATE_DIR/alerts.queue.
# Globals:   STATE_DIR (read)
# Arguments: $1 - alert level (main passes CRITICAL, WARNING or RECOVERED)
#            $2 - message body (may span several lines)
send_alert() {
  local level="$1"
  local message="$2"
  # echo is kept on purpose: main embeds literal "\n" sequences in the
  # filesystem report, and zsh's echo turns them into real line breaks.
  echo "$(timestamp) | $level | DISK | $message" >> "$STATE_DIR/alerts.queue"
}
# Check disk usage
# Prints the capacity-used percentage (digits only, no "%") of the filesystem
# that holds the given path. Prints an empty line when df fails or reports no
# percentage.
# Arguments: $1 - path or mount point to query
check_disk_usage() {
  local filesystem="$1"
  local usage
  # Take the first "N%" field of the last output line instead of a fixed
  # column: a device name containing a space (e.g. "map auto_home") shifts
  # every later column by one. -P requests the POSIX layout, which has a
  # single percentage column.
  usage=$(df -P "$filesystem" 2>/dev/null | awk '
    {
      pct = ""
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^[0-9]+%$/) { pct = $i; break }
      }
    }
    END { sub(/%$/, "", pct); print pct }')
  echo "$usage"
}
# Get all mounted filesystems and their usage (exclude system/special volumes)
# Outputs: one "<mount point>,<capacity>%" line per filesystem on stdout.
# Lines mentioning devfs, "map ", CoreSimulator or Cryptex are skipped.
get_all_filesystems() {
  # The mount point is not at a fixed column: macOS `df -h` prints inode
  # columns (iused/ifree/%iused) before "Mounted on", and device names or
  # mount points may contain spaces. Capacity is therefore taken from the
  # first "N%" field and the mount point is everything after the last one.
  # -P asks for the POSIX layout, which has a single percentage column.
  df -P 2>/dev/null | awk '
    NR == 1 { next }
    /devfs|map |CoreSimulator|Cryptex/ { next }
    {
      first = 0
      last = 0
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^[0-9]+%$/) {
          if (!first) first = i
          last = i
        }
      }
      # No percentage column, or nothing after it: not a usable row.
      if (!first || last == NF) next
      mount = $(last + 1)
      for (i = last + 2; i <= NF; i++) mount = mount " " $i
      print mount "," $first
    }'
}
# Main monitoring logic
# Finds the fullest filesystem, decides whether a CRITICAL, WARNING or
# RECOVERED alert is due, and records the outcome.
# Globals:   STATE_DIR, ALERT_STATE_FILE, WARN_THRESHOLD, CRITICAL_THRESHOLD
#            (read); DAILY_LOG_DIR (optional override of the directory that
#            receives the daily security log)
# Calls:     check_disk_usage, get_all_filesystems, send_alert, log, timestamp
# Files:     on alert, rewrites $ALERT_STATE_FILE as "<level>|<usage>|<time>"
#            and appends to the daily security log; otherwise updates
#            $STATE_DIR/disk-check-counter
main() {
  # Anything read from df or from state files is validated against this
  # before it reaches an arithmetic comparison.
  local uint_re='^[0-9]+$'
  local alert_needed=false
  local alert_level=""
  local max_usage=0
  local critical_fs=""

  # Check main filesystem (/) first
  local root_usage
  root_usage=$(check_disk_usage "/")
  if [[ "$root_usage" =~ $uint_re ]]; then
    max_usage=$root_usage
    critical_fs="/"
  fi

  # Check all filesystems
  local fs_list
  fs_list=$(get_all_filesystems)

  # The "\n" sequences stay literal here; send_alert's echo expands them.
  local details="Disk Usage Report:\n"
  local fs_line mount usage usage_num
  while IFS= read -r fs_line; do
    # Split on the LAST comma so a mount point containing a comma stays whole.
    case "$fs_line" in
      *,*)
        mount=${fs_line%,*}
        usage=${fs_line##*,}
        ;;
      *)
        mount=$fs_line
        usage=""
        ;;
    esac
    [[ -z "$mount" ]] && continue
    usage_num=${usage%\%}
    details="${details} $mount: $usage\n"

    # A non-numeric capacity still appears in the report but cannot be
    # compared against the thresholds.
    [[ "$usage_num" =~ $uint_re ]] || continue

    if [[ "$usage_num" -gt "$max_usage" ]]; then
      max_usage=$usage_num
      critical_fs="$mount"
    fi

    if [[ "$usage_num" -ge "$CRITICAL_THRESHOLD" ]]; then
      alert_needed=true
      alert_level="CRITICAL"
    elif [[ "$usage_num" -ge "$WARN_THRESHOLD" ]] && [[ "$alert_level" != "CRITICAL" ]]; then
      alert_needed=true
      alert_level="WARNING"
    fi
  done <<< "$fs_list"

  # Load the last alerted state (prevents repeating the same alert every run)
  local last_state=""
  local last_usage=0
  local last_stamp=""   # third field of the state file; parsed but unused
  if [[ -f "$ALERT_STATE_FILE" ]]; then
    IFS='|' read -r last_state last_usage last_stamp < "$ALERT_STATE_FILE"
  fi
  [[ "$last_usage" =~ $uint_re ]] || last_usage=0

  # Alert logic with hysteresis (alert on rising, clear on falling below threshold-5%)
  local should_alert=false

  if [[ "$alert_needed" == "true" ]]; then
    if [[ "$alert_level" == "CRITICAL" ]]; then
      # Alert when first entering CRITICAL, and again only once usage has
      # grown by at least 5 points since the last alert.
      if [[ "$last_state" != "CRITICAL" ]] || [[ $((max_usage - last_usage)) -ge 5 ]]; then
        should_alert=true
      fi
    elif [[ "$alert_level" == "WARNING" ]]; then
      # Alert only when not already in WARNING or CRITICAL state.
      if [[ "$last_state" != "WARNING" ]] && [[ "$last_state" != "CRITICAL" ]]; then
        should_alert=true
      fi
    fi
  elif [[ "$last_state" == "CRITICAL" ]] || [[ "$last_state" == "WARNING" ]]; then
    # Disk has recovered below threshold - send all-clear
    if [[ $max_usage -lt $((WARN_THRESHOLD - 5)) ]]; then
      alert_level="RECOVERED"
      should_alert=true
    fi
  fi

  if [[ "$should_alert" == "true" ]]; then
    local host_name
    host_name=$(hostname -s 2>/dev/null) || host_name="unknown"
    local emoji=""
    local title=""
    local critical_note=""
    local warning_note=""

    case "$alert_level" in
      CRITICAL)
        emoji="🚨"
        title="CRITICAL: Disk Space Exhaustion Imminent"
        critical_note="🛑 **ACTION REQUIRED:** Free up disk space immediately!"
        ;;
      WARNING)
        emoji="⚠️"
        title="WARNING: Disk Space Running Low"
        warning_note="💡 **Recommendation:** Review and clean up unnecessary files."
        ;;
      RECOVERED)
        emoji="✅"
        title="RESOLVED: Disk Space Recovered"
        ;;
    esac

    local alert_msg
    alert_msg="$emoji **$title** $emoji

**Host:** $host_name
**Time:** $(timestamp)
**Most Critical Mount:** $critical_fs (${max_usage}% used)

**All Filesystems:**
$details
$critical_note
$warning_note

_Detected by OpenClaw Disk Monitor_"

    send_alert "$alert_level" "$alert_msg"
    log "$alert_level alert sent for $critical_fs (${max_usage}% usage)"

    # Update state
    printf '%s|%s|%s\n' "$alert_level" "$max_usage" "$(timestamp)" > "$ALERT_STATE_FILE"

    # Log to daily security log; create its directory first, since nothing
    # else in this script does.
    local daily_log_dir="${DAILY_LOG_DIR:-/Users/mattbruce/.openclaw/workspace/memory}"
    local daily_log
    daily_log="$daily_log_dir/$(date '+%Y-%m-%d')-security.log"
    if mkdir -p "$daily_log_dir" 2>/dev/null; then
      printf 'DISK_%s|%s|%s|%s%%\n' \
        "$alert_level" "$(timestamp)" "$critical_fs" "$max_usage" >> "$daily_log"
    else
      log "Could not create daily security log directory: $daily_log_dir"
    fi
  else
    # No alert this run - write a heartbeat log line on every 6th such run
    local counter_file="$STATE_DIR/disk-check-counter"
    local counter=0
    if [[ -f "$counter_file" ]]; then
      read -r counter < "$counter_file"
    fi
    # An empty or corrupt counter file restarts the count.
    [[ "$counter" =~ $uint_re ]] || counter=0
    counter=$((counter + 1))

    if [[ $counter -ge 6 ]]; then
      log "Disk check normal. Max usage: $max_usage% on $critical_fs"
      counter=0
    fi
    echo "$counter" > "$counter_file"
  fi
}
# Run main function, forwarding any command-line arguments
main "$@"