chore(hq): daily sync 2026-02-17
This commit is contained in:
30
hq/scripts/discord-watchdog.sh
Executable file
30
hq/scripts/discord-watchdog.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
#
# Discord Watchdog — detects reconnect storms and auto-restarts affected agents
# Run via cron every 5 minutes
#
# For each agent listed below, the script counts disconnect-looking lines in
# the journal of the user unit "openclaw-atomizer@<agent>" over the recent
# WINDOW and restarts the unit when the count reaches THRESHOLD. Every action
# is appended to LOG_FILE.

# Instance names substituted into the "openclaw-atomizer@<agent>" unit name.
readonly AGENTS=(manager tech-lead secretary auditor webster optimizer study-builder nx-expert)

# Restart an agent once it logs this many disconnect events within WINDOW
# (the comparison is >=, so exactly THRESHOLD events already triggers it).
readonly THRESHOLD=10

# Look-back period, passed verbatim to `journalctl --since`.
readonly WINDOW="5 min ago"

# File that log() appends to.
readonly LOG_FILE="/home/papa/atomizer/logs/watchdog.log"

# Make sure the log directory exists before the first log() call.
mkdir -p -- "$(dirname -- "$LOG_FILE")"

#######################################
# Append one timestamped line to the watchdog log.
# Globals:   LOG_FILE (read) - path of the file appended to
# Arguments: message words; joined with single spaces into one line
# Outputs:   nothing on stdout; writes "YYYY-MM-DD HH:MM:SS <message>"
#            (timestamp in UTC) to LOG_FILE
#######################################
log() {
  printf '%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S')" "$*" >> "$LOG_FILE"
}

#######################################
# Count the lines on stdin that look like Discord/WebSocket disconnects.
# Arguments: none (reads journal text from stdin)
# Outputs:   exactly one non-negative integer on stdout
# Returns:   0 always
#######################################
count_disconnect_events() {
  local matches
  # `grep -c` prints the count itself — including "0" — and merely exits 1
  # when nothing matched. A fallback `|| echo 0` would therefore produce the
  # two-line value "0\n0" and break the numeric comparison in the caller.
  matches=$(grep -c "discord.*close\|ws close\|code 100[56]") || true
  # If grep could not run at all, fall back to a clean zero.
  [[ "$matches" =~ ^[0-9]+$ ]] || matches=0
  printf '%s\n' "$matches"
}

# Check every agent independently; a problem with one must not stop the rest.
for agent in "${AGENTS[@]}"; do
  unit="openclaw-atomizer@${agent}"

  # Count close/disconnect events in the window. journalctl's stderr is
  # discarded on purpose: an unreadable or missing journal counts as 0 events.
  count=$(journalctl --user -u "$unit" --since "$WINDOW" --no-pager 2>/dev/null \
    | count_disconnect_events)
  count=${count:-0}

  if (( count >= THRESHOLD )); then
    log "STORM DETECTED: ${agent} had ${count} disconnect events in 5min — restarting"
    systemctl --user restart "$unit"
    # Give the unit a moment to settle before probing its state.
    sleep 2
    # Verify it came back
    if systemctl --user is-active "$unit" >/dev/null 2>&1; then
      log "RESTARTED: ${agent} is back up"
    else
      log "FAILED: ${agent} did not restart cleanly"
    fi
  fi
done
Reference in New Issue
Block a user