#!/usr/bin/env bash
#
# deploy/dalidou/dedup-watcher.sh
# -------------------------------
# Host-side watcher for on-demand memory dedup scans (Phase 7A).
#
# The /admin/triage page has a "🔗 Scan for duplicates" button that POSTs
# to /admin/memory/dedup-scan with {project, similarity_threshold, max_batch}.
# The container writes this to project_state (atocore/config/dedup_requested_at).
#
# This script runs on the Dalidou HOST (where the claude CLI lives), checks
# for the flag on every invocation, and runs memory_dedup.py when it is set.
#
# Installed via cron every 2 minutes:
#   */2 * * * * /srv/storage/atocore/app/deploy/dalidou/dedup-watcher.sh \
#       >> /home/papa/atocore-logs/dedup-watcher.log 2>&1
#
# Mirrors deploy/dalidou/graduation-watcher.sh exactly.

# Abort on unhandled errors, on unset variables and on failures inside pipelines.
set -euo pipefail

# Base URL of the AtoCore HTTP API; can be overridden through the environment.
ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
# Application root: two directories above the directory holding this script.
APP_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Lock file that keeps overlapping cron runs from starting a second dedup.
LOCK_FILE="/tmp/atocore-dedup.lock"
# Directory that receives one log file per on-demand dedup run.
LOG_DIR="/home/papa/atocore-logs"
mkdir -p "$LOG_DIR"

# UTC timestamp captured once at start-up. Every log line of this run carries
# the same stamp, and it is also what gets reported as dedup_last_started_at.
TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Print a log line to stdout: "[<TS>] <message>".
# Arguments: the message; multiple arguments are joined with single spaces.
log() {
  printf '[%s] %s\n' "$TS" "$*"
}

# Fetch the flag via API.
# Any failure (API unreachable, timeout, HTTP error status) degrades to an
# empty document, i.e. "nothing requested"; the next cron run simply retries.
STATE_JSON=$(curl -sSf --max-time 5 "$ATOCORE_URL/project/state/atocore" 2>/dev/null || echo "{}")

# Extract the value of the config/dedup_requested_at entry, if there is one.
# The entry list is looked up under "entries", falling back to "state".
# The Python program is single-quoted so the shell never expands anything in it.
REQUESTED=$(printf '%s' "$STATE_JSON" | python3 -c '
import sys, json
try:
    d = json.load(sys.stdin)
    for e in d.get("entries", d.get("state", [])):
        if e.get("category") == "config" and e.get("key") == "dedup_requested_at":
            v = e.get("value", "")
            # The value is parsed as JSON text further down in this script, so
            # a value that arrives as a non-string is re-serialised as JSON
            # instead of being printed as a Python repr.
            print(v if isinstance(v, str) else json.dumps(v))
            break
except Exception:
    # Malformed or unexpected response: behave as if no request is pending.
    pass
' 2>/dev/null || echo "")

# No pending request: nothing to do on this run.
[[ -n "$REQUESTED" ]] || exit 0

# Print one field of the JSON request held in $REQUESTED.
# Arguments: $1 - field name
#            $2 - value to print when the field is missing or null, or when
#                 $REQUESTED is not a JSON object / python3 is unavailable
# Outputs:   the field value (or the fallback) on stdout
request_field() {
  local field=$1 fallback=$2
  printf '%s' "$REQUESTED" \
    | python3 -c '
import sys, json
req = json.loads(sys.stdin.read() or "{}")
val = req.get(sys.argv[1])
# A JSON null is treated like a missing field rather than printed as "None".
print(sys.argv[2] if val is None else val)
' "$field" "$fallback" 2>/dev/null \
    || printf '%s\n' "$fallback"
}

# Print $1 when it fully matches the extended regex $2, otherwise print $3.
# Keeps values such as "None" or "True" from reaching the dedup command line.
matching_or_default() {
  local value=$1 pattern=$2 fallback=$3
  if [[ "$value" =~ $pattern ]]; then
    printf '%s\n' "$value"
  else
    printf '%s\n' "$fallback"
  fi
}

# Empty PROJECT means "no --project argument" when the scan is started.
PROJECT=$(request_field project "")
# NOTE(review): assumes memory_dedup.py expects a plain decimal threshold and
# a non-negative integer batch size - confirm against its argument parser.
THRESHOLD=$(matching_or_default "$(request_field similarity_threshold 0.88)" '^[0-9]+(\.[0-9]+)?$' 0.88)
MAX_BATCH=$(matching_or_default "$(request_field max_batch 50)" '^[0-9]+$' 50)

# Acquire lock.
# File descriptor 9 stays open for the rest of the script, so the lock is held
# until this process exits. A lock file that cannot be opened is reported in
# the log instead of being skipped silently, because it would otherwise
# disable the watcher without leaving any trace.
exec 9>"$LOCK_FILE" || {
  log "cannot open lock file $LOCK_FILE, skipping"
  exit 0
}
# Non-blocking: if another run holds the lock, leave it alone.
if ! flock -n 9; then
  log "dedup already running, skipping"
  exit 0
fi

# Mark running

# POST one "status" entry for project "atocore" to project_state.
# Arguments: $1 - key
#            $2 - value; embedded in the JSON payload verbatim, so it must not
#                 contain characters that need JSON escaping
# Best effort: a failed or slow request is ignored. --max-time keeps a hung
# API from blocking this script (and therefore the lock) indefinitely.
post_dedup_status() {
  curl -sSf --max-time 10 -X POST "$ATOCORE_URL/project/state" \
    -H 'Content-Type: application/json' \
    -d "{\"project\":\"atocore\",\"category\":\"status\",\"key\":\"$1\",\"value\":\"$2\",\"source\":\"dedup watcher\"}" \
    >/dev/null 2>&1 || true
}

# Whatever way the script ends from here on (normal completion, set -e abort,
# termination signal), make sure the status does not stay stuck at "running".
trap 'post_dedup_status dedup_running 0' EXIT

post_dedup_status dedup_running 1
post_dedup_status dedup_last_started_at "$TS"

# Each on-demand run writes to its own log file, named after its UTC start time.
LOG_FILE="$LOG_DIR/dedup-ondemand-$(date -u +%Y%m%d-%H%M%S).log"
log "Starting dedup (project='$PROJECT' threshold=$THRESHOLD max_batch=$MAX_BATCH, log: $LOG_FILE)"

# Clear the flag BEFORE running so duplicate clicks queue at most one.
# Best effort; --max-time keeps a hung API from stalling the run while the
# lock is held.
curl -sSf --max-time 10 -X DELETE "$ATOCORE_URL/project/state" \
  -H 'Content-Type: application/json' \
  -d "{\"project\":\"atocore\",\"category\":\"config\",\"key\":\"dedup_requested_at\"}" \
  >/dev/null 2>&1 || true

# memory_dedup.py is started by its path relative to the application root.
cd "$APP_DIR"
export PYTHONPATH="$APP_DIR/src:${PYTHONPATH:-}"

# Print the last line of a log file that starts with "summary:"; when there is
# no such line, print the file's last line instead (nothing for an empty file).
# Arguments: $1 - path of the log file
# Always returns 0, and does not depend on the pipefail setting of the caller.
last_summary_line() {
  local file=$1 summary
  summary=$(grep -- '^summary:' "$file" | tail -n 1) || true
  if [[ -n "$summary" ]]; then
    printf '%s\n' "$summary"
  else
    tail -n 1 -- "$file" || true
  fi
}

# Arguments are collected in an array so every value stays a single word.
ARGS=(--base-url "$ATOCORE_URL" --similarity-threshold "$THRESHOLD" --max-batch "$MAX_BATCH")
if [[ -n "$PROJECT" ]]; then
  ARGS+=(--project "$PROJECT")
fi

# stdout and stderr of the scan both go to the per-run log file.
if python3 scripts/memory_dedup.py "${ARGS[@]}" >> "$LOG_FILE" 2>&1; then
  RESULT=$(last_summary_line "$LOG_FILE")
  RESULT="${RESULT:-completed}"
  log "dedup finished: $RESULT"
else
  RESULT="ERROR — see $LOG_FILE"
  log "dedup FAILED"
fi

FINISH_TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Escape a string so it can be placed between double quotes in a JSON document.
# Arguments: $1 - raw text
# Outputs:   the JSON-escaped text, without the surrounding quotes
json_escape() {
  printf '%s' "$1" \
    | python3 -c 'import sys, json; print(json.dumps(sys.stdin.read())[1:-1])'
}

# The result text is taken from the dedup log, so it may contain quotes or
# backslashes. If escaping fails, fall back to a fixed text rather than
# aborting under set -e before any status has been published.
SAFE_RESULT=$(json_escape "$RESULT") || SAFE_RESULT="unknown (result could not be encoded)"

# Publish the final status, one key=value pair per request, in this order.
# Best effort; --max-time keeps a hung API from holding the lock indefinitely.
for entry in \
  "dedup_running=0" \
  "dedup_last_finished_at=$FINISH_TS" \
  "dedup_last_result=$SAFE_RESULT"; do
  curl -sSf --max-time 10 -X POST "$ATOCORE_URL/project/state" \
    -H 'Content-Type: application/json' \
    -d "{\"project\":\"atocore\",\"category\":\"status\",\"key\":\"${entry%%=*}\",\"value\":\"${entry#*=}\",\"source\":\"dedup watcher\"}" \
    >/dev/null 2>&1 || true
done