#!/usr/bin/env bash
#
# deploy/dalidou/dedup-watcher.sh
# -------------------------------
# Host-side watcher for on-demand memory dedup scans (Phase 7A).
#
# The /admin/triage page has a "🔗 Scan for duplicates" button that POSTs
# to /admin/memory/dedup-scan with {project, similarity_threshold, max_batch}.
# The container writes this to project_state (atocore/config/dedup_requested_at).
#
# This script runs on the Dalidou HOST (where claude CLI lives), polls
# for the flag, and runs memory_dedup.py when seen.
#
# Installed via cron every 2 minutes:
#   */2 * * * * /srv/storage/atocore/app/deploy/dalidou/dedup-watcher.sh \
#       >> /home/papa/atocore-logs/dedup-watcher.log 2>&1
#
# Environment:
#   ATOCORE_URL  base URL of the AtoCore API (default: http://127.0.0.1:8100)
#
# Status keys written to project "atocore", category "status":
#   dedup_running, dedup_last_started_at, dedup_last_finished_at,
#   dedup_last_result
#
# Modelled on deploy/dalidou/graduation-watcher.sh.

set -euo pipefail

ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
APP_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
LOCK_FILE="/tmp/atocore-dedup.lock"
LOG_DIR="/home/papa/atocore-logs"

# Fallbacks used when the request omits a field or carries an unusable value.
DEFAULT_THRESHOLD="0.88"
DEFAULT_MAX_BATCH="50"
NUMBER_RE='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
INTEGER_RE='^[0-9]+$'

mkdir -p "$LOG_DIR"

# Captured once: every log line of this run carries the run's START time,
# and the same value is reported as dedup_last_started_at.
TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Print a message to stdout, prefixed with the run's start timestamp.
log() { printf '[%s] %s\n' "$TS" "$*"; }

#######################################
# Read one field from the JSON request stored in $REQUESTED.
# Globals:   REQUESTED (read)
# Arguments: $1 - field name, $2 - value to print when the field is absent
#            or the request cannot be parsed as a JSON object
# Outputs:   the field value (or the default) on stdout
# Returns:   0 always
#######################################
request_field() {
  local key=$1 default=$2
  printf '%s' "$REQUESTED" | python3 -c '
import json, sys
key, default = sys.argv[1], sys.argv[2]
try:
    value = json.loads(sys.stdin.read() or "{}").get(key, default)
except Exception:
    value = default
print(value)
' "$key" "$default" 2>/dev/null || printf '%s\n' "$default"
}

#######################################
# Best-effort write of one project_state entry for project "atocore".
# The payload is serialised with json.dumps so arbitrary values (quotes,
# backslashes, newlines) cannot break the JSON document.
# Globals:   ATOCORE_URL (read)
# Arguments: $1 - category, $2 - key, $3 - value (sent as a string)
# Returns:   0 always; failures are deliberately ignored so a flaky API
#            never aborts the watcher.
#######################################
post_state() {
  local category=$1 key=$2 value=$3 payload
  payload=$(python3 -c '
import json, sys
print(json.dumps({
    "project": "atocore",
    "category": sys.argv[1],
    "key": sys.argv[2],
    "value": sys.argv[3],
    "source": "dedup watcher",
}, separators=(",", ":")))
' "$category" "$key" "$value" 2>/dev/null) || return 0
  curl -sSf --max-time 10 -X POST "$ATOCORE_URL/project/state" \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    >/dev/null 2>&1 || true
}

# Set to 1 while dedup_running=1 has been published and not yet cleared.
running_marked=0

# EXIT handler: if the script dies between "mark running" and the normal
# completion report (e.g. a set -e abort), reset dedup_running so the
# status does not stay at "1".
clear_running_on_exit() {
  if [[ "$running_marked" == 1 ]]; then
    post_state status dedup_running 0
  fi
}
trap clear_running_on_exit EXIT

# Fetch the flag via API. An unreachable API is treated as "no request".
STATE_JSON=$(curl -sSf --max-time 5 "$ATOCORE_URL/project/state/atocore" 2>/dev/null \
  || printf '{}')

# Pull the value of the config/dedup_requested_at entry, if present.
REQUESTED=$(printf '%s' "$STATE_JSON" | python3 -c '
import sys, json
try:
    d = json.load(sys.stdin)
    for e in d.get("entries", d.get("state", [])):
        if e.get("category") == "config" and e.get("key") == "dedup_requested_at":
            print(e.get("value", ""))
            break
except Exception:
    pass
' 2>/dev/null || true)

# No pending request: nothing to do on this tick.
if [[ -z "$REQUESTED" ]]; then
  exit 0
fi

PROJECT=$(request_field project "")
THRESHOLD=$(request_field similarity_threshold "$DEFAULT_THRESHOLD")
MAX_BATCH=$(request_field max_batch "$DEFAULT_MAX_BATCH")

# Acquire lock; fd 9 stays open (and the lock held) until the script exits.
exec 9>"$LOCK_FILE" || exit 0
if ! flock -n 9; then
  log "dedup already running, skipping"
  exit 0
fi

# A field present but unusable (e.g. JSON null prints as "None") would make
# memory_dedup.py reject its arguments; fall back to the defaults instead.
if [[ ! "$THRESHOLD" =~ $NUMBER_RE ]]; then
  log "invalid similarity_threshold '$THRESHOLD', using $DEFAULT_THRESHOLD"
  THRESHOLD=$DEFAULT_THRESHOLD
fi
if [[ ! "$MAX_BATCH" =~ $INTEGER_RE ]]; then
  log "invalid max_batch '$MAX_BATCH', using $DEFAULT_MAX_BATCH"
  MAX_BATCH=$DEFAULT_MAX_BATCH
fi

# Mark running
running_marked=1
post_state status dedup_running 1
post_state status dedup_last_started_at "$TS"

LOG_FILE="$LOG_DIR/dedup-ondemand-$(date -u +%Y%m%d-%H%M%S).log"
log "Starting dedup (project='$PROJECT' threshold=$THRESHOLD max_batch=$MAX_BATCH, log: $LOG_FILE)"

# Clear the flag BEFORE running so duplicate clicks queue at most one
curl -sSf --max-time 10 -X DELETE "$ATOCORE_URL/project/state" \
  -H 'Content-Type: application/json' \
  -d '{"project":"atocore","category":"config","key":"dedup_requested_at"}' \
  >/dev/null 2>&1 || true

cd "$APP_DIR"
export PYTHONPATH="$APP_DIR/src:${PYTHONPATH:-}"

ARGS=(--base-url "$ATOCORE_URL" --similarity-threshold "$THRESHOLD" --max-batch "$MAX_BATCH")
if [[ -n "$PROJECT" ]]; then
  ARGS+=(--project "$PROJECT")
fi

if python3 scripts/memory_dedup.py "${ARGS[@]}" >> "$LOG_FILE" 2>&1; then
  # Prefer the last "summary:" line. With pipefail a non-matching grep fails
  # the pipeline, so the fallback reports the last line of the log instead.
  RESULT=$(grep "^summary:" "$LOG_FILE" | tail -1 || tail -1 "$LOG_FILE")
  RESULT="${RESULT:-completed}"
  log "dedup finished: $RESULT"
else
  RESULT="ERROR — see $LOG_FILE"
  log "dedup FAILED"
fi

FINISH_TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

post_state status dedup_running 0
running_marked=0 # already cleared; the EXIT handler has nothing left to do
post_state status dedup_last_finished_at "$FINISH_TS"
post_state status dedup_last_result "$RESULT"