Step 4 added to the daily cron: POST /admin/extract-batch with mode=llm, persist=true, limit=50. Runs after backup + cleanup + rsync. Fail-open: extraction failure never blocks the backup. Gated on ATOCORE_EXTRACT_BATCH=true (defaults to true). The endpoint uses the last_extract_batch_run timestamp from project state to auto-resume, so the cron doesn't need to track state. curl --max-time 600 gives the LLM extractor up to 10 minutes for the batch. Note the absolute worst case (50 interactions × ~20 s each ≈ 17 min) exceeds that cap, but in practice most interactions are already extracted and are no-ops; a fully cold batch that hits the timeout simply resumes on the next nightly run. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
108 lines
4.1 KiB
Bash
Executable File
108 lines
4.1 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# deploy/dalidou/cron-backup.sh
# ------------------------------
# Daily backup + retention cleanup via the AtoCore API.
#
# Intended to run from cron on Dalidou:
#
#   # Daily at 03:00 UTC
#   0 3 * * * /srv/storage/atocore/app/deploy/dalidou/cron-backup.sh >> /var/log/atocore-backup.log 2>&1
#
# What it does:
#   1. Creates a runtime backup (db + registry, no chroma by default)
#   2. Runs retention cleanup with --confirm to delete old snapshots
#   3. Logs results to stdout (captured by cron into the log file)
#
# Fail-open: exits 0 even on API errors so cron doesn't send noise
# emails. Check /var/log/atocore-backup.log for diagnostics.
#
# Environment variables:
#   ATOCORE_URL            default http://127.0.0.1:8100
#   ATOCORE_BACKUP_CHROMA  default false (set to "true" for cold chroma copy)
#   ATOCORE_BACKUP_DIR     default /srv/storage/atocore/backups
#   ATOCORE_BACKUP_RSYNC   optional rsync destination for off-host copies
#                          (e.g. papa@laptop:/home/papa/atocore-backups/)
#                          When set, the local snapshots tree is rsynced to
#                          the destination after cleanup. Unset = skip.
#                          SSH key auth must already be configured from this
#                          host to the destination.
|
set -euo pipefail

# Configuration — all overridable via environment (see file header).
ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
INCLUDE_CHROMA="${ATOCORE_BACKUP_CHROMA:-false}"
BACKUP_DIR="${ATOCORE_BACKUP_DIR:-/srv/storage/atocore/backups}"
RSYNC_TARGET="${ATOCORE_BACKUP_RSYNC:-}"
# Script start time (UTC, ISO-8601). Retained for compatibility; log()
# below no longer uses it for per-line stamps.
TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# log MESSAGE...
# Print a timestamped line to stdout (cron redirects to the log file).
# The timestamp is computed per call rather than once at script start:
# Step 4's extraction can run for up to 10 minutes, and stamping every
# line with the script start time made step durations impossible to
# read from the log.
log() { printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*"; }
|
|
|
|
log "=== AtoCore daily backup starting ==="

# Step 1: Create a runtime backup (db + registry; chroma only on request).
# Normalize the flag to a strict JSON boolean so an unexpected value in
# ATOCORE_BACKUP_CHROMA (e.g. "yes", empty string) can't be interpolated
# into the request body as invalid JSON.
if [[ "$INCLUDE_CHROMA" == "true" ]]; then
  BACKUP_PAYLOAD='{"include_chroma": true}'
else
  BACKUP_PAYLOAD='{"include_chroma": false}'
fi

log "Step 1: creating backup (chroma=$INCLUDE_CHROMA)"
# -sS: silent progress, but still print curl's own error text to stderr;
# 2>&1 folds that into BACKUP_RESULT so the failure log line shows *why*
# the request failed (with plain -s the error text was swallowed and the
# log showed an empty result).
BACKUP_RESULT=$(curl -sSf -X POST \
  -H "Content-Type: application/json" \
  -d "$BACKUP_PAYLOAD" \
  "$ATOCORE_URL/admin/backup" 2>&1) || {
  log "ERROR: backup creation failed: $BACKUP_RESULT"
  # Fail-open: exit 0 so cron doesn't send noise emails.
  exit 0
}
log "Backup created: $BACKUP_RESULT"
|
|
|
|
# Step 2: Retention cleanup. confirm=true makes the API actually delete
# old snapshots rather than performing a dry run.
log "Step 2: running retention cleanup"
# -sS (not plain -s) so curl's transport errors (connection refused,
# timeout, ...) land in CLEANUP_RESULT via 2>&1 instead of being
# silently discarded.
CLEANUP_RESULT=$(curl -sSf -X POST \
  -H "Content-Type: application/json" \
  -d '{"confirm": true}' \
  "$ATOCORE_URL/admin/backup/cleanup" 2>&1) || {
  log "ERROR: cleanup failed: $CLEANUP_RESULT"
  # Fail-open: exit 0 so cron doesn't send noise emails.
  exit 0
}
log "Cleanup result: $CLEANUP_RESULT"
|
|
|
|
# Step 3: Off-host rsync (optional). Fail-open: log but don't abort
# the cron so a laptop being offline at 03:00 UTC never turns the
# local backup path red.
if [[ -n "$RSYNC_TARGET" ]]; then
  log "Step 3: rsyncing snapshots to $RSYNC_TARGET"
  if [[ ! -d "$BACKUP_DIR/snapshots" ]]; then
    log "WARN: $BACKUP_DIR/snapshots does not exist, skipping rsync"
  # Proper if/else instead of `cmd && { ok; } || { warn; }` (SC2015):
  # with the old form, a failure in the success branch would have run
  # the warn branch as well.
  elif RSYNC_OUTPUT=$(rsync -a --delete \
      -e "ssh -o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=accept-new" \
      "$BACKUP_DIR/snapshots/" "$RSYNC_TARGET" 2>&1); then
    log "Rsync complete"
  else
    log "WARN: rsync to $RSYNC_TARGET failed (offline or auth?): $RSYNC_OUTPUT"
  fi
else
  log "Step 3: ATOCORE_BACKUP_RSYNC not set, skipping off-host copy"
fi
|
|
|
|
# Step 4: Batch LLM extraction on recent interactions (optional).
# Runs the LLM extractor (claude -p sonnet) against interactions
# captured since the last batch run. Candidates land as
# status=candidate for human or auto-triage review.
# Fail-open: extraction failure never blocks backup.
# The endpoint tracks its own last-run timestamp in project state,
# so this cron keeps no state of its own.
EXTRACT="${ATOCORE_EXTRACT_BATCH:-true}"
if [[ "$EXTRACT" == "true" ]]; then
  log "Step 4: running batch LLM extraction"
  # --max-time 600: allow the extractor up to 10 minutes for the batch
  # of 50 (a fully cold batch may hit this and resume next night).
  # -sS so curl's own error text reaches EXTRACT_RESULT via 2>&1.
  # if/else instead of `cmd && { ok; } || { warn; }` (SC2015) so the
  # warn branch can never run after the success branch.
  if EXTRACT_RESULT=$(curl -sSf -X POST \
      -H "Content-Type: application/json" \
      -d '{"mode": "llm", "persist": true, "limit": 50}' \
      --max-time 600 \
      "$ATOCORE_URL/admin/extract-batch" 2>&1); then
    log "Extraction result: $EXTRACT_RESULT"
  else
    log "WARN: batch extraction failed (this is non-blocking): $EXTRACT_RESULT"
  fi
else
  log "Step 4: ATOCORE_EXTRACT_BATCH not set to true, skipping extraction"
fi

log "=== AtoCore daily backup complete ==="
|