scripts/import_openclaw_state.py reads the OpenClaw file continuity layer from clawdbot (T420) via SSH and imports candidate memories into AtoCore. Loose coupling: OpenClaw's internals don't need to change, AtoCore pulls from stable markdown files. Per codex's integration proposal (docs/openclaw-atocore-integration-proposal.md): Classification: - SOUL.md -> identity candidate - USER.md -> identity candidate - MODEL-ROUTING.md -> adaptation candidate (routing rules) - MEMORY.md -> memory candidate (long-term curated) - memory/YYYY-MM-DD.md -> episodic candidate (daily logs, last 7 days) - heartbeat-state.json -> skipped (ops metadata only, not canonical) Delta detection: SHA-256 hash per file stored in project_state under atocore/status/openclaw_import_hashes. Only changed files re-import. Hashes persist across runs so no wasted work. All imports land as status=candidate. Auto-triage filters. Nothing auto-promotes — the importer is a signal producer, the pipeline decides what graduates. Discord: deferred per codex's proposal — no durable local store in current OpenClaw snapshot. Revisit if OpenClaw exposes an export. Wired into cron-backup.sh as Step 3a (before vault refresh + extraction) so OpenClaw signals flow through the same pipeline. Gated on ATOCORE_OPENCLAW_IMPORT=true (default true). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
130 lines
5.0 KiB
Bash
Executable File
130 lines
5.0 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# deploy/dalidou/cron-backup.sh
# ------------------------------
# Daily backup + retention cleanup via the AtoCore API.
#
# Intended to run from cron on Dalidou:
#
#   # Daily at 03:00 UTC
#   0 3 * * * /srv/storage/atocore/app/deploy/dalidou/cron-backup.sh >> /var/log/atocore-backup.log 2>&1
#
# What it does:
#   1. Creates a runtime backup (db + registry, no chroma by default)
#   2. Runs retention cleanup with --confirm to delete old snapshots
#   3. Logs results to stdout (captured by cron into the log file)
#
# Fail-open: exits 0 even on API errors so cron doesn't send noise
# emails. Check /var/log/atocore-backup.log for diagnostics.
#
# Environment variables:
#   ATOCORE_URL              default http://127.0.0.1:8100
#   ATOCORE_BACKUP_CHROMA    default false (set to "true" for cold chroma copy)
#   ATOCORE_BACKUP_DIR       default /srv/storage/atocore/backups
#   ATOCORE_BACKUP_RSYNC     optional rsync destination for off-host copies
#                            (e.g. papa@laptop:/home/papa/atocore-backups/)
#                            When set, the local snapshots tree is rsynced to
#                            the destination after cleanup. Unset = skip.
#                            SSH key auth must already be configured from this
#                            host to the destination.
#   ATOCORE_OPENCLAW_IMPORT  default true (set to "false" to skip the Step 3a
#                            OpenClaw state import)
#   ATOCORE_EXTRACT_BATCH    default true (set to "false" to skip the Step 4
#                            host-side batch LLM extraction)
set -euo pipefail

# Configuration — all overridable via environment (see header).
ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
INCLUDE_CHROMA="${ATOCORE_BACKUP_CHROMA:-false}"
BACKUP_DIR="${ATOCORE_BACKUP_DIR:-/srv/storage/atocore/backups}"
RSYNC_TARGET="${ATOCORE_BACKUP_RSYNC:-}"

# Run start time, kept for backward compatibility with anything that
# reads it. NOTE: no longer used as the log prefix (see log() below).
TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Log a message to stdout prefixed with the current UTC time.
# Bug fix: previously every line was prefixed with $TIMESTAMP, which is
# computed once at startup — so a run spanning several minutes (rsync to
# a remote host, batch LLM extraction) produced a log in which every
# entry carried the start time, hiding where the time actually went.
# Compute the stamp per call instead.
log() { printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*"; }
log "=== AtoCore daily backup starting ==="

# Step 1: create a runtime backup via the API (db + registry; chroma is
# included only when ATOCORE_BACKUP_CHROMA=true). Fail-open: on API
# error, log the diagnostics and exit 0 so cron sends no noise emails.
log "Step 1: creating backup (chroma=$INCLUDE_CHROMA)"
if ! BACKUP_RESULT=$(curl -sf -X POST \
        -H "Content-Type: application/json" \
        -d "{\"include_chroma\": $INCLUDE_CHROMA}" \
        "$ATOCORE_URL/admin/backup" 2>&1); then
    log "ERROR: backup creation failed: $BACKUP_RESULT"
    exit 0
fi
log "Backup created: $BACKUP_RESULT"
# Step 2: Retention cleanup (confirm=true to actually delete old
# snapshots). Fail-open: on API error, log and exit 0 — same contract
# as Step 1.
log "Step 2: running retention cleanup"
if ! CLEANUP_RESULT=$(curl -sf -X POST \
        -H "Content-Type: application/json" \
        -d '{"confirm": true}' \
        "$ATOCORE_URL/admin/backup/cleanup" 2>&1); then
    log "ERROR: cleanup failed: $CLEANUP_RESULT"
    exit 0
fi
log "Cleanup result: $CLEANUP_RESULT"
# Step 3: Off-host rsync (optional). Fail-open: log but don't abort
# the cron so a laptop being offline at 03:00 UTC never turns the
# local backup path red.
#
# Fix: the original used `rsync … && { log … } || { log WARN … }`,
# the classic and-or-as-if-else anti-pattern — if the success branch
# itself failed, the WARN branch would also run. Rewritten as a real
# if/elif/else chain.
if [[ -n "$RSYNC_TARGET" ]]; then
  log "Step 3: rsyncing snapshots to $RSYNC_TARGET"
  if [[ ! -d "$BACKUP_DIR/snapshots" ]]; then
    # Nothing to copy yet (first run, or backup dir relocated).
    log "WARN: $BACKUP_DIR/snapshots does not exist, skipping rsync"
  elif RSYNC_OUTPUT=$(rsync -a --delete \
      -e "ssh -o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=accept-new" \
      "$BACKUP_DIR/snapshots/" "$RSYNC_TARGET" 2>&1); then
    log "Rsync complete"
  else
    log "WARN: rsync to $RSYNC_TARGET failed (offline or auth?): $RSYNC_OUTPUT"
  fi
else
  log "Step 3: ATOCORE_BACKUP_RSYNC not set, skipping off-host copy"
fi
# Step 3a: Pull OpenClaw state from clawdbot (one-way import of
# SOUL.md, USER.md, MODEL-ROUTING.md, MEMORY.md, recent memory/*.md).
# Loose coupling: OpenClaw's internals don't need to change.
# Fail-open: importer failure never blocks the pipeline.
log "Step 3a: pull OpenClaw state"
OPENCLAW_IMPORT="${ATOCORE_OPENCLAW_IMPORT:-true}"
if [[ "$OPENCLAW_IMPORT" == "true" ]]; then
  # Bug fix: SCRIPT_DIR was previously defined only inside the Step 4
  # block further down, so expanding it here hit `set -u` ("unbound
  # variable"). An expansion error is fatal in bash even when the
  # command has a `||` handler attached, so this step aborted the whole
  # script instead of failing open. Resolve the script's directory
  # before first use (harmlessly recomputed again in Step 4).
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # Stream importer output through log() line by line; with pipefail,
  # a failing importer makes the whole pipeline non-zero and trips the
  # non-blocking WARN handler.
  python3 "$SCRIPT_DIR/../../scripts/import_openclaw_state.py" \
    --base-url "$ATOCORE_URL" \
    2>&1 | while IFS= read -r line; do log " $line"; done || {
    log " WARN: OpenClaw import failed (non-blocking)"
  }
else
  log " skipped (ATOCORE_OPENCLAW_IMPORT != true)"
fi
|
# Step 3b: Auto-refresh vault sources so new PKM files flow in
# automatically. Fail-open: never blocks the rest of the pipeline.
# --max-time 600 caps a large re-ingest so cron can't hang here.
#
# Fix: replaced `curl … && { … } || { … }` (and-or-as-if-else
# anti-pattern: a failure in the success branch would also run the
# WARN branch) with a plain if/else.
log "Step 3b: auto-refresh vault sources"
if REFRESH_RESULT=$(curl -sf -X POST --max-time 600 \
    "$ATOCORE_URL/ingest/sources" 2>&1); then
  log "Sources refresh complete"
else
  log "WARN: sources refresh failed (non-blocking): $REFRESH_RESULT"
fi
|
# Step 4: Batch LLM extraction on recent interactions (optional).
# Runs HOST-SIDE because claude CLI is on the host, not inside the
# Docker container. The script fetches interactions from the API,
# runs claude -p locally, and POSTs candidates back.
# Fail-open: extraction failure never blocks backup.
#
# Fix: replaced `bash … && { … } || { … }` (and-or-as-if-else
# anti-pattern) with a plain if/else.
EXTRACT="${ATOCORE_EXTRACT_BATCH:-true}"
if [[ "$EXTRACT" == "true" ]]; then
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  log "Step 4: running host-side batch LLM extraction"
  if bash "$SCRIPT_DIR/batch-extract.sh" 2>&1; then
    log "Extraction complete"
  else
    log "WARN: batch extraction failed (this is non-blocking)"
  fi
else
  log "Step 4: ATOCORE_EXTRACT_BATCH not set to true, skipping extraction"
fi

log "=== AtoCore daily backup complete ==="