#!/usr/bin/env bash
#
# deploy/dalidou/cron-backup.sh
# ------------------------------
# Daily backup + retention cleanup via the AtoCore API.
#
# Intended to run from cron on Dalidou:
#
#   # Daily at 03:00 UTC
#   0 3 * * * /srv/storage/atocore/app/deploy/dalidou/cron-backup.sh >> /var/log/atocore-backup.log 2>&1
#
# What it does:
#   1. Creates a runtime backup (db + registry, no chroma by default)
#   2. Runs retention cleanup with --confirm to delete old snapshots
#   3. Logs results to stdout (captured by cron into the log file)
#
# Fail-open: exits 0 even on API errors so cron doesn't send noise
# emails. Check /var/log/atocore-backup.log for diagnostics.
#
# Environment variables:
#   ATOCORE_URL            default http://127.0.0.1:8100
#   ATOCORE_BACKUP_CHROMA  default false (set to "true" for cold chroma copy)
#   ATOCORE_BACKUP_DIR     default /srv/storage/atocore/backups
#   ATOCORE_BACKUP_RSYNC   optional rsync destination for off-host copies
#                          (e.g. papa@laptop:/home/papa/atocore-backups/)
#                          When set, the local snapshots tree is rsynced to
#                          the destination after cleanup. Unset = skip.
#                          SSH key auth must already be configured from this
#                          host to the destination.
set -euo pipefail

ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
INCLUDE_CHROMA="${ATOCORE_BACKUP_CHROMA:-false}"
BACKUP_DIR="${ATOCORE_BACKUP_DIR:-/srv/storage/atocore/backups}"
RSYNC_TARGET="${ATOCORE_BACKUP_RSYNC:-}"

# Log a message with a UTC timestamp. The timestamp is computed per call
# (not once at startup) so long-running steps — the rsync and the
# --max-time 600 curl below — get accurate times in the cron log.
log() { printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*"; }

log "=== AtoCore daily backup starting ==="

# Step 1: Create backup. Fail-open: log the error and exit 0 so cron
# stays quiet; the log file carries the diagnostics.
# Note: the assignment still captures curl's output even when curl fails,
# so $BACKUP_RESULT is usable inside the error handler.
log "Step 1: creating backup (chroma=$INCLUDE_CHROMA)"
BACKUP_RESULT=$(curl -sf -X POST \
  -H "Content-Type: application/json" \
  -d "{\"include_chroma\": $INCLUDE_CHROMA}" \
  "$ATOCORE_URL/admin/backup" 2>&1) || {
  log "ERROR: backup creation failed: $BACKUP_RESULT"
  exit 0
}
log "Backup created: $BACKUP_RESULT"

# Step 2: Retention cleanup (confirm=true to actually delete).
log "Step 2: running retention cleanup"
CLEANUP_RESULT=$(curl -sf -X POST \
  -H "Content-Type: application/json" \
  -d '{"confirm": true}' \
  "$ATOCORE_URL/admin/backup/cleanup" 2>&1) || {
  log "ERROR: cleanup failed: $CLEANUP_RESULT"
  exit 0
}
log "Cleanup result: $CLEANUP_RESULT"

# Step 3: Off-host rsync (optional). Fail-open: log but don't abort
# the cron so a laptop being offline at 03:00 UTC never turns the
# local backup path red.
if [[ -n "$RSYNC_TARGET" ]]; then
  log "Step 3: rsyncing snapshots to $RSYNC_TARGET"
  if [[ ! -d "$BACKUP_DIR/snapshots" ]]; then
    log "WARN: $BACKUP_DIR/snapshots does not exist, skipping rsync"
  else
    # Explicit if/else instead of `cmd && { ok } || { warn }`: the latter
    # also runs the warn branch when the ok branch itself fails.
    # An `if cmd` condition is exempt from set -e, so a failed rsync
    # cannot abort the script here.
    if RSYNC_OUTPUT=$(rsync -a --delete \
      -e "ssh -o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=accept-new" \
      "$BACKUP_DIR/snapshots/" "$RSYNC_TARGET" 2>&1); then
      log "Rsync complete"
    else
      log "WARN: rsync to $RSYNC_TARGET failed (offline or auth?): $RSYNC_OUTPUT"
    fi
  fi
else
  log "Step 3: ATOCORE_BACKUP_RSYNC not set, skipping off-host copy"
fi

# Step 3b: Auto-refresh vault sources so new PKM files flow in
# automatically. Fail-open: never blocks the rest of the pipeline.
log "Step 3b: auto-refresh vault sources"
# if/else instead of the `cmd && { ok } || { warn }` anti-pattern (the
# warn branch would also fire if the ok branch itself failed). The `if`
# condition also shields the curl failure from set -e.
if REFRESH_RESULT=$(curl -sf -X POST --max-time 600 \
  "$ATOCORE_URL/ingest/sources" 2>&1); then
  log "Sources refresh complete"
else
  log "WARN: sources refresh failed (non-blocking): $REFRESH_RESULT"
fi

# Step 4: Batch LLM extraction on recent interactions (optional).
# Runs HOST-SIDE because claude CLI is on the host, not inside the
# Docker container. The script fetches interactions from the API,
# runs claude -p locally, and POSTs candidates back.
# Fail-open: extraction failure never blocks backup.
EXTRACT="${ATOCORE_EXTRACT_BATCH:-true}"
if [[ "$EXTRACT" == "true" ]]; then
  # Resolve the directory this script lives in so batch-extract.sh is
  # found regardless of cron's working directory.
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  log "Step 4: running host-side batch LLM extraction"
  if bash "$SCRIPT_DIR/batch-extract.sh" 2>&1; then
    log "Extraction complete"
  else
    log "WARN: batch extraction failed (this is non-blocking)"
  fi
else
  log "Step 4: ATOCORE_EXTRACT_BATCH not set to true, skipping extraction"
fi

log "=== AtoCore daily backup complete ==="