scripts/import_openclaw_state.py reads the OpenClaw file continuity layer from clawdbot (T420) via SSH and imports candidate memories into AtoCore. Loose coupling: OpenClaw's internals don't need to change, AtoCore pulls from stable markdown files. Per codex's integration proposal (docs/openclaw-atocore-integration-proposal.md): Classification: - SOUL.md -> identity candidate - USER.md -> identity candidate - MODEL-ROUTING.md -> adaptation candidate (routing rules) - MEMORY.md -> memory candidate (long-term curated) - memory/YYYY-MM-DD.md -> episodic candidate (daily logs, last 7 days) - heartbeat-state.json -> skipped (ops metadata only, not canonical) Delta detection: SHA-256 hash per file stored in project_state under atocore/status/openclaw_import_hashes. Only changed files re-import. Hashes persist across runs so no wasted work. All imports land as status=candidate. Auto-triage filters. Nothing auto-promotes — the importer is a signal producer, the pipeline decides what graduates. Discord: deferred per codex's proposal — no durable local store in current OpenClaw snapshot. Revisit if OpenClaw exposes an export. Wired into cron-backup.sh as Step 3a (before vault refresh + extraction) so OpenClaw signals flow through the same pipeline. Gated on ATOCORE_OPENCLAW_IMPORT=true (default true). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
130 lines
5.0 KiB
Bash
Executable File
130 lines
5.0 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# deploy/dalidou/cron-backup.sh
# ------------------------------
# Daily backup + retention cleanup via the AtoCore API.
#
# Intended to run from cron on Dalidou:
#
#   # Daily at 03:00 UTC
#   0 3 * * * /srv/storage/atocore/app/deploy/dalidou/cron-backup.sh >> /var/log/atocore-backup.log 2>&1
#
# What it does:
#   1. Creates a runtime backup (db + registry, no chroma by default)
#   2. Runs retention cleanup with --confirm to delete old snapshots
#   3. Logs results to stdout (captured by cron into the log file)
#
# Fail-open: exits 0 even on API errors so cron doesn't send noise
# emails. Check /var/log/atocore-backup.log for diagnostics.
#
# Environment variables:
#   ATOCORE_URL              default http://127.0.0.1:8100
#   ATOCORE_BACKUP_CHROMA    default false (set to "true" for cold chroma copy)
#   ATOCORE_BACKUP_DIR       default /srv/storage/atocore/backups
#   ATOCORE_BACKUP_RSYNC     optional rsync destination for off-host copies
#                            (e.g. papa@laptop:/home/papa/atocore-backups/)
#                            When set, the local snapshots tree is rsynced to
#                            the destination after cleanup. Unset = skip.
#                            SSH key auth must already be configured from this
#                            host to the destination.
#   ATOCORE_OPENCLAW_IMPORT  default true (set to "false" to skip the Step 3a
#                            OpenClaw state import)
#   ATOCORE_EXTRACT_BATCH    default true (set to "false" to skip the Step 4
#                            host-side batch LLM extraction)
set -euo pipefail

# Configuration — all overridable via environment (see header).
ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
INCLUDE_CHROMA="${ATOCORE_BACKUP_CHROMA:-false}"
BACKUP_DIR="${ATOCORE_BACKUP_DIR:-/srv/storage/atocore/backups}"
RSYNC_TARGET="${ATOCORE_BACKUP_RSYNC:-}"

# Run start time, kept for backward compatibility with anything that
# reads it. NOTE: no longer used as the log prefix (see log() below).
TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Log a message to stdout prefixed with the current UTC time.
# Bug fix: previously every line was prefixed with $TIMESTAMP, which is
# computed once at startup — so a run spanning several minutes (rsync to
# a remote host, batch LLM extraction) produced a log in which every
# entry carried the start time, hiding where the time actually went.
# Compute the stamp per call instead.
log() { printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*"; }
log "=== AtoCore daily backup starting ==="

# Step 1: create a runtime backup via the API (db + registry; chroma is
# included only when ATOCORE_BACKUP_CHROMA=true). Fail-open: on API
# error, log the diagnostics and exit 0 so cron sends no noise emails.
log "Step 1: creating backup (chroma=$INCLUDE_CHROMA)"
if ! BACKUP_RESULT=$(curl -sf -X POST \
        -H "Content-Type: application/json" \
        -d "{\"include_chroma\": $INCLUDE_CHROMA}" \
        "$ATOCORE_URL/admin/backup" 2>&1); then
    log "ERROR: backup creation failed: $BACKUP_RESULT"
    exit 0
fi
log "Backup created: $BACKUP_RESULT"
# Step 2: Retention cleanup (confirm=true to actually delete old
# snapshots). Fail-open: on API error, log and exit 0 — same contract
# as Step 1.
log "Step 2: running retention cleanup"
if ! CLEANUP_RESULT=$(curl -sf -X POST \
        -H "Content-Type: application/json" \
        -d '{"confirm": true}' \
        "$ATOCORE_URL/admin/backup/cleanup" 2>&1); then
    log "ERROR: cleanup failed: $CLEANUP_RESULT"
    exit 0
fi
log "Cleanup result: $CLEANUP_RESULT"
# Step 3: Off-host rsync (optional). Fail-open: log but don't abort
# the cron so a laptop being offline at 03:00 UTC never turns the
# local backup path red.
#
# Fix: the original used `rsync … && { log … } || { log WARN … }`,
# the classic and-or-as-if-else anti-pattern — if the success branch
# itself failed, the WARN branch would also run. Rewritten as a real
# if/elif/else chain.
if [[ -n "$RSYNC_TARGET" ]]; then
  log "Step 3: rsyncing snapshots to $RSYNC_TARGET"
  if [[ ! -d "$BACKUP_DIR/snapshots" ]]; then
    # Nothing to copy yet (first run, or backup dir relocated).
    log "WARN: $BACKUP_DIR/snapshots does not exist, skipping rsync"
  elif RSYNC_OUTPUT=$(rsync -a --delete \
      -e "ssh -o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=accept-new" \
      "$BACKUP_DIR/snapshots/" "$RSYNC_TARGET" 2>&1); then
    log "Rsync complete"
  else
    log "WARN: rsync to $RSYNC_TARGET failed (offline or auth?): $RSYNC_OUTPUT"
  fi
else
  log "Step 3: ATOCORE_BACKUP_RSYNC not set, skipping off-host copy"
fi
# Step 3a: Pull OpenClaw state from clawdbot (one-way import of
# SOUL.md, USER.md, MODEL-ROUTING.md, MEMORY.md, recent memory/*.md).
# Loose coupling: OpenClaw's internals don't need to change.
# Fail-open: importer failure never blocks the pipeline.
log "Step 3a: pull OpenClaw state"
OPENCLAW_IMPORT="${ATOCORE_OPENCLAW_IMPORT:-true}"
if [[ "$OPENCLAW_IMPORT" == "true" ]]; then
  # Bug fix: SCRIPT_DIR was previously defined only inside the Step 4
  # block further down, so expanding it here hit `set -u` ("unbound
  # variable"). An expansion error is fatal in bash even when the
  # command has a `||` handler attached, so this step aborted the whole
  # script instead of failing open. Resolve the script's directory
  # before first use (harmlessly recomputed again in Step 4).
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # Stream importer output through log() line by line; with pipefail,
  # a failing importer makes the whole pipeline non-zero and trips the
  # non-blocking WARN handler.
  python3 "$SCRIPT_DIR/../../scripts/import_openclaw_state.py" \
    --base-url "$ATOCORE_URL" \
    2>&1 | while IFS= read -r line; do log " $line"; done || {
    log " WARN: OpenClaw import failed (non-blocking)"
  }
else
  log " skipped (ATOCORE_OPENCLAW_IMPORT != true)"
fi
|
# Step 3b: Auto-refresh vault sources so new PKM files flow in
# automatically. Fail-open: never blocks the rest of the pipeline.
# --max-time 600 caps a large re-ingest so cron can't hang here.
#
# Fix: replaced `curl … && { … } || { … }` (and-or-as-if-else
# anti-pattern: a failure in the success branch would also run the
# WARN branch) with a plain if/else.
log "Step 3b: auto-refresh vault sources"
if REFRESH_RESULT=$(curl -sf -X POST --max-time 600 \
    "$ATOCORE_URL/ingest/sources" 2>&1); then
  log "Sources refresh complete"
else
  log "WARN: sources refresh failed (non-blocking): $REFRESH_RESULT"
fi
|
# Step 4: Batch LLM extraction on recent interactions (optional).
# Runs HOST-SIDE because claude CLI is on the host, not inside the
# Docker container. The script fetches interactions from the API,
# runs claude -p locally, and POSTs candidates back.
# Fail-open: extraction failure never blocks backup.
#
# Fix: replaced `bash … && { … } || { … }` (and-or-as-if-else
# anti-pattern) with a plain if/else.
EXTRACT="${ATOCORE_EXTRACT_BATCH:-true}"
if [[ "$EXTRACT" == "true" ]]; then
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  log "Step 4: running host-side batch LLM extraction"
  if bash "$SCRIPT_DIR/batch-extract.sh" 2>&1; then
    log "Extraction complete"
  else
    log "WARN: batch extraction failed (this is non-blocking)"
  fi
else
  log "Step 4: ATOCORE_EXTRACT_BATCH not set to true, skipping extraction"
fi

log "=== AtoCore daily backup complete ==="