ATOCore/deploy/dalidou/hourly-extract.sh

#!/usr/bin/env bash
#
# deploy/dalidou/hourly-extract.sh
# ---------------------------------
# Lightweight hourly extraction + triage so autonomous capture stays
# current (not a 24h-latency nightly-only affair).
#
# Does ONLY:
#   Step A: LLM extraction over recent interactions (last 2h window)
#   Step B: 3-tier auto-triage on the resulting candidates
#
# Skips the heavy nightly stuff (backup, rsync, OpenClaw import,
# synthesis, harness, integrity check, emerging detector). Those stay
# in cron-backup.sh at 03:00 UTC.
#
# Runs every hour via cron:
#   0 * * * * /srv/storage/atocore/app/deploy/dalidou/hourly-extract.sh \
#       >> /home/papa/atocore-logs/hourly-extract.log 2>&1
#
# Lock file prevents overlap if a previous run is still going (which
# can happen if claude CLI rate-limits and retries).

# Strict mode: abort on unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Fixed path of the lock file used to keep hourly runs from overlapping.
LOCK_FILE="/tmp/atocore-hourly-extract.lock"

# UTC start time of this run (ISO-8601, second resolution). Captured once
# here, so every line written via log() carries the same prefix.
TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Base URL of the AtoCore service; an ATOCORE_URL value already present in
# the environment wins over the loopback default.
: "${ATOCORE_URL:=http://127.0.0.1:8100}"

# Value handed to the extractor's --limit flag. 50 recent interactions is
# enough for an hour — typical usage is under 20/h. Override with
# ATOCORE_HOURLY_EXTRACT_LIMIT.
LIMIT="${ATOCORE_HOURLY_EXTRACT_LIMIT:-50}"

# Absolute directory containing this script, and the application root two
# levels above it (the script lives in <app>/deploy/dalidou/).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Write one log line to stdout in the form "[<TIMESTAMP>] <message>".
# Globals:   TIMESTAMP (read) - set once at script start, so the prefix is
#            the run's start time rather than the time of the call.
# Arguments: all arguments are joined into a single message string.
log() {
    local message="$*"
    printf '[%s] %s\n' "$TIMESTAMP" "$message"
}

# Acquire lock (non-blocking).
# File descriptor 9 is opened on the lock file in the current shell and is
# never closed explicitly, so it stays open for the rest of the script.
# If the lock file cannot be opened (e.g. it exists but is not writable by
# this user), the run is skipped with exit status 0 as before, but a
# warning is now logged so that permanently skipped runs are visible in
# the cron log instead of failing silently.
exec 9>"$LOCK_FILE" || {
    log "WARN: cannot open lock file $LOCK_FILE, skipping"
    exit 0
}
# flock -n fails immediately instead of waiting when another process
# already holds the lock; that is treated as "previous run still going".
if ! flock -n 9; then
    log "hourly extract already running, skipping"
    exit 0
fi

# Prepend the application's src/ directory to the Python module search
# path, keeping any PYTHONPATH inherited from the environment after it.
PYTHONPATH="$APP_DIR/src:${PYTHONPATH:-}"
export PYTHONPATH

log "=== hourly extract+triage starting ==="

# Step A — candidate extraction from recent interactions.
# NOTE(review): the file header says "last 2h window" while the log line
# says "since last run"; only --limit is passed here, so the window is
# presumably chosen inside the Python script — confirm which is accurate.
log "Step A: LLM extraction (since last run)"
extract_cmd=(
    python3 "$APP_DIR/scripts/batch_llm_extract_live.py"
    --base-url "$ATOCORE_URL"
    --limit "$LIMIT"
)
# Best effort: a failure is reported but does not abort the run, so
# Step B still executes.
if ! "${extract_cmd[@]}" 2>&1; then
    log "WARN: batch extraction failed (non-blocking)"
fi

# Step B — 3-tier auto-triage (sonnet → opus → discard), limited to three
# batches per run via --max-batches.
log "Step B: auto-triage (3-tier)"
triage_cmd=(
    python3 "$APP_DIR/scripts/auto_triage.py"
    --base-url "$ATOCORE_URL"
    --max-batches 3
)
# Best effort as well: log the failure and carry on to the final line.
if ! "${triage_cmd[@]}" 2>&1; then
    log "WARN: auto-triage failed (non-blocking)"
fi

log "=== hourly extract+triage complete ==="