#!/usr/bin/env bash
#
# deploy/dalidou/hourly-extract.sh
# ---------------------------------
# Lightweight hourly extraction + triage so autonomous capture stays
# current (not a 24h-latency nightly-only affair).
#
# Does ONLY:
#   Step A: LLM extraction over recent interactions (last 2h window)
#   Step B: 3-tier auto-triage on the resulting candidates
#
# Skips the heavy nightly stuff (backup, rsync, OpenClaw import,
# synthesis, harness, integrity check, emerging detector). Those stay
# in cron-backup.sh at 03:00 UTC.
#
# Runs every hour via cron:
#   0 * * * * /srv/storage/atocore/app/deploy/dalidou/hourly-extract.sh \
#       >> /home/papa/atocore-logs/hourly-extract.log 2>&1
#
# Lock file prevents overlap if a previous run is still going (which
# can happen if claude CLI rate-limits and retries).

set -euo pipefail

readonly ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"

# 50 recent interactions is enough for an hour — typical usage is under 20/h.
readonly LIMIT="${ATOCORE_HOURLY_EXTRACT_LIMIT:-50}"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
readonly SCRIPT_DIR APP_DIR
readonly LOCK_FILE="/tmp/atocore-hourly-extract.lock"

# Log with a per-call UTC timestamp. (A single timestamp captured at
# startup would stamp every line of a long run identically, hiding
# exactly the rate-limit stalls this log exists to diagnose.)
log() { printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*"; }

# Acquire lock (non-blocking). If we cannot even open the lock file,
# say so on stderr instead of vanishing silently, then bow out.
if ! exec 9>"$LOCK_FILE"; then
  printf 'hourly-extract: cannot open lock file %s\n' "$LOCK_FILE" >&2
  exit 0
fi
if ! flock -n 9; then
  log "hourly extract already running, skipping"
  exit 0
fi

export PYTHONPATH="$APP_DIR/src:${PYTHONPATH:-}"

log "=== hourly extract+triage starting ==="

# Step A — Extract candidates from recent interactions.
# Failure is non-blocking: a rate-limited extraction shouldn't stop triage
# of candidates produced by earlier runs.
log "Step A: LLM extraction (since last run)"
python3 "$APP_DIR/scripts/batch_llm_extract_live.py" \
  --base-url "$ATOCORE_URL" \
  --limit "$LIMIT" \
  2>&1 || log "WARN: batch extraction failed (non-blocking)"

# Step B — 3-tier auto-triage (sonnet → opus → discard).
# Also non-blocking so the completion marker is always logged.
log "Step B: auto-triage (3-tier)"
python3 "$APP_DIR/scripts/auto_triage.py" \
  --base-url "$ATOCORE_URL" \
  --max-batches 3 \
  2>&1 || log "WARN: auto-triage failed (non-blocking)"

log "=== hourly extract+triage complete ==="