ATOCore/deploy/dalidou/batch-extract.sh

#!/usr/bin/env bash
#
# deploy/dalidou/batch-extract.sh
# --------------------------------
# Host-side LLM batch extraction for Dalidou.
#
# The claude CLI is available on the Dalidou HOST but NOT inside the
# Docker container. This script therefore runs on the host: it fetches
# recent interactions from the AtoCore API, runs the LLM extractor locally
# (claude -p sonnet), and posts candidates back to the API. It then runs
# auto-triage over the queued candidates.
#
# Intended to be called from cron-backup.sh after backup/cleanup/rsync,
# or manually via:
#
#   bash /srv/storage/atocore/app/deploy/dalidou/batch-extract.sh
#
# Environment variables:
#   ATOCORE_URL      default http://127.0.0.1:8100
#   ATOCORE_EXTRACT_LIMIT  default 50

# Strict mode: abort on unhandled command failures, on use of unset
# variables, and when any stage of a pipeline fails.
set -euo pipefail

# Runtime configuration; both values may be overridden from the environment.
ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
LIMIT="${ATOCORE_EXTRACT_LIMIT:-50}"

# Resolve this script's directory and the app root two levels above it.
# CDPATH is blanked for the lookups: with CDPATH set, `cd` on a relative
# path may land in a different directory and print it to stdout, which
# would corrupt the captured value. `--` protects against paths that
# begin with '-'.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
APP_DIR="$(CDPATH='' cd -- "$SCRIPT_DIR/../.." && pwd)"

# UTC time (ISO 8601) captured once, when the script starts.
TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# log MESSAGE...
# Write the arguments, joined into a single string, to stdout behind a
# bracketed UTC timestamp. The timestamp is taken at call time rather than
# from a value frozen at startup, so lines written after a long-running
# step show when they were actually emitted.
log() { printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*"; }

# The Python scripts need the atocore source on PYTHONPATH, so put it first.
# Any pre-existing PYTHONPATH is appended only when it is non-empty: an
# unconditional ':' would leave a trailing separator, i.e. an empty
# search-path entry (interpreted as the current working directory).
export PYTHONPATH="$APP_DIR/src${PYTHONPATH:+:$PYTHONPATH}"

log "=== AtoCore batch extraction + triage starting ==="
log "URL=$ATOCORE_URL  LIMIT=$LIMIT"

# Both steps are best-effort: a failing step is reported with a WARN line
# and the script carries on. Each command's stderr is folded into stdout.

# Step A: run the LLM extractor over recent interactions.
log "Step A: LLM extraction"
extract_cmd=(
    python3 "$APP_DIR/scripts/batch_llm_extract_live.py"
    --base-url "$ATOCORE_URL"
    --limit "$LIMIT"
)
if ! "${extract_cmd[@]}" 2>&1; then
    log "WARN: batch extraction failed (non-blocking)"
fi

# Step B: auto-triage the candidates waiting in the queue.
log "Step B: auto-triage"
triage_cmd=(
    python3 "$APP_DIR/scripts/auto_triage.py"
    --base-url "$ATOCORE_URL"
)
if ! "${triage_cmd[@]}" 2>&1; then
    log "WARN: auto-triage failed (non-blocking)"
fi

log "=== AtoCore batch extraction + triage complete ==="