#!/usr/bin/env bash
#
# deploy/dalidou/batch-extract.sh
# --------------------------------
# Host-side LLM batch extraction for Dalidou.
#
# The claude CLI is available on the Dalidou HOST but NOT inside the
# Docker container. This script runs on the host, fetches recent
# interactions from the AtoCore API, runs the LLM extractor locally
# (claude -p sonnet), and posts candidates back to the API.
#
# Intended to be called from cron-backup.sh after backup/cleanup/rsync,
# or manually via:
#
#   bash /srv/storage/atocore/app/deploy/dalidou/batch-extract.sh
#
# Environment variables:
#   ATOCORE_URL            default http://127.0.0.1:8100
#   ATOCORE_EXTRACT_LIMIT  default 50
#
# Exit status:
#   A failing extractor run is logged as a WARN and does not change the
#   exit status (best-effort step). The script only aborts early if its
#   own directory layout cannot be resolved (set -e on the cd calls).

set -euo pipefail

ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
LIMIT="${ATOCORE_EXTRACT_LIMIT:-50}"

# Resolve the application root relative to this file (two levels up from
# the directory containing the script) so the script works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

#######################################
# Print a message prefixed with the current UTC time (ISO-8601, 'Z').
# The timestamp is taken at call time, so lines emitted after the
# extractor has run show when they were actually written.
# Arguments: $* - message text
# Outputs:   one line on stdout: "[<timestamp>] <message>"
#######################################
log() {
  printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*"
}

# The Python script needs the atocore source on PYTHONPATH
export PYTHONPATH="$APP_DIR/src:${PYTHONPATH:-}"

log "=== AtoCore batch LLM extraction starting ==="
log "URL=$ATOCORE_URL LIMIT=$LIMIT"

# Run the host-side extraction script. stderr is folded into stdout so a
# caller capturing a single stream sees everything. A failure is reported
# but deliberately not propagated.
if ! python3 "$APP_DIR/scripts/batch_llm_extract_live.py" \
  --base-url "$ATOCORE_URL" \
  --limit "$LIMIT" \
  2>&1; then
  log "WARN: batch extraction failed (non-blocking)"
fi

log "=== AtoCore batch LLM extraction complete ==="