From 7863ab38258f76d581dc7a49d3c9a3e1773dabda Mon Sep 17 00:00:00 2001 From: Anto01 Date: Sat, 18 Apr 2026 08:24:49 -0400 Subject: [PATCH] =?UTF-8?q?feat:=20hourly=20extract+triage=20cron=20?= =?UTF-8?q?=E2=80=94=20close=20the=2024h=20latency=20gap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User observation that triggered this: 'AtoCore was meant to remember and triage by its own, not me specifically asking to remember things'. Correct — the system IS capturing autonomously (Stop hook + OpenClaw plugin), but extraction was nightly-only. So 'I talked about APM today' didn't show up in memories until the next 03:00 UTC cron run. Fix: split the lightweight extraction + triage into a new hourly cron. The heavy nightly (backup, rsync, OpenClaw import, synthesis, harness, integrity, emerging detector) stays at 03:00 UTC — no reason to run those hourly. hourly-extract.sh does ONLY: - Step A: batch_llm_extract_live.py (limit 50, ~1h window) - Step B: auto_triage.py (3-tier, max_batches=3) Lock file prevents overlap on rate-limit retries. After this lands: latency from 'you told me X' to 'X is an active memory' drops from ~24h to ~1h (plus the ~5min it takes for extraction + triage to complete on a typical <20 interactions/hour). The 'atocore_remember' MCP tool stays as an escape hatch for conversations that happen outside captured channels (Claude Desktop web, phone), NOT as the primary capture path. The primary path is automatic: Claude Code / OpenClaw captures → hourly extract → 3-tier triage → active memory. 
#!/usr/bin/env bash
#
# deploy/dalidou/hourly-extract.sh
# ---------------------------------
# Lightweight hourly extraction + triage so autonomous capture stays
# current (not a 24h-latency nightly-only affair).
#
# Does ONLY:
#   Step A: LLM extraction over recent interactions. This script only
#           caps the batch with --limit; the look-back window itself is
#           decided inside batch_llm_extract_live.py.
#   Step B: 3-tier auto-triage on the resulting candidates
#
# Skips the heavy nightly stuff (backup, rsync, OpenClaw import,
# synthesis, harness, integrity check, emerging detector). Those stay
# in cron-backup.sh at 03:00 UTC.
#
# Runs every hour via cron (install the entry manually):
#   0 * * * * /srv/storage/atocore/app/deploy/dalidou/hourly-extract.sh \
#       >> /home/papa/atocore-logs/hourly-extract.log 2>&1
#
# Environment (all optional):
#   ATOCORE_URL                   passed as --base-url to both steps
#                                 (default: http://127.0.0.1:8100)
#   ATOCORE_HOURLY_EXTRACT_LIMIT  passed as --limit to Step A (default: 50)
#   PYTHONPATH                    kept, with $APP_DIR/src put in front
#
# Exit status:
#   0 when the run completes, when it is skipped (lock busy or lock file
#   cannot be opened), and also when a step fails — step failures are
#   logged as WARN only. Anything else that fails under `set -e`
#   (e.g. resolving the script directory) aborts with a non-zero status.
#
# Lock file prevents overlap if a previous run is still going (which
# can happen if claude CLI rate-limits and retries).

set -euo pipefail

ATOCORE_URL="${ATOCORE_URL:-http://127.0.0.1:8100}"
# 50 recent interactions is enough for an hour — typical usage is under 20/h.
LIMIT="${ATOCORE_HOURLY_EXTRACT_LIMIT:-50}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
LOCK_FILE="/tmp/atocore-hourly-extract.lock"
readonly ATOCORE_URL LIMIT SCRIPT_DIR APP_DIR LOCK_FILE

#######################################
# Write one log line to stdout, prefixed with the current UTC time.
# The time is taken on every call rather than once at start-up, so a
# line printed after a slow step shows when it was actually written.
# Arguments: the message (all arguments are joined with spaces)
#######################################
log() {
  printf '[%s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*"
}

#######################################
# Run one step as best-effort.
# Arguments: $1 - label used in the warning text
#            $2.. - the command to run, with its arguments
# Outputs:   the command's stdout and stderr, merged onto stdout;
#            a WARN line if the command exits non-zero
# Returns:   0 regardless of the command's exit status
#######################################
run_step() {
  local label=$1
  shift
  if ! "$@" 2>&1; then
    log "WARN: ${label} failed (non-blocking)"
  fi
}

# Acquire lock (non-blocking). fd 9 stays open until the script exits,
# which is what keeps the lock held for the whole run.
exec 9>"$LOCK_FILE" || {
  # Still exit 0 so cron does not treat this as a failure, but leave a
  # trace in the log instead of skipping silently.
  log "WARN: cannot open lock file $LOCK_FILE, skipping"
  exit 0
}
if ! flock -n 9; then
  log "hourly extract already running, skipping"
  exit 0
fi

# Put the app sources first. The ${VAR:+...} form adds the ':' separator
# only when PYTHONPATH already has a value, so no trailing ':' is left
# behind when it was unset or empty.
export PYTHONPATH="$APP_DIR/src${PYTHONPATH:+:$PYTHONPATH}"

log "=== hourly extract+triage starting ==="

# Step A — Extract candidates from recent interactions
log "Step A: LLM extraction (since last run)"
run_step "batch extraction" \
  python3 "$APP_DIR/scripts/batch_llm_extract_live.py" \
    --base-url "$ATOCORE_URL" \
    --limit "$LIMIT"

# Step B — 3-tier auto-triage (sonnet → opus → discard)
log "Step B: auto-triage (3-tier)"
run_step "auto-triage" \
  python3 "$APP_DIR/scripts/auto_triage.py" \
    --base-url "$ATOCORE_URL" \
    --max-batches 3

log "=== hourly extract+triage complete ==="