fix: host-side LLM extraction (claude CLI not in container)
The claude CLI is installed on the Dalidou HOST but not inside
the Docker container. The /admin/extract-batch API endpoint with
mode=llm silently returned 0 candidates because
shutil.which('claude') was None inside the container.
Fix: extraction runs host-side via deploy/dalidou/batch-extract.sh
which calls scripts/batch_llm_extract_live.py with the host's
PYTHONPATH pointing at the repo's src/. The script:
- Fetches interactions from the API (GET /interactions?since=...)
- Runs extract_candidates_llm() locally (host has claude CLI)
- POSTs candidates back to the API (POST /memory, status=candidate)
- Tracks last-run timestamp via project state
The cron now calls the host-side script instead of the container
API endpoint for LLM mode. Rule-mode extraction in the container
still works via /admin/extract-batch.
The API endpoint retains the mode=llm option for environments
where claude IS inside the container (future Docker image with
claude CLI, or a different deployment model).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
166
scripts/batch_llm_extract_live.py
Normal file
166
scripts/batch_llm_extract_live.py
Normal file
@@ -0,0 +1,166 @@
|
||||
"""Host-side LLM batch extraction against a live AtoCore instance.
|
||||
|
||||
Fetches recent interactions from the AtoCore API, runs the LLM
|
||||
extractor locally (requires ``claude`` CLI on PATH), and POSTs
|
||||
candidates back to the API as ``status=candidate``.
|
||||
|
||||
This script runs on the HOST (not inside the Docker container)
|
||||
because the ``claude`` CLI is installed host-side. The container's
|
||||
``/admin/extract-batch`` endpoint can't use LLM mode because
|
||||
``shutil.which("claude")`` returns None inside the container.
|
||||
|
||||
Tracks last-run timestamp via project state so re-runs auto-resume.
|
||||
|
||||
Usage (manual):
|
||||
|
||||
python3 scripts/batch_llm_extract_live.py --base-url http://localhost:8100
|
||||
|
||||
Usage (cron, via wrapper):
|
||||
|
||||
bash deploy/dalidou/batch-extract.sh
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
# Make src/ importable
|
||||
_REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
sys.path.insert(0, str(_REPO_ROOT / "src"))
|
||||
|
||||
from atocore.interactions.service import Interaction # noqa: E402
|
||||
from atocore.memory.extractor_llm import extract_candidates_llm # noqa: E402
|
||||
|
||||
# Base URL of the live AtoCore API; overridable via ATOCORE_BASE_URL.
DEFAULT_BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://localhost:8100")
# Per-request timeout (seconds) applied to every HTTP call in this script.
DEFAULT_TIMEOUT = int(os.environ.get("ATOCORE_TIMEOUT_SECONDS", "10"))
||||
|
||||
def api_get(base_url: str, path: str, timeout: int = DEFAULT_TIMEOUT) -> dict:
    """GET ``base_url + path`` and return the decoded JSON response body."""
    request = urllib.request.Request(f"{base_url}{path}")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        payload = response.read().decode("utf-8")
    return json.loads(payload)
||||
|
||||
|
||||
def api_post(base_url: str, path: str, body: dict, timeout: int = DEFAULT_TIMEOUT) -> dict:
    """POST *body* as JSON to ``base_url + path``; return the decoded JSON reply."""
    encoded = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        f"{base_url}{path}",
        data=encoded,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return json.loads(response.read().decode("utf-8"))
||||
|
||||
|
||||
def get_last_run(base_url: str) -> str | None:
    """Return the stored last-run timestamp, or ``None`` if unavailable.

    Best-effort by design: any API, decoding, or lookup failure is treated
    as "no previous run" so a first invocation (or a down API) never aborts
    the extraction script.
    """
    try:
        state = api_get(base_url, "/project/state/atocore?category=status")
        # StopIteration (key absent) falls into the except below: same as "never ran".
        return next(
            entry["value"]
            for entry in state.get("entries", [])
            if entry.get("key") == "last_extract_batch_run"
        )
    except Exception:
        return None
||||
|
||||
|
||||
def set_last_run(base_url: str, timestamp: str) -> None:
    """Persist *timestamp* as the last-run marker via the project-state API.

    Best-effort: failures are swallowed so a flaky state endpoint never
    fails the extraction run itself.
    """
    payload = {
        "project": "atocore",
        "category": "status",
        "key": "last_extract_batch_run",
        "value": timestamp,
        "source": "batch_llm_extract_live.py",
    }
    try:
        api_post(base_url, "/project/state", payload)
    except Exception:
        pass
||||
|
||||
|
||||
def fetch_interactions(base_url: str, since: str | None, limit: int) -> list[dict]:
    """Fetch up to *limit* interactions from the API, optionally after *since*.

    Uses ``urllib.parse.urlencode`` instead of hand-assembling the query
    string (``quote_via=quote`` keeps the original percent-encoding of
    spaces in timestamps, rather than form-style ``+``).

    Returns the raw interaction dicts; an empty list when the API has none.
    """
    params: dict[str, str] = {"limit": str(limit)}
    if since:
        params["since"] = since
    query = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)
    result = api_get(base_url, f"/interactions?{query}")
    return result.get("interactions", [])
||||
|
||||
|
||||
def _persist_candidates(base_url: str, candidates) -> tuple[int, int]:
    """POST each candidate to /memory as status=candidate.

    Returns ``(persisted, errors)``. HTTP 400 responses are treated as
    "duplicate already stored" and skipped silently; any other failure
    counts as an error but never aborts the batch.
    """
    persisted = 0
    errors = 0
    for c in candidates:
        try:
            api_post(base_url, "/memory", {
                "memory_type": c.memory_type,
                "content": c.content,
                "project": c.project,
                "confidence": c.confidence,
                "status": "candidate",
            })
            persisted += 1
        except urllib.error.HTTPError as exc:
            if exc.code != 400:  # 400 = duplicate, skip silently
                errors += 1
        except Exception:
            errors += 1
    return persisted, errors


def main() -> int:
    """Entry point: fetch interactions, run LLM extraction, POST candidates.

    Resumes from the last recorded run timestamp unless ``--since`` is
    given. Per-interaction extraction errors are counted and skipped so
    one bad interaction never kills the batch. Always returns 0; progress
    and error counts are reported on stdout (the cron wrapper captures it).
    """
    # Hoisted from the end of the function in the original; kept
    # function-local so module import stays light.
    from datetime import datetime, timezone

    parser = argparse.ArgumentParser(description="Host-side LLM batch extraction")
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    parser.add_argument("--limit", type=int, default=50)
    parser.add_argument("--since", default=None, help="override last-run timestamp")
    args = parser.parse_args()

    since = args.since or get_last_run(args.base_url)
    print(f"fetching interactions since={since or '(first run)'} limit={args.limit}")

    raw_interactions = fetch_interactions(args.base_url, since, args.limit)
    print(f"fetched {len(raw_interactions)} interactions")

    processed = 0
    total_candidates = 0
    total_persisted = 0
    errors = 0

    for raw in raw_interactions:
        response_text = raw.get("response", "") or ""
        if not response_text.strip():
            continue  # nothing for the extractor to work with

        interaction = Interaction(
            id=raw["id"],
            prompt=raw.get("prompt", "") or "",
            response=response_text,
            response_summary=raw.get("response_summary", "") or "",
            project=raw.get("project", "") or "",
            client=raw.get("client", "") or "",
            session_id=raw.get("session_id", "") or "",
            created_at=raw.get("created_at", "") or "",
        )

        try:
            candidates = extract_candidates_llm(interaction)
        except Exception as exc:
            print(f" ! extraction error on {interaction.id[:8]}: {exc}")
            errors += 1
            continue

        processed += 1
        total_candidates += len(candidates)

        persisted, post_errors = _persist_candidates(args.base_url, candidates)
        total_persisted += persisted
        errors += post_errors

    # Record the run marker so the next invocation auto-resumes from here.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    set_last_run(args.base_url, now)

    print(f"processed={processed} candidates={total_candidates} persisted={total_persisted} errors={errors}")
    return 0
||||
|
||||
|
||||
if __name__ == "__main__":  # script entry point (cron / manual invocation)
    sys.exit(main())
|
||||
Reference in New Issue
Block a user