"""Persist LLM-extracted candidates from a baseline JSON to Dalidou. One-shot script: reads a saved extractor eval output file, filters to candidates the LLM actually produced, and POSTs each to the Dalidou memory API with ``status=candidate``. Deduplicates against already- existing candidate content so the script is safe to re-run. Usage: python scripts/persist_llm_candidates.py \\ scripts/eval_data/extractor_llm_baseline_2026-04-11.json Then triage via: python scripts/atocore_client.py triage """ from __future__ import annotations import json import os import sys import urllib.error import urllib.parse import urllib.request BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://dalidou:8100") TIMEOUT = int(os.environ.get("ATOCORE_TIMEOUT_SECONDS", "10")) def post_json(path: str, body: dict) -> dict: data = json.dumps(body).encode("utf-8") req = urllib.request.Request( url=f"{BASE_URL}{path}", method="POST", headers={"Content-Type": "application/json"}, data=data, ) with urllib.request.urlopen(req, timeout=TIMEOUT) as resp: return json.loads(resp.read().decode("utf-8")) def main() -> int: if len(sys.argv) < 2: print(f"usage: {sys.argv[0]} ", file=sys.stderr) return 1 data = json.loads(open(sys.argv[1], encoding="utf-8").read()) results = data.get("results", []) persisted = 0 skipped = 0 errors = 0 for r in results: for c in r.get("actual_candidates", []): content = (c.get("content") or "").strip() if not content: continue mem_type = c.get("memory_type", "knowledge") project = c.get("project", "") confidence = c.get("confidence", 0.5) try: resp = post_json("/memory", { "memory_type": mem_type, "content": content, "project": project, "confidence": float(confidence), "status": "candidate", }) persisted += 1 print(f" + {resp.get('id','?')[:8]} [{mem_type}] {content[:80]}") except urllib.error.HTTPError as exc: if exc.code == 400: skipped += 1 else: errors += 1 print(f" ! error {exc.code}: {content[:60]}", file=sys.stderr) except Exception as exc: errors += 1 print(f" ! {exc}: {content[:60]}", file=sys.stderr) print(f"\npersisted={persisted} skipped={skipped} errors={errors}") return 0 if __name__ == "__main__": raise SystemExit(main())