Files
ATOCore/scripts/persist_llm_candidates.py

90 lines
2.7 KiB
Python
Raw Permalink Normal View History

"""Persist LLM-extracted candidates from a baseline JSON to Dalidou.
One-shot script: reads a saved extractor eval output file, filters to
candidates the LLM actually produced, and POSTs each to the Dalidou
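memory API with ``status=candidate``. The script keeps no dedup state of
its own: an HTTP 400 reply is counted as skipped, which is presumably how
the API rejects already-existing candidate content, making the script safe
to re-run.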
Usage:
python scripts/persist_llm_candidates.py \\
scripts/eval_data/extractor_llm_baseline_2026-04-11.json
Then triage via:
python scripts/atocore_client.py triage
"""
from __future__ import annotations
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
# Root URL that request paths are appended to; the ATOCORE_BASE_URL
# environment variable overrides the built-in default.
BASE_URL = os.getenv("ATOCORE_BASE_URL", "http://dalidou:8100")
# Per-request timeout in whole seconds, overridable via
# ATOCORE_TIMEOUT_SECONDS (must parse as an integer).
TIMEOUT = int(os.getenv("ATOCORE_TIMEOUT_SECONDS", "10"))
def post_json(path: str, body: dict) -> dict:
    """POST *body* as JSON to ``BASE_URL + path`` and return the decoded reply.

    The request carries a ``Content-Type: application/json`` header and is
    bounded by ``TIMEOUT`` seconds. Nothing is caught here: errors raised by
    ``urllib`` or by decoding the response propagate to the caller.
    """
    payload = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        f"{BASE_URL}{path}",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=TIMEOUT) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
def _iter_candidates(data: dict):
    """Yield every candidate dict found under ``results[*].actual_candidates``."""
    results = data.get("results")
    if not isinstance(results, list):
        return
    for result in results:
        if not isinstance(result, dict):
            continue
        # ``or ()`` covers an explicit JSON null, which ``.get(key, [])``
        # would hand back as None and crash the loop.
        for cand in result.get("actual_candidates") or ():
            if isinstance(cand, dict):
                yield cand


def main() -> int:
    """Persist every non-empty candidate from a baseline JSON file.

    Reads the file named by ``sys.argv[1]``, walks
    ``results[*].actual_candidates`` and POSTs each candidate whose
    ``content`` is non-blank to ``/memory`` with ``status="candidate"``.
    Successful posts are echoed to stdout, failures to stderr, and a
    ``persisted/skipped/errors`` summary is printed at the end. An HTTP 400
    reply is counted as skipped without a message; any other failure for a
    candidate is counted as an error and processing continues.

    Returns:
        ``1`` when the argument is missing or the file cannot be read or
        parsed as a JSON object; ``0`` once the file has been processed,
        even if individual POSTs failed (see the ``errors`` count).
    """
    if len(sys.argv) < 2:
        print(f"usage: {sys.argv[0]} <baseline_json>", file=sys.stderr)
        return 1

    baseline_path = sys.argv[1]
    try:
        # Context manager so the handle is closed even if parsing fails.
        with open(baseline_path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        print(f"cannot load {baseline_path}: {exc}", file=sys.stderr)
        return 1
    if not isinstance(data, dict):
        print(
            f"cannot load {baseline_path}: top-level JSON value is not an object",
            file=sys.stderr,
        )
        return 1

    persisted = 0
    skipped = 0
    errors = 0
    for cand in _iter_candidates(data):
        content = (cand.get("content") or "").strip()
        if not content:
            continue
        # Treat an explicit null like a missing key, as is done for content.
        mem_type = cand.get("memory_type")
        if mem_type is None:
            mem_type = "knowledge"
        project = cand.get("project") or ""
        confidence = cand.get("confidence")
        try:
            resp = post_json("/memory", {
                "memory_type": mem_type,
                "content": content,
                "project": project,
                "confidence": 0.5 if confidence is None else float(confidence),
                "status": "candidate",
            })
        except urllib.error.HTTPError as exc:
            if exc.code == 400:
                skipped += 1
            else:
                errors += 1
                print(f" ! error {exc.code}: {content[:60]}", file=sys.stderr)
        except Exception as exc:
            # Deliberately broad: one bad candidate (network failure, bad
            # reply, non-numeric confidence) must not abort the whole run.
            errors += 1
            print(f" ! {exc}: {content[:60]}", file=sys.stderr)
        else:
            # Reporting lives outside the try so a malformed reply cannot
            # make a stored candidate count as both persisted and errored.
            persisted += 1
            mem_id = resp.get("id", "?") if isinstance(resp, dict) else "?"
            print(f" + {str(mem_id)[:8]} [{mem_type}] {content[:80]}")

    print(f"\npersisted={persisted} skipped={skipped} errors={errors}")
    return 0
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())