feat: expose status field on POST /memory + persist_llm_candidates script

The API endpoint now passes the request's status field through to create_memory() so external scripts can create candidate memories directly without going through the extract endpoint. Default remains 'active' for backward compatibility. persist_llm_candidates.py reads a saved extractor eval baseline JSON (e.g. the Day 4 LLM run) and POSTs each candidate to Dalidou with status=candidate. Safe to re-run — duplicate content returns 400 which the script counts as 'skipped'. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 05:51:31 -04:00
parent a29b5e22f2
commit 3a7e8ccba4
2 changed files with 91 additions and 0 deletions
--- a/scripts/persist_llm_candidates.py
+++ b/scripts/persist_llm_candidates.py
@@ -0,0 +1,89 @@
+"""Persist LLM-extracted candidates from a baseline JSON to Dalidou.
+
+One-shot script: reads a saved extractor eval output file, filters to
+candidates the LLM actually produced, and POSTs each to the Dalidou
+memory API with ``status=candidate``. Any HTTP 400 reply (which is how the
+API reports duplicate content) is counted as skipped, so re-running is safe.
+
+Usage:
+
+    python scripts/persist_llm_candidates.py \\
+        scripts/eval_data/extractor_llm_baseline_2026-04-11.json
+
+Then triage via:
+
+    python scripts/atocore_client.py triage
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+
+BASE_URL = os.getenv("ATOCORE_BASE_URL", "http://dalidou:8100")
+TIMEOUT = int(os.getenv("ATOCORE_TIMEOUT_SECONDS", "10"))
+
+
+def post_json(path: str, body: dict) -> dict:
+    """POST *body* as JSON to ``BASE_URL + path`` and return the decoded JSON reply."""
+    payload = json.dumps(body).encode("utf-8")
+    request = urllib.request.Request(
+        BASE_URL + path, data=payload, method="POST",
+        headers={"Content-Type": "application/json"},
+    )
+    with urllib.request.urlopen(request, timeout=TIMEOUT) as reply:
+        raw = reply.read()
+    return json.loads(raw.decode("utf-8"))
+
+
+def main() -> int:
+    """POST each non-empty ``actual_candidates`` entry of ``sys.argv[1]`` to ``/memory``.
+
+    HTTP 400 counts as skipped, any other failure as an error; returns 1 on bad usage, else 0.
+    """
+    if len(sys.argv) < 2:
+        print(f"usage: {sys.argv[0]} <baseline_json>", file=sys.stderr)
+        return 1
+
+    with open(sys.argv[1], encoding="utf-8") as fh:
+        data = json.load(fh)
+    persisted = skipped = errors = 0
+    for r in data.get("results", []):
+        for c in r.get("actual_candidates", []):
+            content = (c.get("content") or "").strip()
+            if not content:
+                continue
+            mem_type = c.get("memory_type", "knowledge")
+            try:
+                resp = post_json("/memory", {
+                    "memory_type": mem_type,
+                    "content": content,
+                    "project": c.get("project", ""),
+                    "confidence": float(c.get("confidence", 0.5)),
+                    "status": "candidate",
+                })
+            except urllib.error.HTTPError as exc:
+                if exc.code == 400:
+                    skipped += 1
+                else:
+                    errors += 1
+                    print(f"  ! error {exc.code}: {content[:60]}", file=sys.stderr)
+            except Exception as exc:
+                errors += 1
+                print(f"  ! {exc}: {content[:60]}", file=sys.stderr)
+            else:
+                # Outside the try: a reporting failure must not also count the memory as an error.
+                persisted += 1
+                mem_id = resp.get("id", "?") if isinstance(resp, dict) else "?"
+                print(f"  + {str(mem_id)[:8]}  [{mem_type}]  {content[:80]}")
+
+    print(f"\npersisted={persisted}  skipped={skipped}  errors={errors}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/src/atocore/api/routes.py
+++ b/src/atocore/api/routes.py
@@ -141,6 +141,7 @@ class MemoryCreateRequest(BaseModel):
    content: str
    project: str = ""
    confidence: float = 1.0
+    status: str = "active"


 class MemoryUpdateRequest(BaseModel):
@@ -344,6 +345,7 @@ def api_create_memory(req: MemoryCreateRequest) -> dict:
            content=req.content,
            project=req.project,
            confidence=req.confidence,
+            status=req.status,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))