feat: expose status field on POST /memory + persist_llm_candidates script
The API endpoint now passes the request's status field through to create_memory() so external scripts can create candidate memories directly without going through the extract endpoint. Default remains 'active' for backward compatibility. persist_llm_candidates.py reads a saved extractor eval baseline JSON (e.g. the Day 4 LLM run) and POSTs each candidate to Dalidou with status=candidate. Safe to re-run — duplicate content returns 400 which the script counts as 'skipped'. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
89
scripts/persist_llm_candidates.py
Normal file
89
scripts/persist_llm_candidates.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
"""Persist LLM-extracted candidates from a baseline JSON to Dalidou.
|
||||||
|
|
||||||
|
One-shot script: reads a saved extractor eval output file, filters to
|
||||||
|
candidates the LLM actually produced, and POSTs each to the Dalidou
|
||||||
|
memory API with ``status=candidate``. The API rejects duplicate content
with HTTP 400, which the script counts as skipped, so it is safe to re-run.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
python scripts/persist_llm_candidates.py \\
|
||||||
|
scripts/eval_data/extractor_llm_baseline_2026-04-11.json
|
||||||
|
|
||||||
|
Then triage via:
|
||||||
|
|
||||||
|
python scripts/atocore_client.py triage
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
# Root URL of the memory API; override with ATOCORE_BASE_URL.
# A trailing slash is stripped so that BASE_URL + "/memory" never yields "//memory".
BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://dalidou:8100").rstrip("/")
# Per-request timeout in whole seconds; override with ATOCORE_TIMEOUT_SECONDS.
TIMEOUT = int(os.environ.get("ATOCORE_TIMEOUT_SECONDS", "10"))
|
||||||
|
|
||||||
|
|
||||||
|
def post_json(path: str, body: dict) -> dict:
    """POST ``body`` as JSON to ``BASE_URL + path`` and return the decoded reply.

    The request is bounded by ``TIMEOUT`` seconds. Error statuses are raised
    by ``urlopen`` as ``urllib.error.HTTPError`` and are not handled here.
    """
    payload = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        f"{BASE_URL}{path}",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=TIMEOUT) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Persist every LLM-produced candidate from a baseline JSON file.

    Reads the file named by ``sys.argv[1]``, walks each entry of ``results``
    and its ``actual_candidates`` list, and POSTs every candidate with
    non-empty content to ``/memory`` with ``status="candidate"``. Successes
    are printed to stdout, failures to stderr, followed by a summary line.

    Returns:
        1 if the baseline path argument is missing, otherwise 0. Individual
        candidate failures are reported in the ``errors`` count and do not
        change the return value.
    """
    if len(sys.argv) < 2:
        print(f"usage: {sys.argv[0]} <baseline_json>", file=sys.stderr)
        return 1

    # Use a context manager so the file handle is closed deterministically.
    with open(sys.argv[1], encoding="utf-8") as fh:
        data = json.load(fh)
    results = data.get("results", [])

    persisted = 0
    skipped = 0
    errors = 0

    for r in results:
        for c in r.get("actual_candidates", []):
            content = (c.get("content") or "").strip()
            if not content:
                continue
            mem_type = c.get("memory_type", "knowledge")
            project = c.get("project", "")
            confidence = c.get("confidence", 0.5)

            try:
                # float() stays inside the try: a bad confidence value is
                # counted as an error for this candidate, not a crash.
                resp = post_json("/memory", {
                    "memory_type": mem_type,
                    "content": content,
                    "project": project,
                    "confidence": float(confidence),
                    "status": "candidate",
                })
            except urllib.error.HTTPError as exc:
                if exc.code == 400:
                    # Every 400 is treated as "duplicate content, already
                    # stored". NOTE(review): other 400 causes would also land
                    # here silently - confirm against the API.
                    skipped += 1
                else:
                    errors += 1
                    print(f" ! error {exc.code}: {content[:60]}", file=sys.stderr)
            except Exception as exc:
                # Best-effort: report and keep going with the next candidate.
                errors += 1
                print(f" ! {exc}: {content[:60]}", file=sys.stderr)
            else:
                persisted += 1
                # The id may not be a string (or the reply not an object);
                # formatting it must not turn a successful write into an error.
                mem_id = resp.get("id", "?") if isinstance(resp, dict) else "?"
                print(f" + {str(mem_id)[:8]} [{mem_type}] {content[:80]}")

    print(f"\npersisted={persisted} skipped={skipped} errors={errors}")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
@@ -141,6 +141,7 @@ class MemoryCreateRequest(BaseModel):
|
|||||||
content: str
|
content: str
|
||||||
project: str = ""
|
project: str = ""
|
||||||
confidence: float = 1.0
|
confidence: float = 1.0
|
||||||
|
status: str = "active"
|
||||||
|
|
||||||
|
|
||||||
class MemoryUpdateRequest(BaseModel):
|
class MemoryUpdateRequest(BaseModel):
|
||||||
@@ -344,6 +345,7 @@ def api_create_memory(req: MemoryCreateRequest) -> dict:
|
|||||||
content=req.content,
|
content=req.content,
|
||||||
project=req.project,
|
project=req.project,
|
||||||
confidence=req.confidence,
|
confidence=req.confidence,
|
||||||
|
status=req.status,
|
||||||
)
|
)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise HTTPException(status_code=400, detail=str(e))
|
raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|||||||
Reference in New Issue
Block a user