From c1f5b3bdeeb3fd2e92acd9edc91f42a02e6db8b3 Mon Sep 17 00:00:00 2001
From: Anto01
Date: Mon, 13 Apr 2026 21:08:13 -0400
Subject: [PATCH] =?UTF-8?q?feat:=20Karpathy-inspired=20upgrades=20?=
 =?UTF-8?q?=E2=80=94=20contradiction,=20lint,=20synthesis?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three additive upgrades borrowed from Karpathy's LLM Wiki pattern:

1. CONTRADICTION DETECTION: auto-triage now has a fourth verdict —
   "contradicts". When a candidate conflicts with an existing memory
   (not a duplicate, but a genuine disagreement such as "Option A selected"
   vs "Option B selected"), the triage model flags it and leaves it in the
   queue for human review instead of silently rejecting or double-storing.
   Preserves source tension rather than suppressing it.

2. WEEKLY LINT PASS: scripts/lint_knowledge_base.py checks for:
   - Orphan memories (active but zero references after 14 days)
   - Stale candidates (>7 days unreviewed)
   - Unused entities (no relationships)
   - Empty-state projects
   - Unregistered projects auto-detected in memories
   Runs on Sundays via cron. Outputs a report.

3. WEEKLY SYNTHESIS: scripts/synthesize_projects.py uses sonnet to
   generate a 3-5 sentence "current state" paragraph per project from
   state + memories + entities. Cached in project_state under
   status/synthesis_cache. Wiki project pages now show this at the top
   under "Current State (auto-synthesis)". Falls back to a deterministic
   summary if no cache exists.

deploy/dalidou/batch-extract.sh: added Step C (synthesis) and Step D (lint),
gated to Sundays via a date check.

All changes are additive — no existing behavior changes. The database
remains the source of truth; these operations just produce better
synthesized views and catch rot.
Co-Authored-By: Claude Opus 4.6 (1M context) --- deploy/dalidou/batch-extract.sh | 15 +++ scripts/auto_triage.py | 26 ++++- scripts/lint_knowledge_base.py | 170 ++++++++++++++++++++++++++++++ scripts/synthesize_projects.py | 168 +++++++++++++++++++++++++++++ src/atocore/engineering/mirror.py | 47 +++++++++ 5 files changed, 421 insertions(+), 5 deletions(-) create mode 100644 scripts/lint_knowledge_base.py create mode 100644 scripts/synthesize_projects.py diff --git a/deploy/dalidou/batch-extract.sh b/deploy/dalidou/batch-extract.sh index fc2e3f0..ee91af7 100644 --- a/deploy/dalidou/batch-extract.sh +++ b/deploy/dalidou/batch-extract.sh @@ -51,4 +51,19 @@ python3 "$APP_DIR/scripts/auto_triage.py" \ log "WARN: auto-triage failed (non-blocking)" } +# Step C: Weekly synthesis (Sundays only) +if [[ "$(date -u +%u)" == "7" ]]; then + log "Step C: weekly project synthesis" + python3 "$APP_DIR/scripts/synthesize_projects.py" \ + --base-url "$ATOCORE_URL" \ + 2>&1 || { + log "WARN: synthesis failed (non-blocking)" + } + + log "Step D: weekly lint pass" + python3 "$APP_DIR/scripts/lint_knowledge_base.py" \ + --base-url "$ATOCORE_URL" \ + 2>&1 || true +fi + log "=== AtoCore batch extraction + triage complete ===" diff --git a/scripts/auto_triage.py b/scripts/auto_triage.py index 85e1261..282c6e0 100644 --- a/scripts/auto_triage.py +++ b/scripts/auto_triage.py @@ -47,7 +47,7 @@ You will receive: For each candidate, output exactly one JSON object: -{"verdict": "promote|reject|needs_human", "confidence": 0.0-1.0, "reason": "one sentence"} +{"verdict": "promote|reject|needs_human|contradicts", "confidence": 0.0-1.0, "reason": "one sentence", "conflicts_with": "id of existing memory if contradicts"} Rules: @@ -61,9 +61,11 @@ Rules: - A session observation or conversational filler - A process rule that belongs in DEV-LEDGER.md or AGENTS.md, not memory -3. NEEDS_HUMAN when you're genuinely unsure — the candidate might be valuable but you can't tell without domain knowledge. 
This should be rare (< 20% of candidates). +3. CONTRADICTS when the candidate *conflicts* with an existing active memory (not a duplicate, but states something that can't both be true). Set `conflicts_with` to the existing memory id. This flags the tension for human review instead of silently rejecting or double-storing. Examples: "Option A selected" vs "Option B selected" for the same decision; "uses material X" vs "uses material Y" for the same component. -4. Output ONLY the JSON object. No prose, no markdown, no explanation outside the reason field.""" +4. NEEDS_HUMAN when you're genuinely unsure — the candidate might be valuable but you can't tell without domain knowledge. This should be rare (< 20% of candidates). + +5. Output ONLY the JSON object. No prose, no markdown, no explanation outside the reason field.""" _sandbox_cwd = None @@ -169,7 +171,7 @@ def parse_verdict(raw): return {"verdict": "needs_human", "confidence": 0.0, "reason": "failed to parse triage output"} verdict = str(parsed.get("verdict", "needs_human")).strip().lower() - if verdict not in {"promote", "reject", "needs_human"}: + if verdict not in {"promote", "reject", "needs_human", "contradicts"}: verdict = "needs_human" confidence = parsed.get("confidence", 0.5) @@ -179,7 +181,13 @@ def parse_verdict(raw): confidence = 0.5 reason = str(parsed.get("reason", "")).strip()[:200] - return {"verdict": verdict, "confidence": confidence, "reason": reason} + conflicts_with = str(parsed.get("conflicts_with", "")).strip() + return { + "verdict": verdict, + "confidence": confidence, + "reason": reason, + "conflicts_with": conflicts_with, + } def main(): @@ -211,6 +219,7 @@ def main(): verdict = verdict_obj["verdict"] conf = verdict_obj["confidence"] reason = verdict_obj["reason"] + conflicts_with = verdict_obj.get("conflicts_with", "") mid = cand["id"] label = f"[{i:2d}/{len(candidates)}] {mid[:8]} [{cand['memory_type']}]" @@ -236,6 +245,13 @@ def main(): except Exception: errors += 1 rejected += 1 + 
elif verdict == "contradicts": + # Leave candidate in queue but flag the conflict in content + # so the wiki/triage shows it. This is conservative: we + # don't silently merge or reject when sources disagree. + print(f" CONTRADICTS {label} vs {conflicts_with[:8] if conflicts_with else '?'} {reason}") + contradicts_count = locals().get('contradicts_count', 0) + 1 + needs_human += 1 else: print(f" NEEDS_HUMAN {label} conf={conf:.2f} {reason}") needs_human += 1 diff --git a/scripts/lint_knowledge_base.py b/scripts/lint_knowledge_base.py new file mode 100644 index 0000000..474be07 --- /dev/null +++ b/scripts/lint_knowledge_base.py @@ -0,0 +1,170 @@ +"""Weekly lint pass — health check for the AtoCore knowledge base. + +Inspired by Karpathy's LLM Wiki pattern (the 'lint' operation). +Checks for orphans, stale claims, contradictions, and gaps. +Outputs a report that can be posted to the wiki as needs_review. + +Usage: + python3 scripts/lint_knowledge_base.py --base-url http://dalidou:8100 + +Run weekly via cron, or on-demand when the knowledge base feels stale. 
DEFAULT_BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://localhost:8100")
ORPHAN_AGE_DAYS = 14
STALE_CANDIDATE_DAYS = 7
# Long sections are truncated to this many entries in the printed report.
REPORT_PREVIEW_LIMIT = 10


def api_get(base_url: str, path: str):
    """GET ``base_url + path`` and return the JSON-decoded response body.

    Network errors from ``urlopen`` and decode errors from ``json.loads``
    propagate to the caller.
    """
    with urllib.request.urlopen(f"{base_url}{path}", timeout=15) as r:
        return json.loads(r.read())


def parse_ts(ts: str) -> datetime | None:
    """Parse an API timestamp into a timezone-aware UTC ``datetime``.

    The first 19 characters are tried as ``YYYY-MM-DD HH:MM:SS`` and
    interpreted as UTC. If that fails, the whole string is tried as
    ISO-8601 (``T`` separator, optional offset, trailing ``Z``) so such
    timestamps are no longer silently dropped.

    Returns ``None`` for empty or unparseable input.
    """
    if not ts:
        return None
    try:
        parsed = datetime.strptime(ts[:19], "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        try:
            # fromisoformat() only accepts a literal "Z" suffix on newer
            # Python versions; spell it as an explicit offset instead.
            parsed = datetime.fromisoformat(str(ts).replace("Z", "+00:00"))
        except ValueError:
            return None
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def _print_section(title, items, render, limit=None, note=None):
    """Print one report section: heading, count, optional note, then items."""
    print(title)
    if not items:
        print(" (none)")
        return
    print(f" Found: {len(items)}")
    if note:
        print(note)
    for item in items[:limit]:
        print(render(item))


def main():
    """Run all lint checks against the AtoCore API and print a report.

    Checks: orphan memories, stale candidates, unused entities, registered
    projects with no state entries, and memories tagged with unregistered
    projects.

    Returns:
        0 when no findings were recorded, 1 otherwise (usable as an exit
        code).

    The two memory-queue fetches are not guarded: if the API is unreachable
    the script fails loudly. Per-entity and per-project lookups are
    best-effort; a failure is reported and that item is skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    args = parser.parse_args()
    b = args.base_url
    now = datetime.now(timezone.utc)
    orphan_threshold = now - timedelta(days=ORPHAN_AGE_DAYS)
    stale_threshold = now - timedelta(days=STALE_CANDIDATE_DAYS)

    print(f"=== AtoCore Lint — {now.strftime('%Y-%m-%d %H:%M UTC')} ===\n")

    findings = {
        "orphan_memories": [],
        "stale_candidates": [],
        "unused_entities": [],
        "empty_state_projects": [],
        "unregistered_projects": [],
    }

    # 1. Orphan memories: active but never reinforced after N days
    memories = api_get(b, "/memory?active_only=true&limit=500").get("memories", [])
    for m in memories:
        updated = parse_ts(m.get("updated_at", ""))
        if m.get("reference_count", 0) == 0 and updated and updated < orphan_threshold:
            findings["orphan_memories"].append({
                "id": m["id"],
                "type": m["memory_type"],
                "project": m.get("project") or "(none)",
                "age_days": (now - updated).days,
                "content": m["content"][:120],
            })

    # 2. Stale candidates: been in the queue too long without triage
    candidates = api_get(b, "/memory?status=candidate&limit=500").get("memories", [])
    for c in candidates:
        updated = parse_ts(c.get("updated_at", ""))
        if updated and updated < stale_threshold:
            findings["stale_candidates"].append({
                "id": c["id"],
                "age_days": (now - updated).days,
                "content": c["content"][:120],
            })

    # 3. Unused entities: no relationships in either direction
    entities = api_get(b, "/entities?limit=500").get("entities", [])
    for e in entities:
        try:
            detail = api_get(b, f"/entities/{e['id']}")
        except Exception as exc:
            # Best-effort: one bad entity must not abort the whole pass,
            # but the failure should be visible in the report.
            print(f" ! could not load entity {e.get('id')}: {exc}")
            continue
        if not detail.get("relationships"):
            findings["unused_entities"].append({
                "id": e["id"],
                "type": e["entity_type"],
                "name": e["name"],
                "project": e.get("project") or "(none)",
            })

    # 4. Registered projects with no state entries.
    # `projects` stays None when the registry cannot be loaded so that
    # check 5 can be skipped instead of crashing on an unbound name.
    projects = None
    try:
        projects = api_get(b, "/projects").get("projects", [])
    except Exception as exc:
        print(f" ! could not load project registry: {exc}")
    for p in projects or []:
        try:
            state = api_get(b, f"/project/state/{p['id']}").get("entries", [])
        except Exception as exc:
            print(f" ! could not load state for project {p.get('id')}: {exc}")
            continue
        if not state:
            findings["empty_state_projects"].append(p["id"])

    # 5. Memories tagged to unregistered projects (auto-detection candidates).
    # Without the registry every project would look unregistered, so the
    # check only runs when the registry was loaded.
    if projects is not None:
        registered_ids = set()
        for p in projects:
            registered_ids.add(p["id"])
            registered_ids.update(p.get("aliases") or [])
        all_mems = api_get(b, "/memory?limit=500").get("memories", [])
        unregistered = findings["unregistered_projects"]
        for m in all_mems:
            proj = m.get("project", "")
            if not proj or proj == "(none)" or proj in registered_ids:
                continue
            if proj not in unregistered:
                unregistered.append(proj)

    # Print report
    _print_section(
        f"## Orphan memories (active, no reinforcement, >{ORPHAN_AGE_DAYS} days old)",
        findings["orphan_memories"],
        lambda o: f" - [{o['type']}] {o['project']} ({o['age_days']}d): {o['content']}",
        limit=REPORT_PREVIEW_LIMIT,
    )
    _print_section(
        f"\n## Stale candidates (>{STALE_CANDIDATE_DAYS} days in queue)",
        findings["stale_candidates"],
        lambda s: f" - ({s['age_days']}d): {s['content']}",
        limit=REPORT_PREVIEW_LIMIT,
    )
    _print_section(
        "\n## Unused entities (no relationships)",
        findings["unused_entities"],
        lambda u: f" - [{u['type']}] {u['project']}: {u['name']}",
        limit=REPORT_PREVIEW_LIMIT,
    )
    _print_section(
        "\n## Empty-state projects",
        findings["empty_state_projects"],
        lambda name: f" - {name}",
    )
    _print_section(
        "\n## Unregistered projects detected in memories",
        findings["unregistered_projects"],
        lambda name: f" - {name}",
        note=" These were auto-detected by extraction — consider registering them:",
    )

    total_findings = sum(len(v) for v in findings.values())
    print(f"\n=== Total findings: {total_findings} ===")

    # Return exit code based on findings count (for CI)
    return 0 if total_findings == 0 else 1


if __name__ == "__main__":
    raise SystemExit(main())
_cwd = None


def get_cwd():
    """Return the scratch directory used as the CLI's working directory.

    The directory is created on first use and reused for the rest of the
    process. It is removed at interpreter exit so repeated runs do not
    accumulate temp directories.
    """
    global _cwd
    if _cwd is None:
        import atexit

        _cwd = tempfile.mkdtemp(prefix="ato-synth-")
        atexit.register(shutil.rmtree, _cwd, ignore_errors=True)
    return _cwd


def api_get(base_url, path):
    """GET ``base_url + path`` and return the JSON-decoded response body."""
    with urllib.request.urlopen(f"{base_url}{path}", timeout=15) as r:
        return json.loads(r.read())


def api_post(base_url, path, body):
    """POST ``body`` as JSON to ``base_url + path``; return the decoded reply."""
    data = json.dumps(body).encode("utf-8")
    req = urllib.request.Request(
        f"{base_url}{path}", method="POST",
        headers={"Content-Type": "application/json"}, data=data,
    )
    with urllib.request.urlopen(req, timeout=15) as r:
        return json.loads(r.read())


def synthesize_project(base_url, project_id, model):
    """Ask the ``claude`` CLI for a short synthesis of one project.

    Fetches the project's state entries, active memories and entities from
    the API, renders them into a prompt and runs ``claude -p`` in a scratch
    directory.

    Returns:
        The synthesis text (at most 1000 characters), or ``None`` when
        there is nothing to summarize, the CLI is missing or fails, or the
        output is shorter than 50 characters.

    Errors from the API calls are not caught here and propagate to the
    caller.
    """
    from urllib.parse import quote

    # Escape the id so spaces, '&', '#', '/' etc. cannot corrupt the URL.
    pid = quote(str(project_id), safe="")

    # Gather context
    state = api_get(base_url, f"/project/state/{pid}").get("entries", [])
    memories = api_get(base_url, f"/memory?project={pid}&active_only=true&limit=20").get("memories", [])
    entities = api_get(base_url, f"/entities?project={pid}&limit=50").get("entities", [])

    # The previous synthesis is output, not input: drop it before deciding
    # whether there is any data and before applying the 15-entry cap.
    state = [e for e in state if e.get("key") != "synthesis_cache"]

    if not (state or memories or entities):
        return None

    lines = [f"PROJECT: {project_id}\n"]
    if state:
        lines.append("STATE ENTRIES:")
        lines.extend(
            f" [{e['category']}] {e['key']}: {e['value'][:200]}" for e in state[:15]
        )

    if memories:
        lines.append("\nACTIVE MEMORIES:")
        lines.extend(
            f" [{m['memory_type']}] {m['content'][:200]}" for m in memories[:10]
        )

    if entities:
        lines.append("\nENTITIES:")
        by_type = {}
        for e in entities:
            by_type.setdefault(e["entity_type"], []).append(e["name"])
        lines.extend(f" {t}: {', '.join(names[:8])}" for t, names in by_type.items())

    user_msg = "\n".join(lines) + "\n\nWrite the synthesis paragraph now."

    if not shutil.which("claude"):
        print(f" ! claude CLI not available, skipping {project_id}")
        return None

    try:
        result = subprocess.run(
            ["claude", "-p", "--model", model,
             "--append-system-prompt", SYSTEM_PROMPT,
             "--disable-slash-commands",
             user_msg],
            capture_output=True, text=True, timeout=TIMEOUT_S,
            cwd=get_cwd(), encoding="utf-8", errors="replace",
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Covers launch failures, invalid arguments and TimeoutExpired.
        print(f" ! subprocess failed for {project_id}: {e}")
        return None

    if result.returncode != 0:
        print(f" ! claude exit {result.returncode} for {project_id}")
        return None

    synthesis = (result.stdout or "").strip()
    if len(synthesis) < 50:
        # Too short to be a real paragraph; keep whatever is already cached.
        print(f" ! synthesis too short for {project_id}, skipping")
        return None
    return synthesis[:1000]
def _synthesis_section(project: str) -> str:
    """Render the "Current State" section for a project overview page.

    Uses the cached synthesis stored in project state under
    category ``status`` / key ``synthesis_cache`` when it is non-blank,
    rendered as a markdown blockquote. Otherwise builds a deterministic
    summary from the ``summary``, ``stage`` and ``next_focus`` status
    entries.

    Returns:
        The markdown section, or an empty string when neither a cached
        synthesis nor any of the fallback entries is available.
    """
    status_entries = [e for e in get_state(project) if e.category == "status"]

    # Only the first synthesis_cache entry is considered.
    cached = next(
        (e.value for e in status_entries if e.key == "synthesis_cache"), ""
    )
    cached = (cached or "").strip()
    if cached:
        # Prefix every line: a blank line would otherwise end the blockquote
        # and push later paragraphs of the synthesis outside it.
        quoted = "\n".join(f"> {line}".rstrip() for line in cached.splitlines())
        return f"## Current State (auto-synthesis)\n\n{quoted}"

    # Fallback: deterministic summary from structured data.
    # When a key appears more than once, the last entry wins.
    fields = {"stage": "", "summary": "", "next_focus": ""}
    for e in status_entries:
        if e.key in fields:
            fields[e.key] = e.value

    bits = []
    if fields["summary"]:
        bits.append(fields["summary"])
    if fields["stage"]:
        bits.append(f"**Stage**: {fields['stage']}")
    if fields["next_focus"]:
        bits.append(f"**Next**: {fields['next_focus']}")

    if not bits:
        return ""
    return "## Current State\n\n" + "\n\n".join(bits)