"""Weekly project synthesis — LLM-generated 'current state' paragraph per project. Reads each registered project's state entries, memories, and entities, asks sonnet for a 3-5 sentence synthesis, and caches it under project_state/status/synthesis_cache. The wiki's project page reads this cached synthesis as the top band. Runs weekly via cron (or manually). Cheap — one LLM call per project. Usage: python3 scripts/synthesize_projects.py --base-url http://localhost:8100 """ from __future__ import annotations import argparse import json import os import shutil import subprocess import tempfile import urllib.request DEFAULT_BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://localhost:8100") DEFAULT_MODEL = os.environ.get("ATOCORE_SYNTHESIS_MODEL", "sonnet") TIMEOUT_S = 60 SYSTEM_PROMPT = """You are summarizing the current state of an engineering project for a personal context engine called AtoCore. You will receive: - Project state entries (decisions, requirements, status) - Active memories tagged to this project - Entity graph (subsystems, components, materials, decisions) Write a 3-5 sentence synthesis covering: 1. What the project is and its current stage 2. The key locked-in decisions and architecture 3. What the next focus is Rules: - Plain prose, no bullet lists - Factual, grounded in what the data says — don't invent or speculate - Present tense - Under 500 characters total - No markdown formatting, just prose - If the data is sparse, say so honestly ("limited project data available") Output ONLY the synthesis paragraph. No preamble, no JSON, no markdown headers.""" _cwd = None def get_cwd(): global _cwd if _cwd is None: _cwd = tempfile.mkdtemp(prefix="ato-synth-") return _cwd def api_get(base_url, path): with urllib.request.urlopen(f"{base_url}{path}", timeout=15) as r: return json.loads(r.read()) def api_post(base_url, path, body): data = json.dumps(body).encode("utf-8") req = urllib.request.Request( f"{base_url}{path}", method="POST", headers={"Content-Type": "application/json"}, data=data, ) with urllib.request.urlopen(req, timeout=15) as r: return json.loads(r.read()) def synthesize_project(base_url, project_id, model): # Gather context state = api_get(base_url, f"/project/state/{project_id}").get("entries", []) memories = api_get(base_url, f"/memory?project={project_id}&active_only=true&limit=20").get("memories", []) entities = api_get(base_url, f"/entities?project={project_id}&limit=50").get("entities", []) if not (state or memories or entities): return None lines = [f"PROJECT: {project_id}\n"] if state: lines.append("STATE ENTRIES:") for e in state[:15]: if e.get("key") == "synthesis_cache": continue lines.append(f" [{e['category']}] {e['key']}: {e['value'][:200]}") if memories: lines.append("\nACTIVE MEMORIES:") for m in memories[:10]: lines.append(f" [{m['memory_type']}] {m['content'][:200]}") if entities: lines.append("\nENTITIES:") by_type = {} for e in entities: by_type.setdefault(e["entity_type"], []).append(e["name"]) for t, names in by_type.items(): lines.append(f" {t}: {', '.join(names[:8])}") user_msg = "\n".join(lines) + "\n\nWrite the synthesis paragraph now." if not shutil.which("claude"): print(f" ! claude CLI not available, skipping {project_id}") return None try: result = subprocess.run( ["claude", "-p", "--model", model, "--append-system-prompt", SYSTEM_PROMPT, "--disable-slash-commands", user_msg], capture_output=True, text=True, timeout=TIMEOUT_S, cwd=get_cwd(), encoding="utf-8", errors="replace", ) except Exception as e: print(f" ! subprocess failed for {project_id}: {e}") return None if result.returncode != 0: print(f" ! claude exit {result.returncode} for {project_id}") return None synthesis = (result.stdout or "").strip() if not synthesis or len(synthesis) < 50: return None return synthesis[:1000] def main(): parser = argparse.ArgumentParser() parser.add_argument("--base-url", default=DEFAULT_BASE_URL) parser.add_argument("--model", default=DEFAULT_MODEL) parser.add_argument("--project", default=None, help="single project to synthesize") args = parser.parse_args() projects = api_get(args.base_url, "/projects").get("projects", []) if args.project: projects = [p for p in projects if p["id"] == args.project] print(f"Synthesizing {len(projects)} project(s) with {args.model}...") for p in projects: pid = p["id"] print(f"\n- {pid}") synthesis = synthesize_project(args.base_url, pid, args.model) if synthesis: print(f" {synthesis[:200]}...") try: api_post(args.base_url, "/project/state", { "project": pid, "category": "status", "key": "synthesis_cache", "value": synthesis, "source": "weekly synthesis pass", }) print(f" + cached") except Exception as e: print(f" ! save failed: {e}") if __name__ == "__main__": main()