Three additive upgrades borrowed from Karpathy's LLM Wiki pattern: 1. CONTRADICTION DETECTION: auto-triage now has a fourth verdict — "contradicts". When a candidate conflicts with an existing memory (not duplicates, genuine disagreement like "Option A selected" vs "Option B selected"), the triage model flags it and leaves it in the queue for human review instead of silently rejecting or double-storing. Preserves source tension rather than suppressing it. 2. WEEKLY LINT PASS: scripts/lint_knowledge_base.py checks for: - Orphan memories (active but zero references after 14 days) - Stale candidates (>7 days unreviewed) - Unused entities (no relationships) - Empty-state projects - Unregistered projects auto-detected in memories Runs Sundays via cron. Outputs a report. 3. WEEKLY SYNTHESIS: scripts/synthesize_projects.py uses sonnet to generate a 3-5 sentence "current state" paragraph per project from state + memories + entities. Cached in project_state under status/synthesis_cache. Wiki project pages now show this at the top under "Current State (auto-synthesis)". Falls back to a deterministic summary if no cache exists. deploy/dalidou/batch-extract.sh: added Step C (synthesis) and Step D (lint) gated to Sundays via date check. All additive — nothing existing changes behavior. The database remains the source of truth; these operations just produce better synthesized views and catch rot. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
169 lines
5.4 KiB
Python
169 lines
5.4 KiB
Python
"""Weekly project synthesis — LLM-generated 'current state' paragraph per project.
|
|
|
|
Reads each registered project's state entries, memories, and entities,
|
|
asks sonnet for a 3-5 sentence synthesis, and caches it under
|
|
project_state/status/synthesis_cache. The wiki's project page reads
|
|
this cached synthesis as the top band.
|
|
|
|
Runs weekly via cron (or manually). Cheap — one LLM call per project.
|
|
|
|
Usage:
|
|
python3 scripts/synthesize_projects.py --base-url http://localhost:8100
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import tempfile
|
|
import urllib.request
|
|
|
|
# Base URL of the AtoCore API server; overridable via ATOCORE_BASE_URL.
DEFAULT_BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://localhost:8100")
# Model alias handed to the claude CLI; overridable via ATOCORE_SYNTHESIS_MODEL.
DEFAULT_MODEL = os.environ.get("ATOCORE_SYNTHESIS_MODEL", "sonnet")
# Hard wall-clock limit (seconds) for each LLM subprocess call.
TIMEOUT_S = 60

# System prompt appended to every synthesis call. Constrains the model to a
# short, factual, prose-only paragraph so the result can be cached verbatim
# and rendered as the wiki project page's top band.
SYSTEM_PROMPT = """You are summarizing the current state of an engineering project for a personal context engine called AtoCore.

You will receive:
- Project state entries (decisions, requirements, status)
- Active memories tagged to this project
- Entity graph (subsystems, components, materials, decisions)

Write a 3-5 sentence synthesis covering:
1. What the project is and its current stage
2. The key locked-in decisions and architecture
3. What the next focus is

Rules:
- Plain prose, no bullet lists
- Factual, grounded in what the data says — don't invent or speculate
- Present tense
- Under 500 characters total
- No markdown formatting, just prose
- If the data is sparse, say so honestly ("limited project data available")

Output ONLY the synthesis paragraph. No preamble, no JSON, no markdown headers."""
|
|
|
|
|
|
# Lazily-created scratch directory shared by every subprocess invocation.
_cwd = None


def get_cwd():
    """Return a process-wide temp working directory, creating it on first use.

    Used as the cwd for `claude` subprocess calls so they run in a clean
    directory. The directory is created once per process and reused; it is
    not removed here (left to the OS temp-cleanup policy).
    """
    global _cwd
    if _cwd is not None:
        return _cwd
    _cwd = tempfile.mkdtemp(prefix="ato-synth-")
    return _cwd
|
|
|
|
|
|
def api_get(base_url, path):
    """GET ``base_url + path`` and return the decoded JSON response body."""
    url = f"{base_url}{path}"
    with urllib.request.urlopen(url, timeout=15) as resp:
        payload = resp.read()
    return json.loads(payload)
|
|
|
|
|
|
def api_post(base_url, path, body):
    """POST ``body`` as JSON to ``base_url + path``; return the decoded JSON reply."""
    encoded = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        f"{base_url}{path}",
        data=encoded,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=15) as resp:
        raw = resp.read()
    return json.loads(raw)
|
|
|
|
|
|
def _build_context(project_id, state, memories, entities):
    """Format project data into the plain-text prompt body for the LLM.

    Excludes any existing ``synthesis_cache`` entry BEFORE slicing the state
    list, so the model never summarizes its own previous output and the cap
    of 15 applies to real entries. (Previously the slice happened first, so
    a cached synthesis wasted one of the 15 slots and could push a genuine
    entry out of the context.)
    """
    lines = [f"PROJECT: {project_id}\n"]

    real_state = [e for e in state if e.get("key") != "synthesis_cache"]
    if real_state:
        lines.append("STATE ENTRIES:")
        for e in real_state[:15]:
            lines.append(f" [{e['category']}] {e['key']}: {e['value'][:200]}")

    if memories:
        lines.append("\nACTIVE MEMORIES:")
        for m in memories[:10]:
            lines.append(f" [{m['memory_type']}] {m['content'][:200]}")

    if entities:
        lines.append("\nENTITIES:")
        # Group entity names by type so the prompt stays compact.
        by_type = {}
        for e in entities:
            by_type.setdefault(e["entity_type"], []).append(e["name"])
        for t, names in by_type.items():
            lines.append(f" {t}: {', '.join(names[:8])}")

    return "\n".join(lines) + "\n\nWrite the synthesis paragraph now."


def _run_claude(user_msg, model, project_id):
    """Invoke the claude CLI in print mode; return raw stdout or None on failure.

    Runs in a scratch cwd (see get_cwd) so the CLI picks up no local config.
    Failures (spawn error, timeout, nonzero exit) are logged and swallowed —
    a single project's failure must not abort the weekly pass.
    """
    try:
        result = subprocess.run(
            ["claude", "-p", "--model", model,
             "--append-system-prompt", SYSTEM_PROMPT,
             "--disable-slash-commands",
             user_msg],
            capture_output=True, text=True, timeout=TIMEOUT_S,
            cwd=get_cwd(), encoding="utf-8", errors="replace",
        )
    except Exception as e:
        print(f" ! subprocess failed for {project_id}: {e}")
        return None
    if result.returncode != 0:
        print(f" ! claude exit {result.returncode} for {project_id}")
        return None
    return result.stdout


def synthesize_project(base_url, project_id, model):
    """Generate a 'current state' synthesis paragraph for one project.

    Gathers state entries, active memories, and entities from the API, builds
    a prompt, and asks the configured model for a 3-5 sentence summary.

    Returns the synthesis string (capped at 1000 chars), or None when there is
    no project data, the claude CLI is unavailable, the call fails, or the
    output is too short (<50 chars) to be a real synthesis.
    """
    # Gather context from the three API surfaces.
    state = api_get(base_url, f"/project/state/{project_id}").get("entries", [])
    memories = api_get(base_url, f"/memory?project={project_id}&active_only=true&limit=20").get("memories", [])
    entities = api_get(base_url, f"/entities?project={project_id}&limit=50").get("entities", [])

    # Nothing to summarize — skip rather than produce an empty synthesis.
    if not (state or memories or entities):
        return None

    user_msg = _build_context(project_id, state, memories, entities)

    if not shutil.which("claude"):
        print(f" ! claude CLI not available, skipping {project_id}")
        return None

    stdout = _run_claude(user_msg, model, project_id)
    if stdout is None:
        return None

    synthesis = stdout.strip()
    # Reject degenerate output; cap length defensively (prompt asks for <500 chars).
    if not synthesis or len(synthesis) < 50:
        return None
    return synthesis[:1000]
|
|
|
|
|
|
def main():
    """CLI entry point: synthesize all registered projects (or one via --project).

    For each project, asks synthesize_project for a fresh paragraph and, when
    one is produced, caches it in project_state under status/synthesis_cache.
    Save failures are reported but do not stop the pass.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    parser.add_argument("--model", default=DEFAULT_MODEL)
    parser.add_argument("--project", default=None, help="single project to synthesize")
    args = parser.parse_args()

    registered = api_get(args.base_url, "/projects").get("projects", [])
    if args.project:
        registered = [p for p in registered if p["id"] == args.project]

    print(f"Synthesizing {len(registered)} project(s) with {args.model}...")

    for project in registered:
        pid = project["id"]
        print(f"\n- {pid}")
        synthesis = synthesize_project(args.base_url, pid, args.model)
        if not synthesis:
            continue
        print(f" {synthesis[:200]}...")
        payload = {
            "project": pid,
            "category": "status",
            "key": "synthesis_cache",
            "value": synthesis,
            "source": "weekly synthesis pass",
        }
        try:
            api_post(args.base_url, "/project/state", payload)
        except Exception as e:
            print(f" ! save failed: {e}")
        else:
            print(f" + cached")
|
|
|
|
|
|
# Allow direct execution: python3 scripts/synthesize_projects.py [--base-url ...]
if __name__ == "__main__":
    main()
|