ATOCore/scripts/phase9_first_real_use.py

"""Phase 9 first-real-use validation script.

Captures a small set of representative interactions drawn from a real
working session, runs the full Phase 9 loop (capture -> reinforce ->
extract) over them, and prints what each step produced. The intent is
to generate empirical evidence about the extractor's behaviour against
prose that wasn't written to make the test pass.

Usage:
    python scripts/phase9_first_real_use.py [--data-dir PATH] [--json]

The script writes a fresh isolated SQLite + Chroma store under the
given data dir (default: <repo root>/data/validation/phase9-first-use).
Any existing contents of that directory are deleted at startup. The
data dir is gitignored so the script can be re-run cleanly.

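Each interaction is printed with:
  - the captured interaction id
  - the manually annotated expectation for the extractor (the
    notes_for_human text of each sample below)
  - the extraction results (which candidates were proposed and by
    which rule)

The reinforcement results (confidence, reference count and last
reference time of each seeded memory) are printed once, after all
interactions have run.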

The output is intentionally human-readable so the run can be saved as
the body of docs/phase9-first-real-use.md.
"""

from __future__ import annotations

import argparse
import json
import os
import shutil
import sys
from dataclasses import dataclass
from pathlib import Path

# Make the package importable when this script is executed directly.
# The repository root is resolved relative to this file (two directory
# levels up), so the result does not depend on the current working
# directory. Its src/ directory is inserted at the front of sys.path so
# it is searched before any other entry.
_REPO_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(_REPO_ROOT / "src"))


@dataclass
class SampleInteraction:
    """One canned prompt/response exchange fed through the validation run."""

    # Short identifier; printed as the heading of this sample in the report.
    label: str
    # Text passed to record_interaction as ``prompt``.
    prompt: str
    # Text passed to record_interaction as ``response``.
    response: str
    # Project name the interaction is recorded under.
    project: str
    # Hand-written expectation for the extractor. It is not evaluated by the
    # script; it is echoed as "expected" next to the actual candidates so a
    # human can compare the two.
    notes_for_human: str


# ---------------------------------------------------------------------------
# Sample interactions, paraphrased from real working sessions on AtoCore
# itself. Each one is a real-feeling exchange with at least one structural
# cue the extractor should catch, plus prose the extractor should NOT
# accidentally catch.
# ---------------------------------------------------------------------------

# Ordered list of the interactions replayed by main(). Each entry is
# recorded and extracted independently; notes_for_human states what a
# reviewer expects the extractor to do with that response.
SAMPLES: list[SampleInteraction] = [
    # A "## Decision:" heading embedded in narrative prose.
    SampleInteraction(
        label="exdev-mount-merge-decision",
        project="atocore",
        prompt="The TMPDIR fix didn't work, the LOCAL_COPY_PATH change didn't work, what now?",
        response=(
            "We've been chasing this through three layers without success. "
            "Time to stop guessing and apply the simple fix Dalidou Claude "
            "proposed in the very first diagnosis.\n\n"
            "## Decision: merge the two bind mounts into one\n\n"
            "Both /srv/storage/repos and /srv/storage/gitea/data live on "
            "/dev/sdb1, so the move is a same-filesystem rename — instant, "
            "atomic, no data copy. After the move, the EXDEV class of bug "
            "becomes structurally impossible because there is only one "
            "mount inside the container."
        ),
        notes_for_human=(
            "Should fire decision_heading rule. Should NOT fire requirement "
            "or constraint rules. The 'becomes structurally impossible' line "
            "is prose, not a candidate."
        ),
    ),
    # A "## Fact:" heading, plus a quoted error message inside prose.
    SampleInteraction(
        label="ownership-was-the-real-fix",
        project="atocore",
        prompt="Still failing after the mount merge. What's actually wrong?",
        response=(
            "Same rejection even after the mounts were merged, which kills "
            "the EXDEV theory. The error 'unable to migrate objects to "
            "permanent storage' is git's catch-all for any failure in "
            "tmp_objdir_migrate, and the real errno is being thrown away.\n\n"
            "## Fact: the actual root cause was ownership, not cross-device link\n\n"
            "After mv on the host, the moved directory had perms git could "
            "navigate for some subdirs but not others. Recursive chown "
            "git:git plus chmod u+rwX,g+rwX fixed it on the first try."
        ),
        notes_for_human=(
            "Should fire fact_heading rule, target type knowledge. The "
            "'unable to migrate' string is inside prose and should NOT "
            "trigger any rule."
        ),
    ),
    # A "## Decision:" heading with a long heading line.
    SampleInteraction(
        label="memory-vs-entity-canonical-home",
        project="atocore",
        prompt="Does a Decision live in memories or entities?",
        response=(
            "Both layers exist for different reasons. After thinking through "
            "the ontology question, I'm landing on a clean rule: one "
            "canonical home per concept, with explicit graduation when a "
            "memory eventually deserves structure.\n\n"
            "## Decision: project, knowledge, and adaptation memory types graduate to entities\n\n"
            "The identity, preference, and episodic types stay in the memory "
            "layer because they're personal and unstructured. The other "
            "three become entities once the engineering layer V1 ships. The "
            "original memory rows are NEVER deleted — they become frozen "
            "historical pointers to whichever entity they graduated into."
        ),
        notes_for_human=(
            "Should fire decision_heading rule. Long content (over 100 "
            "chars) — verifies the max-length cap of 280 doesn't truncate."
        ),
    ),
    # A "## Decision:" heading next to domain jargon in the prose.
    SampleInteraction(
        label="auto-promotion-deferred",
        project="atocore",
        prompt="Should the engineering layer auto-promote candidates?",
        response=(
            "The trust hierarchy is the whole point of AtoCore. If we let "
            "the system auto-promote, we trade away the 'bad memory is "
            "worse than no memory' invariant for a marginal speed gain.\n\n"
            "## Decision: no auto-promotion in V1, ever\n\n"
            "The schema and API are designed so policies can be added "
            "later without migration, but the V1 stance is that every "
            "promotion needs a human signoff. The hard line: nothing ever "
            "moves into trusted project state automatically, even with the "
            "engineering layer running."
        ),
        notes_for_human=(
            "Should fire decision_heading rule. Has 'invariant' jargon — "
            "verify the extractor doesn't get distracted."
        ),
    ),
    # No heading at all: the cue is the sentence opening "I prefer".
    SampleInteraction(
        label="preference-rebase-workflow",
        project="atocore",
        prompt="Should we use rebase or merge commits for this branch?",
        response=(
            "I prefer rebase-based workflows because the history stays "
            "linear and reviewers have an easier time. For the dalidou "
            "storage-foundation branch we'll rebase before merging into "
            "main so the timeline reads as one coherent narrative."
        ),
        notes_for_human=(
            "Should fire preference_sentence rule. Note: the rule looks "
            "for 'I prefer' which is exactly the wording used."
        ),
    ),
    # A level-3 ("###") Constraint heading, recorded under a different project.
    SampleInteraction(
        label="constraint-from-doc-cite",
        project="p05-interferometer",
        prompt="What's the budget situation for p05 right now?",
        response=(
            "Looking at the latest design notes, the constraint is fairly "
            "tight on the optical path side.\n\n"
            "### Constraint: total optical path stability must stay within 10 nm RMS at 1 kHz\n\n"
            "The error budget allocates 6 nm to the laser source and 4 nm "
            "to the mechanical mounts and frame combined."
        ),
        notes_for_human=(
            "Should fire constraint_heading rule with type=project. The "
            "second sentence ('error budget allocates 6 nm') is descriptive "
            "and should NOT fire any rule because it doesn't have a "
            "structural cue."
        ),
    ),
    # Negative control: prose with no headings or preference wording.
    SampleInteraction(
        label="prose-only-no-cues",
        project="atocore",
        prompt="What should I work on next?",
        response=(
            "Looking at the current state of AtoCore, the next natural "
            "step is to validate Phase 9 against real interactions before "
            "starting the engineering layer implementation. Empirical "
            "evidence is missing and the planning docs would benefit from "
            "real signal."
        ),
        notes_for_human=(
            "Should produce ZERO candidates. Pure recommendation prose, no "
            "structural cues. If this fires anything the extractor is too "
            "loose."
        ),
    ),
    # Three different headings (Decision, Constraint, Requirement) in one response.
    SampleInteraction(
        label="multiple-cues-in-one-interaction",
        project="p06-polisher",
        prompt="Summarize today's polisher session",
        response=(
            "We worked through three things in the polisher session today.\n\n"
            "## Decision: defer the laser interlock redesign to after the July milestone\n\n"
            "## Constraint: the calibration routine must complete in under 90 seconds for production use\n\n"
            "## Requirement: the polisher must hold position to within 0.5 micron at 1 g loading\n\n"
            "Action items captured for the next sync."
        ),
        notes_for_human=(
            "Three rules should fire on the same interaction: "
            "decision_heading -> adaptation, constraint_heading -> project, "
            "requirement_heading -> project. Verify dedup doesn't merge them."
        ),
    ),
]


def setup_environment(data_dir: Path) -> None:
    """Point AtoCore at a freshly emptied ``data_dir`` for this run.

    Any existing directory at ``data_dir`` is removed and recreated, the
    ``ATOCORE_DATA_DIR`` environment variable is set to it, and
    ``ATOCORE_DEBUG`` is set to ``"true"`` unless it is already defined.
    The module-level settings object and the vector-store handle are then
    reset so that they are rebuilt under the new environment.
    """
    # Start from a clean slate: drop whatever a previous run left behind.
    if data_dir.exists():
        shutil.rmtree(data_dir)
    data_dir.mkdir(parents=True, exist_ok=True)

    # The environment must be in place before the settings are re-created.
    os.environ["ATOCORE_DATA_DIR"] = str(data_dir)
    if "ATOCORE_DEBUG" not in os.environ:
        os.environ["ATOCORE_DEBUG"] = "true"

    # Replace the cached settings instance so the new env vars take effect.
    import atocore.config as atocore_config

    atocore_config.settings = atocore_config.Settings()

    # Clear the module-level store handle in the vector store module.
    import atocore.retrieval.vector_store as vector_store

    vector_store._store = None


def seed_memories() -> dict[str, str]:
    """Create the seed memories that reinforcement can match against.

    Returns:
        A mapping from a short seed label to the id of the memory that
        ``create_memory`` returned for it, in creation order.
    """
    from atocore.memory.service import create_memory

    # (label, memory_type, content, confidence) — created top to bottom.
    seed_specs = (
        (
            "pref_rebase",
            "preference",
            "prefers rebase-based workflows because history stays linear",
            0.6,
        ),
        (
            "pref_concise",
            "preference",
            "writes commit messages focused on the why, not the what",
            0.6,
        ),
        (
            "identity_runs_atocore",
            "identity",
            "mechanical engineer who runs AtoCore for context engineering",
            0.9,
        ),
    )
    return {
        key: create_memory(
            memory_type=kind,
            content=text,
            confidence=score,
        ).id
        for key, kind, text, score in seed_specs
    }


def run_sample(sample: SampleInteraction) -> dict:
    """Record ``sample`` as an interaction and run candidate extraction on it.

    The interaction is recorded with ``reinforce=True`` under a fixed
    client and session id, then handed to the extractor.

    Returns:
        A dict with the sample's label, project and expectation notes, the
        recorded interaction id, the number of candidates, and one summary
        dict per candidate (memory type, rule, content, and the first 120
        characters of its source span).
    """
    from atocore.interactions.service import record_interaction
    from atocore.memory.extractor import extract_candidates_from_interaction

    recorded = record_interaction(
        prompt=sample.prompt,
        response=sample.response,
        project=sample.project,
        client="phase9-first-real-use",
        session_id="first-real-use",
        reinforce=True,
    )
    extracted = extract_candidates_from_interaction(recorded)

    # Flatten each candidate to plain values so the result is JSON-friendly.
    summaries = []
    for candidate in extracted:
        summaries.append(
            {
                "memory_type": candidate.memory_type,
                "rule": candidate.rule,
                "content": candidate.content,
                "source_span": candidate.source_span[:120],
            }
        )

    result = {
        "label": sample.label,
        "project": sample.project,
        "interaction_id": recorded.id,
        "expected_notes": sample.notes_for_human,
        "candidate_count": len(extracted),
        "candidates": summaries,
    }
    return result


def report_seed_memory_state(seeded_ids: dict[str, str]) -> dict:
    """Snapshot the current state of each seeded memory.

    Args:
        seeded_ids: Mapping of seed label to memory id.

    Returns:
        A dict keyed by seed label. The value is ``None`` when no memory
        with that id is among those returned by ``get_memories(limit=200)``;
        otherwise it is a dict holding the memory's id, memory_type, the
        first 80 characters of its content, its confidence rounded to four
        decimals, its reference_count and its last_referenced_at.
    """
    from atocore.memory.service import get_memories

    if not seeded_ids:
        # Nothing to look up, so skip the query entirely.
        return {}

    # Query once and index by id. The query does not depend on the label
    # being looked up, so issuing it again for every label (and scanning the
    # full result each time) was redundant work.
    memories_by_id = {}
    for memory in get_memories(limit=200):
        # setdefault keeps the first row seen for an id.
        memories_by_id.setdefault(memory.id, memory)

    state = {}
    for label, mid in seeded_ids.items():
        m = memories_by_id.get(mid)
        if m is None:
            state[label] = None
            continue
        state[label] = {
            "id": m.id,
            "memory_type": m.memory_type,
            "content_preview": m.content[:80],
            "confidence": round(m.confidence, 4),
            "reference_count": m.reference_count,
            "last_referenced_at": m.last_referenced_at,
        }
    return state


def _print_report(
    data_dir: Path,
    seeded: dict[str, str],
    sample_results: list[dict],
    final_seed_state: dict,
) -> None:
    """Print the human-readable report for one validation run to stdout."""
    print("=" * 78)
    print("Phase 9 first-real-use validation run")
    print("=" * 78)
    print(f"Isolated data dir: {data_dir}")
    print()
    # Derive the count from the data instead of hard-coding it, so the
    # banner stays correct if the seed set changes.
    print(f"Seeded the memory store with {len(seeded)} active memories:")
    for label, mid in seeded.items():
        print(f"  - {label}  ({mid[:8]})")
    print()
    print("-" * 78)
    print(f"Running {len(sample_results)} sample interactions ...")
    print("-" * 78)

    for result in sample_results:
        print()
        print(f"## {result['label']}  [project={result['project']}]")
        print(f"   interaction_id={result['interaction_id'][:8]}")
        print(f"   expected: {result['expected_notes']}")
        print(f"   candidates produced: {result['candidate_count']}")
        for i, cand in enumerate(result["candidates"], 1):
            print(
                f"     [{i}] type={cand['memory_type']:11s} "
                f"rule={cand['rule']:21s} "
                f"content={cand['content']!r}"
            )

    print()
    print("-" * 78)
    print("Reinforcement state on seeded memories AFTER all interactions:")
    print("-" * 78)
    for label, state in final_seed_state.items():
        if state is None:
            print(f"  {label}: <missing>")
            continue
        print(
            f"  {label}: confidence={state['confidence']:.4f}  "
            f"refs={state['reference_count']}  "
            f"last={state['last_referenced_at'] or '-'}"
        )

    print()
    print("=" * 78)
    print("Run complete. Data written to:", data_dir)
    print("=" * 78)


def main(argv: list[str] | None = None) -> int:
    """Run the validation loop and print its results.

    The run resets the isolated data directory, initialises the database
    and project-state schema, seeds the memory store, replays every entry
    of ``SAMPLES`` and finally snapshots the seeded memories.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read them from ``sys.argv``.

    Returns:
        The process exit code; always 0 when the run completes.
    """
    parser = argparse.ArgumentParser(
        description="Phase 9 first-real-use validation run",
    )
    parser.add_argument(
        "--data-dir",
        default=str(_REPO_ROOT / "data" / "validation" / "phase9-first-use"),
        help="Isolated data directory to use for this validation run",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Emit machine-readable JSON instead of human prose",
    )
    args = parser.parse_args(argv)

    data_dir = Path(args.data_dir).resolve()
    setup_environment(data_dir)

    # Imported only after setup_environment() has pointed AtoCore at the
    # isolated data directory.
    from atocore.models.database import init_db
    from atocore.context.project_state import init_project_state_schema

    init_db()
    init_project_state_schema()

    seeded = seed_memories()
    sample_results = [run_sample(s) for s in SAMPLES]
    final_seed_state = report_seed_memory_state(seeded)

    if args.json:
        json.dump(
            {
                "data_dir": str(data_dir),
                "seeded_memories_initial": list(seeded.keys()),
                "samples": sample_results,
                "seed_memory_state_after_run": final_seed_state,
            },
            sys.stdout,
            indent=2,
            # Values that json cannot encode natively are emitted via str().
            default=str,
        )
        return 0

    _print_report(data_dir, seeded, sample_results, final_seed_state)
    return 0


# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())