"""Phase 9 first-real-use validation script. Captures a small set of representative interactions drawn from a real working session, runs the full Phase 9 loop (capture -> reinforce -> extract) over them, and prints what each step produced. The intent is to generate empirical evidence about the extractor's behaviour against prose that wasn't written to make the test pass. Usage: python scripts/phase9_first_real_use.py [--data-dir PATH] The script writes a fresh isolated SQLite + Chroma store under the given data dir (default: ./data/validation/phase9-first-use). The data dir is gitignored so the script can be re-run cleanly. Each interaction is printed with: - the captured interaction id - the reinforcement results (which seeded memories were echoed) - the extraction results (which candidates were proposed and why) - notes on what the extractor MISSED (manually annotated below) The output is intentionally human-readable so the run can be saved as the body of docs/phase9-first-real-use.md. """ from __future__ import annotations import argparse import json import os import shutil import sys from dataclasses import dataclass from pathlib import Path # Make the package importable when run directly from the repo root. _REPO_ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(_REPO_ROOT / "src")) @dataclass class SampleInteraction: label: str prompt: str response: str project: str notes_for_human: str # --------------------------------------------------------------------------- # Sample interactions, paraphrased from real working sessions on AtoCore # itself. Each one is a real-feeling exchange with at least one structural # cue the extractor should catch, plus prose the extractor should NOT # accidentally catch. # --------------------------------------------------------------------------- SAMPLES: list[SampleInteraction] = [ SampleInteraction( label="exdev-mount-merge-decision", project="atocore", prompt="The TMPDIR fix didn't work, the LOCAL_COPY_PATH change didn't work, what now?", response=( "We've been chasing this through three layers without success. " "Time to stop guessing and apply the simple fix Dalidou Claude " "proposed in the very first diagnosis.\n\n" "## Decision: merge the two bind mounts into one\n\n" "Both /srv/storage/repos and /srv/storage/gitea/data live on " "/dev/sdb1, so the move is a same-filesystem rename — instant, " "atomic, no data copy. After the move, the EXDEV class of bug " "becomes structurally impossible because there is only one " "mount inside the container." ), notes_for_human=( "Should fire decision_heading rule. Should NOT fire requirement " "or constraint rules. The 'becomes structurally impossible' line " "is prose, not a candidate." ), ), SampleInteraction( label="ownership-was-the-real-fix", project="atocore", prompt="Still failing after the mount merge. What's actually wrong?", response=( "Same rejection even after the mounts were merged, which kills " "the EXDEV theory. The error 'unable to migrate objects to " "permanent storage' is git's catch-all for any failure in " "tmp_objdir_migrate, and the real errno is being thrown away.\n\n" "## Fact: the actual root cause was ownership, not cross-device link\n\n" "After mv on the host, the moved directory had perms git could " "navigate for some subdirs but not others. Recursive chown " "git:git plus chmod u+rwX,g+rwX fixed it on the first try." ), notes_for_human=( "Should fire fact_heading rule, target type knowledge. The " "'unable to migrate' string is inside prose and should NOT " "trigger any rule." ), ), SampleInteraction( label="memory-vs-entity-canonical-home", project="atocore", prompt="Does a Decision live in memories or entities?", response=( "Both layers exist for different reasons. After thinking through " "the ontology question, I'm landing on a clean rule: one " "canonical home per concept, with explicit graduation when a " "memory eventually deserves structure.\n\n" "## Decision: project, knowledge, and adaptation memory types graduate to entities\n\n" "The identity, preference, and episodic types stay in the memory " "layer because they're personal and unstructured. The other " "three become entities once the engineering layer V1 ships. The " "original memory rows are NEVER deleted — they become frozen " "historical pointers to whichever entity they graduated into." ), notes_for_human=( "Should fire decision_heading rule. Long content (over 100 " "chars) — verifies the max-length cap of 280 doesn't truncate." ), ), SampleInteraction( label="auto-promotion-deferred", project="atocore", prompt="Should the engineering layer auto-promote candidates?", response=( "The trust hierarchy is the whole point of AtoCore. If we let " "the system auto-promote, we trade away the 'bad memory is " "worse than no memory' invariant for a marginal speed gain.\n\n" "## Decision: no auto-promotion in V1, ever\n\n" "The schema and API are designed so policies can be added " "later without migration, but the V1 stance is that every " "promotion needs a human signoff. The hard line: nothing ever " "moves into trusted project state automatically, even with the " "engineering layer running." ), notes_for_human=( "Should fire decision_heading rule. Has 'invariant' jargon — " "verify the extractor doesn't get distracted." ), ), SampleInteraction( label="preference-rebase-workflow", project="atocore", prompt="Should we use rebase or merge commits for this branch?", response=( "I prefer rebase-based workflows because the history stays " "linear and reviewers have an easier time. For the dalidou " "storage-foundation branch we'll rebase before merging into " "main so the timeline reads as one coherent narrative." ), notes_for_human=( "Should fire preference_sentence rule. Note: the rule looks " "for 'I prefer' which is exactly the wording used." ), ), SampleInteraction( label="constraint-from-doc-cite", project="p05-interferometer", prompt="What's the budget situation for p05 right now?", response=( "Looking at the latest design notes, the constraint is fairly " "tight on the optical path side.\n\n" "### Constraint: total optical path stability must stay within 10 nm RMS at 1 kHz\n\n" "The error budget allocates 6 nm to the laser source and 4 nm " "to the mechanical mounts and frame combined." ), notes_for_human=( "Should fire constraint_heading rule with type=project. The " "second sentence ('error budget allocates 6 nm') is descriptive " "and should NOT fire any rule because it doesn't have a " "structural cue." ), ), SampleInteraction( label="prose-only-no-cues", project="atocore", prompt="What should I work on next?", response=( "Looking at the current state of AtoCore, the next natural " "step is to validate Phase 9 against real interactions before " "starting the engineering layer implementation. Empirical " "evidence is missing and the planning docs would benefit from " "real signal." ), notes_for_human=( "Should produce ZERO candidates. Pure recommendation prose, no " "structural cues. If this fires anything the extractor is too " "loose." ), ), SampleInteraction( label="multiple-cues-in-one-interaction", project="p06-polisher", prompt="Summarize today's polisher session", response=( "We worked through three things in the polisher session today.\n\n" "## Decision: defer the laser interlock redesign to after the July milestone\n\n" "## Constraint: the calibration routine must complete in under 90 seconds for production use\n\n" "## Requirement: the polisher must hold position to within 0.5 micron at 1 g loading\n\n" "Action items captured for the next sync." ), notes_for_human=( "Three rules should fire on the same interaction: " "decision_heading -> adaptation, constraint_heading -> project, " "requirement_heading -> project. Verify dedup doesn't merge them." ), ), ] def setup_environment(data_dir: Path) -> None: """Configure AtoCore to use an isolated data directory for this run.""" if data_dir.exists(): shutil.rmtree(data_dir) data_dir.mkdir(parents=True, exist_ok=True) os.environ["ATOCORE_DATA_DIR"] = str(data_dir) os.environ.setdefault("ATOCORE_DEBUG", "true") # Reset cached settings so the new env vars take effect import atocore.config as config config.settings = config.Settings() import atocore.retrieval.vector_store as vs vs._store = None def seed_memories() -> dict[str, str]: """Insert a small set of seed active memories so reinforcement has something to match against.""" from atocore.memory.service import create_memory seeded: dict[str, str] = {} seeded["pref_rebase"] = create_memory( memory_type="preference", content="prefers rebase-based workflows because history stays linear", confidence=0.6, ).id seeded["pref_concise"] = create_memory( memory_type="preference", content="writes commit messages focused on the why, not the what", confidence=0.6, ).id seeded["identity_runs_atocore"] = create_memory( memory_type="identity", content="mechanical engineer who runs AtoCore for context engineering", confidence=0.9, ).id return seeded def run_sample(sample: SampleInteraction) -> dict: """Capture one sample, run extraction, return a result dict.""" from atocore.interactions.service import record_interaction from atocore.memory.extractor import extract_candidates_from_interaction interaction = record_interaction( prompt=sample.prompt, response=sample.response, project=sample.project, client="phase9-first-real-use", session_id="first-real-use", reinforce=True, ) candidates = extract_candidates_from_interaction(interaction) return { "label": sample.label, "project": sample.project, "interaction_id": interaction.id, "expected_notes": sample.notes_for_human, "candidate_count": len(candidates), "candidates": [ { "memory_type": c.memory_type, "rule": c.rule, "content": c.content, "source_span": c.source_span[:120], } for c in candidates ], } def report_seed_memory_state(seeded_ids: dict[str, str]) -> dict: from atocore.memory.service import get_memories state = {} for label, mid in seeded_ids.items(): rows = [m for m in get_memories(limit=200) if m.id == mid] if not rows: state[label] = None continue m = rows[0] state[label] = { "id": m.id, "memory_type": m.memory_type, "content_preview": m.content[:80], "confidence": round(m.confidence, 4), "reference_count": m.reference_count, "last_referenced_at": m.last_referenced_at, } return state def main() -> int: parser = argparse.ArgumentParser() parser.add_argument( "--data-dir", default=str(_REPO_ROOT / "data" / "validation" / "phase9-first-use"), help="Isolated data directory to use for this validation run", ) parser.add_argument( "--json", action="store_true", help="Emit machine-readable JSON instead of human prose", ) args = parser.parse_args() data_dir = Path(args.data_dir).resolve() setup_environment(data_dir) from atocore.models.database import init_db from atocore.context.project_state import init_project_state_schema init_db() init_project_state_schema() seeded = seed_memories() sample_results = [run_sample(s) for s in SAMPLES] final_seed_state = report_seed_memory_state(seeded) if args.json: json.dump( { "data_dir": str(data_dir), "seeded_memories_initial": list(seeded.keys()), "samples": sample_results, "seed_memory_state_after_run": final_seed_state, }, sys.stdout, indent=2, default=str, ) return 0 print("=" * 78) print("Phase 9 first-real-use validation run") print("=" * 78) print(f"Isolated data dir: {data_dir}") print() print("Seeded the memory store with 3 active memories:") for label, mid in seeded.items(): print(f" - {label} ({mid[:8]})") print() print("-" * 78) print(f"Running {len(SAMPLES)} sample interactions ...") print("-" * 78) for result in sample_results: print() print(f"## {result['label']} [project={result['project']}]") print(f" interaction_id={result['interaction_id'][:8]}") print(f" expected: {result['expected_notes']}") print(f" candidates produced: {result['candidate_count']}") for i, cand in enumerate(result["candidates"], 1): print( f" [{i}] type={cand['memory_type']:11s} " f"rule={cand['rule']:21s} " f"content={cand['content']!r}" ) print() print("-" * 78) print("Reinforcement state on seeded memories AFTER all interactions:") print("-" * 78) for label, state in final_seed_state.items(): if state is None: print(f" {label}: ") continue print( f" {label}: confidence={state['confidence']:.4f} " f"refs={state['reference_count']} " f"last={state['last_referenced_at'] or '-'}" ) print() print("=" * 78) print("Run complete. Data written to:", data_dir) print("=" * 78) return 0 if __name__ == "__main__": raise SystemExit(main())