diff --git a/scripts/retrieval_eval_fixtures.json b/scripts/retrieval_eval_fixtures.json index 0e50572..d40f568 100644 --- a/scripts/retrieval_eval_fixtures.json +++ b/scripts/retrieval_eval_fixtures.json @@ -13,7 +13,7 @@ "p06-polisher", "folded-beam" ], - "notes": "Canonical p04 decision — should surface both Trusted Project State (selected_mirror_architecture) and the project-memory band with the Option B memory" + "notes": "Canonical p04 decision — should surface both Trusted Project State and the project-memory band" }, { "name": "p04-constraints", @@ -27,7 +27,17 @@ "expect_absent": [ "polisher suite" ], - "notes": "Key constraints are in Trusted Project State (key_constraints) and in the mission-framing memory" + "notes": "Key constraints are in Trusted Project State and in the mission-framing memory" + }, + { + "name": "p04-short-ambiguous", + "project": "p04-gigabit", + "prompt": "current status", + "expect_present": [ + "--- Trusted Project State ---" + ], + "expect_absent": [], + "notes": "Short ambiguous prompt — at minimum project state should surface. Hard case: the prompt is generic enough that chunks may not rank well." }, { "name": "p05-configuration", @@ -42,7 +52,7 @@ "conical back", "polisher suite" ], - "notes": "P05 architecture memory covers folded-beam + CGH. GigaBIT M1 is the mirror under test and legitimately appears in p05 source docs (the interferometer measures it), so we only flag genuinely p04-only decisions like the mirror architecture choice." + "notes": "P05 architecture memory covers folded-beam + CGH. GigaBIT M1 legitimately appears in p05 source docs." }, { "name": "p05-vendor-signal", @@ -57,6 +67,19 @@ ], "notes": "Vendor memory mentions 4D as strongest technical candidate and Zygo Verifire SV as value path" }, + { + "name": "p05-cgh-calibration", + "project": "p05-interferometer", + "prompt": "how does CGH calibration work for the interferometer", + "expect_present": [ + "CGH" + ], + "expect_absent": [ + "polisher-sim", + "polisher-post" + ], + "notes": "CGH is a core p05 concept. Should surface via chunks and possibly the architecture memory. Must not bleed p06 polisher-suite terms." + }, { "name": "p06-suite-split", "project": "p06-polisher", @@ -69,7 +92,7 @@ "expect_absent": [ "GigaBIT" ], - "notes": "The three-layer split is in multiple p06 memories; check all three names surface together" + "notes": "The three-layer split is in multiple p06 memories" }, { "name": "p06-control-rule", @@ -82,5 +105,121 @@ "interferometer" ], "notes": "Control design rule memory mentions interlocks and state transitions" + }, + { + "name": "p06-firmware-interface", + "project": "p06-polisher", + "prompt": "what is the firmware interface contract for the polisher machine", + "expect_present": [ + "controller-job" + ], + "expect_absent": [ + "interferometer", + "GigaBIT" + ], + "notes": "New p06 memory from the first triage: firmware interface contract is invariant controller-job.v1 in, run-log.v1 out" + }, + { + "name": "p06-z-axis", + "project": "p06-polisher", + "prompt": "how does the polisher Z-axis work", + "expect_present": [ + "engage" + ], + "expect_absent": [ + "interferometer" + ], + "notes": "New p06 memory: Z-axis is binary engage/retract, not continuous position. The word 'engage' should appear." + }, + { + "name": "p06-cam-mechanism", + "project": "p06-polisher", + "prompt": "how is cam amplitude controlled on the polisher", + "expect_present": [ + "encoder" + ], + "expect_absent": [ + "GigaBIT" + ], + "notes": "New p06 memory: cam set mechanically by operator, read by encoders. The word 'encoder' should appear." + }, + { + "name": "p06-telemetry-rate", + "project": "p06-polisher", + "prompt": "what is the expected polishing telemetry data rate", + "expect_present": [ + "29 MB" + ], + "expect_absent": [ + "interferometer" + ], + "notes": "New p06 knowledge memory: approximately 29 MB per hour at 100 Hz" + }, + { + "name": "p06-offline-design", + "project": "p06-polisher", + "prompt": "does the polisher machine need network to operate", + "expect_present": [ + "offline" + ], + "expect_absent": [ + "CGH" + ], + "notes": "New p06 memory: machine works fully offline and independently; network is for remote access only" + }, + { + "name": "p06-short-ambiguous", + "project": "p06-polisher", + "prompt": "current status", + "expect_present": [ + "--- Trusted Project State ---" + ], + "expect_absent": [], + "notes": "Short ambiguous prompt — project state should surface at minimum" + }, + { + "name": "cross-project-no-bleed", + "project": "p04-gigabit", + "prompt": "what telemetry rate should we target", + "expect_present": [], + "expect_absent": [ + "29 MB", + "polisher" + ], + "notes": "Adversarial: telemetry rate is a p06 fact. A p04 query for 'telemetry rate' must NOT surface p06 memories. Tests cross-project gating." + }, + { + "name": "no-project-hint", + "project": "", + "prompt": "tell me about the current projects", + "expect_present": [], + "expect_absent": [ + "--- Project Memories ---" + ], + "notes": "Without a project hint, project memories must not appear (cross-project bleed guard). Chunks may appear if any match." + }, + { + "name": "p06-usb-ssd", + "project": "p06-polisher", + "prompt": "what storage solution is specified for the polisher RPi", + "expect_present": [ + "USB SSD" + ], + "expect_absent": [ + "interferometer" + ], + "notes": "New p06 memory from triage: USB SSD mandatory, not SD card" + }, + { + "name": "p06-tailscale", + "project": "p06-polisher", + "prompt": "how do we access the polisher machine remotely", + "expect_present": [ + "Tailscale" + ], + "expect_absent": [ + "GigaBIT" + ], + "notes": "New p06 memory: Tailscale mesh for RPi remote access" } ]