From 3921c5ffc76eb48cabd668ab25cee0aa90226999 Mon Sep 17 00:00:00 2001 From: Anto01 Date: Sun, 12 Apr 2026 06:32:47 -0400 Subject: [PATCH] =?UTF-8?q?test:=20Day=206=20=E2=80=94=20retrieval=20harne?= =?UTF-8?q?ss=20expanded=20from=206=20to=2018=20fixtures?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added 12 new fixtures across all three active projects: - p04: 1 short/ambiguous case ('current status') - p05: 1 CGH calibration case with cross-project bleed guard - p06: 8 new fixtures — 7 targeting triage-promoted memories (firmware interface, z-axis, cam encoder, telemetry rate, offline design, USB SSD, Tailscale) plus 1 short/ambiguous case ('current status') - Adversarial: cross-project-no-bleed (p04 query must not surface p06 telemetry rate), no-project-hint (project memories must not appear without a hint) First run: 14/18 passing. 4 failures (p06-firmware-interface, p06-z-axis, p06-offline-design, p06-tailscale) share the same root cause: long pre-existing p06 memories (530+ chars, confidence 0.9+) fill the 25% project-memory budget before the query-relevant newly-promoted memories (shorter, confidence 0.5) get a slot. When overlap scores are equal, the budget contention is tie-broken by confidence, so the older, higher-confidence memories win. This is the target of the Day 7 ranking tweak. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/retrieval_eval_fixtures.json | 147 ++++++++++++++++++++++++++- 1 file changed, 143 insertions(+), 4 deletions(-) diff --git a/scripts/retrieval_eval_fixtures.json b/scripts/retrieval_eval_fixtures.json index 0e50572..d40f568 100644 --- a/scripts/retrieval_eval_fixtures.json +++ b/scripts/retrieval_eval_fixtures.json @@ -13,7 +13,7 @@ "p06-polisher", "folded-beam" ], - "notes": "Canonical p04 decision — should surface both Trusted Project State (selected_mirror_architecture) and the project-memory band with the Option B memory" + "notes": "Canonical p04 decision — should surface both Trusted Project State and the project-memory band" }, { "name": "p04-constraints", @@ -27,7 +27,17 @@ "expect_absent": [ "polisher suite" ], - "notes": "Key constraints are in Trusted Project State (key_constraints) and in the mission-framing memory" + "notes": "Key constraints are in Trusted Project State and in the mission-framing memory" + }, + { + "name": "p04-short-ambiguous", + "project": "p04-gigabit", + "prompt": "current status", + "expect_present": [ + "--- Trusted Project State ---" + ], + "expect_absent": [], + "notes": "Short ambiguous prompt — at minimum project state should surface. Hard case: the prompt is generic enough that chunks may not rank well." }, { "name": "p05-configuration", @@ -42,7 +52,7 @@ "conical back", "polisher suite" ], - "notes": "P05 architecture memory covers folded-beam + CGH. GigaBIT M1 is the mirror under test and legitimately appears in p05 source docs (the interferometer measures it), so we only flag genuinely p04-only decisions like the mirror architecture choice." + "notes": "P05 architecture memory covers folded-beam + CGH. GigaBIT M1 legitimately appears in p05 source docs." 
}, { "name": "p05-vendor-signal", @@ -57,6 +67,19 @@ ], "notes": "Vendor memory mentions 4D as strongest technical candidate and Zygo Verifire SV as value path" }, + { + "name": "p05-cgh-calibration", + "project": "p05-interferometer", + "prompt": "how does CGH calibration work for the interferometer", + "expect_present": [ + "CGH" + ], + "expect_absent": [ + "polisher-sim", + "polisher-post" + ], + "notes": "CGH is a core p05 concept. Should surface via chunks and possibly the architecture memory. Must not bleed p06 polisher-suite terms." + }, { "name": "p06-suite-split", "project": "p06-polisher", @@ -69,7 +92,7 @@ "expect_absent": [ "GigaBIT" ], - "notes": "The three-layer split is in multiple p06 memories; check all three names surface together" + "notes": "The three-layer split is in multiple p06 memories" }, { "name": "p06-control-rule", @@ -82,5 +105,121 @@ "interferometer" ], "notes": "Control design rule memory mentions interlocks and state transitions" + }, + { + "name": "p06-firmware-interface", + "project": "p06-polisher", + "prompt": "what is the firmware interface contract for the polisher machine", + "expect_present": [ + "controller-job" + ], + "expect_absent": [ + "interferometer", + "GigaBIT" + ], + "notes": "New p06 memory from the first triage: firmware interface contract is invariant controller-job.v1 in, run-log.v1 out" + }, + { + "name": "p06-z-axis", + "project": "p06-polisher", + "prompt": "how does the polisher Z-axis work", + "expect_present": [ + "engage" + ], + "expect_absent": [ + "interferometer" + ], + "notes": "New p06 memory: Z-axis is binary engage/retract, not continuous position. The word 'engage' should appear." + }, + { + "name": "p06-cam-mechanism", + "project": "p06-polisher", + "prompt": "how is cam amplitude controlled on the polisher", + "expect_present": [ + "encoder" + ], + "expect_absent": [ + "GigaBIT" + ], + "notes": "New p06 memory: cam set mechanically by operator, read by encoders. 
The word 'encoder' should appear." + }, + { + "name": "p06-telemetry-rate", + "project": "p06-polisher", + "prompt": "what is the expected polishing telemetry data rate", + "expect_present": [ + "29 MB" + ], + "expect_absent": [ + "interferometer" + ], + "notes": "New p06 knowledge memory: approximately 29 MB per hour at 100 Hz" + }, + { + "name": "p06-offline-design", + "project": "p06-polisher", + "prompt": "does the polisher machine need network to operate", + "expect_present": [ + "offline" + ], + "expect_absent": [ + "CGH" + ], + "notes": "New p06 memory: machine works fully offline and independently; network is for remote access only" + }, + { + "name": "p06-short-ambiguous", + "project": "p06-polisher", + "prompt": "current status", + "expect_present": [ + "--- Trusted Project State ---" + ], + "expect_absent": [], + "notes": "Short ambiguous prompt — project state should surface at minimum" + }, + { + "name": "cross-project-no-bleed", + "project": "p04-gigabit", + "prompt": "what telemetry rate should we target", + "expect_present": [], + "expect_absent": [ + "29 MB", + "polisher" + ], + "notes": "Adversarial: telemetry rate is a p06 fact. A p04 query for 'telemetry rate' must NOT surface p06 memories. Tests cross-project gating." + }, + { + "name": "no-project-hint", + "project": "", + "prompt": "tell me about the current projects", + "expect_present": [], + "expect_absent": [ + "--- Project Memories ---" + ], + "notes": "Without a project hint, project memories must not appear (cross-project bleed guard). Chunks may appear if any match." 
+ }, + { + "name": "p06-usb-ssd", + "project": "p06-polisher", + "prompt": "what storage solution is specified for the polisher RPi", + "expect_present": [ + "USB SSD" + ], + "expect_absent": [ + "interferometer" + ], + "notes": "New p06 memory from triage: USB SSD mandatory, not SD card" + }, + { + "name": "p06-tailscale", + "project": "p06-polisher", + "prompt": "how do we access the polisher machine remotely", + "expect_present": [ + "Tailscale" + ], + "expect_absent": [ + "GigaBIT" + ], + "notes": "New p06 memory: Tailscale mesh for RPi remote access" } ]