From d3de9f67eaa08dfc5b2d86e8221b8c70fef266d3 Mon Sep 17 00:00:00 2001
From: Anto01
Date: Fri, 24 Apr 2026 20:54:56 -0400
Subject: [PATCH] fix(context): rank trusted state by query relevance

Reorder Trusted Project State entries by token overlap with the user
prompt before the state block is formatted and truncated, so entries
that match the query are placed ahead of unrelated ones.

Add a regression test for that ordering and drop the known_issue flag
from the retrieval eval fixture whose note described this gap.
---
 scripts/retrieval_eval_fixtures.json |  3 +-
 src/atocore/context/builder.py       | 56 +++++++++++++++++++++++++++-
 tests/test_context_builder.py        | 46 +++++++++++++++++++++++
 3 files changed, 102 insertions(+), 3 deletions(-)

diff --git a/scripts/retrieval_eval_fixtures.json b/scripts/retrieval_eval_fixtures.json
index 2379ccb..a5de629 100644
--- a/scripts/retrieval_eval_fixtures.json
+++ b/scripts/retrieval_eval_fixtures.json
@@ -27,8 +27,7 @@
     "expect_absent": [
       "polisher suite"
     ],
-    "known_issue": true,
-    "notes": "Known content gap as of 2026-04-24: live retrieval surfaces related constraints but not the exact Zerodur / 1.2 strings. Keep visible, but do not make nightly harness red until the source/state gap is fixed."
+    "notes": "Regression guard: query-relevant Trusted Project State requirements must survive the project-state budget cap."
   },
   {
     "name": "p04-short-ambiguous",
diff --git a/src/atocore/context/builder.py b/src/atocore/context/builder.py
index 5ef4481..f63394b 100644
--- a/src/atocore/context/builder.py
+++ b/src/atocore/context/builder.py
@@ -11,7 +11,7 @@ from dataclasses import dataclass, field
 from pathlib import Path
 
 import atocore.config as _config
-from atocore.context.project_state import format_project_state, get_state
+from atocore.context.project_state import ProjectStateEntry, format_project_state, get_state
 from atocore.memory.service import get_memories_for_context
 from atocore.observability.logger import get_logger
 from atocore.engineering.service import get_entities, get_entity_with_context
@@ -116,6 +116,11 @@ def build_context(
     if canonical_project:
         state_entries = get_state(canonical_project)
         if state_entries:
+            state_entries = _rank_project_state_entries(
+                state_entries,
+                query=user_prompt,
+                project=canonical_project,
+            )
             project_state_text = format_project_state(state_entries)
             project_state_text, project_state_chars = _truncate_text_block(
                 project_state_text,
@@ -284,6 +289,55 @@ def get_last_context_pack() -> ContextPack | None:
     return _last_context_pack
 
 
+def _rank_project_state_entries(
+    entries: list[ProjectStateEntry],
+    query: str,
+    project: str,
+) -> list[ProjectStateEntry]:
+    """Promote query-relevant trusted state before the state band is truncated."""
+    if not query or len(entries) <= 1:
+        return entries
+
+    from atocore.memory.reinforcement import _normalize, _tokenize
+
+    query_text = _normalize(query.replace("_", " "))
+    query_tokens = set(_tokenize(query_text))
+    query_tokens -= {  # filler words must not count as matches
+        "how",
+        "what",
+        "when",
+        "where",
+        "which",
+        "who",
+        "why",
+        "current",
+        "status",
+        "project",
+    }
+    for part in (project or "").lower().replace("_", "-").split("-"):  # drop project-id parts
+        query_tokens.discard(part)
+    if not query_tokens:
+        return entries
+
+    scored: list[tuple[int, float, float, int, ProjectStateEntry]] = []
+    for index, entry in enumerate(entries):
+        entry_text = " ".join(
+            [
+                entry.category,
+                entry.key.replace("_", " "),
+                entry.value,
+                entry.source,
+            ]
+        )
+        entry_tokens = set(_tokenize(_normalize(entry_text)))  # set() as for the query
+        overlap = len(entry_tokens & query_tokens) if entry_tokens else 0
+        density = overlap / len(entry_tokens) if entry_tokens else 0.0
+        scored.append((overlap, density, entry.confidence, -index, entry))  # earlier wins ties
+
+    scored.sort(key=lambda item: (item[0], item[1], item[2], item[3]), reverse=True)
+    return [entry for _, _, _, _, entry in scored]
+
+
 def _rank_chunks(
     candidates: list[ChunkResult],
     project_hint: str | None,
diff --git a/tests/test_context_builder.py b/tests/test_context_builder.py
index 24fd1b5..9a14945 100644
--- a/tests/test_context_builder.py
+++ b/tests/test_context_builder.py
@@ -143,6 +143,52 @@ def test_project_state_respects_total_budget(tmp_data_dir, sample_markdown):
     assert len(pack.formatted_context) <= 120
 
 
+def test_project_state_query_relevance_before_truncation(tmp_data_dir, sample_markdown):
+    """Relevant trusted state should survive the project-state budget cap."""
+    init_db()
+    init_project_state_schema()
+    ingest_file(sample_markdown)
+
+    set_state(
+        "p04-gigabit",
+        "contact",
+        "abb-space",
+        "ABB Space is the primary vendor contact for polishing, CCP, IBF, procurement coordination, "
+        "contract administration, interface planning, and delivery discussions.",
+    )
+    set_state(
+        "p04-gigabit",
+        "decision",
+        "back-structure",
+        "Option B selected: conical isogrid back structure with variable rib density. "
+        "Chosen over flat-back for stiffness-to-weight ratio and manufacturability.",
+    )
+    set_state(
+        "p04-gigabit",
+        "decision",
+        "polishing-vendor",
+        "ABB Space selected as polishing vendor. Contract includes computer-controlled polishing "
+        "and ion beam figuring.",
+    )
+    set_state(
+        "p04-gigabit",
+        "requirement",
+        "key_constraints",
+        "The program targets a 1.2 m lightweight Zerodur mirror with filtered mechanical WFE below 15 nm "
+        "and mass below 103.5 kg.",
+    )
+
+    pack = build_context(
+        "what are the key GigaBIT M1 program constraints",
+        project_hint="p04-gigabit",
+        budget=3000,
+    )
+
+    assert "Zerodur" in pack.formatted_context
+    assert "1.2" in pack.formatted_context
+    assert pack.formatted_context.find("[REQUIREMENT]") < pack.formatted_context.find("[CONTACT]")
+
+
 def test_project_hint_matches_state_case_insensitively(tmp_data_dir, sample_markdown):
     """Project state lookup should not depend on exact casing."""
     init_db()