fix(context): rank trusted state by query relevance
This commit is contained in:
@@ -27,8 +27,7 @@
|
||||
"expect_absent": [
|
||||
"polisher suite"
|
||||
],
|
||||
"known_issue": true,
|
||||
"notes": "Known content gap as of 2026-04-24: live retrieval surfaces related constraints but not the exact Zerodur / 1.2 strings. Keep visible, but do not make nightly harness red until the source/state gap is fixed."
|
||||
"notes": "Regression guard: query-relevant Trusted Project State requirements must survive the project-state budget cap."
|
||||
},
|
||||
{
|
||||
"name": "p04-short-ambiguous",
|
||||
|
||||
@@ -11,7 +11,7 @@ from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
import atocore.config as _config
|
||||
from atocore.context.project_state import format_project_state, get_state
|
||||
from atocore.context.project_state import ProjectStateEntry, format_project_state, get_state
|
||||
from atocore.memory.service import get_memories_for_context
|
||||
from atocore.observability.logger import get_logger
|
||||
from atocore.engineering.service import get_entities, get_entity_with_context
|
||||
@@ -116,6 +116,11 @@ def build_context(
|
||||
if canonical_project:
|
||||
state_entries = get_state(canonical_project)
|
||||
if state_entries:
|
||||
state_entries = _rank_project_state_entries(
|
||||
state_entries,
|
||||
query=user_prompt,
|
||||
project=canonical_project,
|
||||
)
|
||||
project_state_text = format_project_state(state_entries)
|
||||
project_state_text, project_state_chars = _truncate_text_block(
|
||||
project_state_text,
|
||||
@@ -284,6 +289,55 @@ def get_last_context_pack() -> ContextPack | None:
|
||||
return _last_context_pack
|
||||
|
||||
|
||||
def _rank_project_state_entries(
    entries: list[ProjectStateEntry],
    query: str,
    project: str,
) -> list[ProjectStateEntry]:
    """Reorder trusted state so query-relevant entries precede the truncation cut.

    Entries are ordered, best first, by:

    1. number of distinct tokens shared with the query,
    2. share of the entry's own tokens that matched (density),
    3. the entry's ``confidence``,
    4. original position (earlier entries win remaining ties).

    The input list is returned untouched when the query is empty, when there
    is at most one entry, or when nothing is left of the query after dropping
    question/filler words and the parts of the project name.
    """
    if not (query and len(entries) > 1):
        return entries

    from atocore.memory.reinforcement import _normalize, _tokenize

    wanted = set(_tokenize(_normalize(query.replace("_", " "))))
    # Question words and generic filler carry no signal for ranking.
    wanted.difference_update(
        (
            "how",
            "what",
            "when",
            "where",
            "which",
            "who",
            "why",
            "current",
            "status",
            "project",
        )
    )
    # Pieces of the project identifier are dropped from the query tokens too,
    # so they do not contribute to any entry's score.
    wanted.difference_update((project or "").lower().replace("_", "-").split("-"))
    if not wanted:
        return entries

    def _relevance(position: int, entry: ProjectStateEntry) -> tuple[int, float, float, int]:
        """Build the sort key for one entry (larger sorts first)."""
        haystack = " ".join(
            (
                entry.category,
                entry.key.replace("_", " "),
                entry.value,
                entry.source,
            )
        )
        tokens = _tokenize(_normalize(haystack))
        if not tokens:
            return (0, 0.0, entry.confidence, -position)
        shared = len(tokens & wanted)
        # The negated position keeps the earlier entry ahead under the
        # descending sort when everything else ties.
        return (shared, shared / len(tokens), entry.confidence, -position)

    keyed = [(_relevance(position, entry), entry) for position, entry in enumerate(entries)]
    keyed.sort(key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in keyed]
|
||||
|
||||
|
||||
def _rank_chunks(
|
||||
candidates: list[ChunkResult],
|
||||
project_hint: str | None,
|
||||
|
||||
@@ -143,6 +143,52 @@ def test_project_state_respects_total_budget(tmp_data_dir, sample_markdown):
|
||||
assert len(pack.formatted_context) <= 120
|
||||
|
||||
|
||||
def test_project_state_query_relevance_before_truncation(tmp_data_dir, sample_markdown):
    """A query-matching requirement must outlive the project-state budget cap.

    Seeds one contact, two decisions and one requirement for ``p04-gigabit``,
    builds a context pack for a constraints question, and checks that the
    requirement text is present and that the first ``[REQUIREMENT]`` marker
    sits before the first ``[CONTACT]`` marker.
    """
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    project = "p04-gigabit"
    # (category, key, value) rows, stored in this order.
    seeded_state = (
        (
            "contact",
            "abb-space",
            "ABB Space is the primary vendor contact for polishing, CCP, IBF, procurement coordination, "
            "contract administration, interface planning, and delivery discussions.",
        ),
        (
            "decision",
            "back-structure",
            "Option B selected: conical isogrid back structure with variable rib density. "
            "Chosen over flat-back for stiffness-to-weight ratio and manufacturability.",
        ),
        (
            "decision",
            "polishing-vendor",
            "ABB Space selected as polishing vendor. Contract includes computer-controlled polishing "
            "and ion beam figuring.",
        ),
        (
            "requirement",
            "key_constraints",
            "The program targets a 1.2 m lightweight Zerodur mirror with filtered mechanical WFE below 15 nm "
            "and mass below 103.5 kg.",
        ),
    )
    for category, key, value in seeded_state:
        set_state(project, category, key, value)

    pack = build_context(
        "what are the key GigaBIT M1 program constraints",
        project_hint=project,
        budget=3000,
    )
    rendered = pack.formatted_context

    assert "Zerodur" in rendered
    assert "1.2" in rendered
    assert rendered.find("[REQUIREMENT]") < rendered.find("[CONTACT]")
|
||||
|
||||
|
||||
def test_project_hint_matches_state_case_insensitively(tmp_data_dir, sample_markdown):
|
||||
"""Project state lookup should not depend on exact casing."""
|
||||
init_db()
|
||||
|
||||
Reference in New Issue
Block a user