fix(context): rank trusted state by query relevance
This commit is contained in:
@@ -27,8 +27,7 @@
|
|||||||
"expect_absent": [
|
"expect_absent": [
|
||||||
"polisher suite"
|
"polisher suite"
|
||||||
],
|
],
|
||||||
"known_issue": true,
|
"notes": "Regression guard: query-relevant Trusted Project State requirements must survive the project-state budget cap."
|
||||||
"notes": "Known content gap as of 2026-04-24: live retrieval surfaces related constraints but not the exact Zerodur / 1.2 strings. Keep visible, but do not make nightly harness red until the source/state gap is fixed."
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "p04-short-ambiguous",
|
"name": "p04-short-ambiguous",
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import atocore.config as _config
|
import atocore.config as _config
|
||||||
from atocore.context.project_state import format_project_state, get_state
|
from atocore.context.project_state import ProjectStateEntry, format_project_state, get_state
|
||||||
from atocore.memory.service import get_memories_for_context
|
from atocore.memory.service import get_memories_for_context
|
||||||
from atocore.observability.logger import get_logger
|
from atocore.observability.logger import get_logger
|
||||||
from atocore.engineering.service import get_entities, get_entity_with_context
|
from atocore.engineering.service import get_entities, get_entity_with_context
|
||||||
@@ -116,6 +116,11 @@ def build_context(
|
|||||||
if canonical_project:
|
if canonical_project:
|
||||||
state_entries = get_state(canonical_project)
|
state_entries = get_state(canonical_project)
|
||||||
if state_entries:
|
if state_entries:
|
||||||
|
state_entries = _rank_project_state_entries(
|
||||||
|
state_entries,
|
||||||
|
query=user_prompt,
|
||||||
|
project=canonical_project,
|
||||||
|
)
|
||||||
project_state_text = format_project_state(state_entries)
|
project_state_text = format_project_state(state_entries)
|
||||||
project_state_text, project_state_chars = _truncate_text_block(
|
project_state_text, project_state_chars = _truncate_text_block(
|
||||||
project_state_text,
|
project_state_text,
|
||||||
@@ -284,6 +289,55 @@ def get_last_context_pack() -> ContextPack | None:
|
|||||||
return _last_context_pack
|
return _last_context_pack
|
||||||
|
|
||||||
|
|
||||||
|
def _rank_project_state_entries(
|
||||||
|
entries: list[ProjectStateEntry],
|
||||||
|
query: str,
|
||||||
|
project: str,
|
||||||
|
) -> list[ProjectStateEntry]:
|
||||||
|
"""Promote query-relevant trusted state before the state band is truncated."""
|
||||||
|
if not query or len(entries) <= 1:
|
||||||
|
return entries
|
||||||
|
|
||||||
|
from atocore.memory.reinforcement import _normalize, _tokenize
|
||||||
|
|
||||||
|
query_text = _normalize(query.replace("_", " "))
|
||||||
|
query_tokens = set(_tokenize(query_text))
|
||||||
|
query_tokens -= {
|
||||||
|
"how",
|
||||||
|
"what",
|
||||||
|
"when",
|
||||||
|
"where",
|
||||||
|
"which",
|
||||||
|
"who",
|
||||||
|
"why",
|
||||||
|
"current",
|
||||||
|
"status",
|
||||||
|
"project",
|
||||||
|
}
|
||||||
|
for part in (project or "").lower().replace("_", "-").split("-"):
|
||||||
|
query_tokens.discard(part)
|
||||||
|
if not query_tokens:
|
||||||
|
return entries
|
||||||
|
|
||||||
|
scored: list[tuple[int, float, float, int, ProjectStateEntry]] = []
|
||||||
|
for index, entry in enumerate(entries):
|
||||||
|
entry_text = " ".join(
|
||||||
|
[
|
||||||
|
entry.category,
|
||||||
|
entry.key.replace("_", " "),
|
||||||
|
entry.value,
|
||||||
|
entry.source,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
entry_tokens = _tokenize(_normalize(entry_text))
|
||||||
|
overlap = len(entry_tokens & query_tokens) if entry_tokens else 0
|
||||||
|
density = overlap / len(entry_tokens) if entry_tokens else 0.0
|
||||||
|
scored.append((overlap, density, entry.confidence, -index, entry))
|
||||||
|
|
||||||
|
scored.sort(key=lambda item: (item[0], item[1], item[2], item[3]), reverse=True)
|
||||||
|
return [entry for _, _, _, _, entry in scored]
|
||||||
|
|
||||||
|
|
||||||
def _rank_chunks(
|
def _rank_chunks(
|
||||||
candidates: list[ChunkResult],
|
candidates: list[ChunkResult],
|
||||||
project_hint: str | None,
|
project_hint: str | None,
|
||||||
|
|||||||
@@ -143,6 +143,52 @@ def test_project_state_respects_total_budget(tmp_data_dir, sample_markdown):
|
|||||||
assert len(pack.formatted_context) <= 120
|
assert len(pack.formatted_context) <= 120
|
||||||
|
|
||||||
|
|
||||||
|
def test_project_state_query_relevance_before_truncation(tmp_data_dir, sample_markdown):
|
||||||
|
"""Relevant trusted state should survive the project-state budget cap."""
|
||||||
|
init_db()
|
||||||
|
init_project_state_schema()
|
||||||
|
ingest_file(sample_markdown)
|
||||||
|
|
||||||
|
set_state(
|
||||||
|
"p04-gigabit",
|
||||||
|
"contact",
|
||||||
|
"abb-space",
|
||||||
|
"ABB Space is the primary vendor contact for polishing, CCP, IBF, procurement coordination, "
|
||||||
|
"contract administration, interface planning, and delivery discussions.",
|
||||||
|
)
|
||||||
|
set_state(
|
||||||
|
"p04-gigabit",
|
||||||
|
"decision",
|
||||||
|
"back-structure",
|
||||||
|
"Option B selected: conical isogrid back structure with variable rib density. "
|
||||||
|
"Chosen over flat-back for stiffness-to-weight ratio and manufacturability.",
|
||||||
|
)
|
||||||
|
set_state(
|
||||||
|
"p04-gigabit",
|
||||||
|
"decision",
|
||||||
|
"polishing-vendor",
|
||||||
|
"ABB Space selected as polishing vendor. Contract includes computer-controlled polishing "
|
||||||
|
"and ion beam figuring.",
|
||||||
|
)
|
||||||
|
set_state(
|
||||||
|
"p04-gigabit",
|
||||||
|
"requirement",
|
||||||
|
"key_constraints",
|
||||||
|
"The program targets a 1.2 m lightweight Zerodur mirror with filtered mechanical WFE below 15 nm "
|
||||||
|
"and mass below 103.5 kg.",
|
||||||
|
)
|
||||||
|
|
||||||
|
pack = build_context(
|
||||||
|
"what are the key GigaBIT M1 program constraints",
|
||||||
|
project_hint="p04-gigabit",
|
||||||
|
budget=3000,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert "Zerodur" in pack.formatted_context
|
||||||
|
assert "1.2" in pack.formatted_context
|
||||||
|
assert pack.formatted_context.find("[REQUIREMENT]") < pack.formatted_context.find("[CONTACT]")
|
||||||
|
|
||||||
|
|
||||||
def test_project_hint_matches_state_case_insensitively(tmp_data_dir, sample_markdown):
|
def test_project_hint_matches_state_case_insensitively(tmp_data_dir, sample_markdown):
|
||||||
"""Project state lookup should not depend on exact casing."""
|
"""Project state lookup should not depend on exact casing."""
|
||||||
init_db()
|
init_db()
|
||||||
|
|||||||
Reference in New Issue
Block a user