feat: query-relevance ordering for memory selection
get_memories_for_context now accepts an optional query string.
When provided, candidate memories are reranked by lexical overlap
with the query (stemmed token intersection, ties broken by
confidence) before the budget walk. Without a query the order is
unchanged — effectively "by confidence desc" as before — so
non-builder callers see no behaviour change.
The fetch limit is raised from 10 to 30 so there's a real pool to
rerank. Token overlap reuses _normalize/_tokenize from
reinforcement.py so ranking and reinforcement matching share the
same notion of distinctive terms.
build_context passes the user_prompt through to both the
identity/preference and project-memory calls. The retrieval harness
regression this fix targets:
- p05-vendor-signal FAIL @ 1161645: "Zygo" missing from the pack
even though an active vendor memory contained it. Root cause:
higher-confidence p05 memories filled the 25% budget slice
before the vendor memory ever got a chance. Query-aware ordering
puts the vendor memory first when the query is about vendors.
New regression test test_project_memories_query_relevance_ordering
locks the behaviour in with two p05 memories and a tight budget.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -115,6 +115,7 @@ def build_context(
|
||||
memory_text, memory_chars = get_memories_for_context(
|
||||
memory_types=["identity", "preference"],
|
||||
budget=memory_budget,
|
||||
query=user_prompt,
|
||||
)
|
||||
|
||||
# 2b. Get project-scoped memories (third precedence). Only
|
||||
@@ -135,6 +136,7 @@ def build_context(
|
||||
budget=project_memory_budget,
|
||||
header="--- Project Memories ---",
|
||||
footer="--- End Project Memories ---",
|
||||
query=user_prompt,
|
||||
)
|
||||
|
||||
# 3. Calculate remaining budget for retrieval
|
||||
|
||||
@@ -346,6 +346,7 @@ def get_memories_for_context(
|
||||
budget: int = 500,
|
||||
header: str = "--- AtoCore Memory ---",
|
||||
footer: str = "--- End Memory ---",
|
||||
query: str | None = None,
|
||||
) -> tuple[str, int]:
|
||||
"""Get formatted memories for context injection.
|
||||
|
||||
@@ -357,6 +358,12 @@ def get_memories_for_context(
|
||||
The caller can override ``header`` / ``footer`` to distinguish
|
||||
multiple memory blocks in the same pack (e.g. identity/preference
|
||||
vs project/knowledge memories).
|
||||
|
||||
When ``query`` is provided, candidates within each memory type
|
||||
are ranked by lexical overlap against the query (stemmed token
|
||||
intersection, ties broken by confidence). Without a query,
|
||||
candidates fall through in the order ``get_memories`` returns
|
||||
them — which is effectively "by confidence desc".
|
||||
"""
|
||||
if memory_types is None:
|
||||
memory_types = ["identity", "preference"]
|
||||
@@ -371,17 +378,34 @@ def get_memories_for_context(
|
||||
selected_entries: list[str] = []
|
||||
used = 0
|
||||
|
||||
# Pre-tokenize the query once. ``_score_memory_for_query`` is a
|
||||
# free function below that reuses the reinforcement tokenizer so
|
||||
# lexical scoring here matches the reinforcement matcher.
|
||||
query_tokens: set[str] | None = None
|
||||
if query:
|
||||
from atocore.memory.reinforcement import _normalize, _tokenize
|
||||
|
||||
query_tokens = _tokenize(_normalize(query))
|
||||
if not query_tokens:
|
||||
query_tokens = None
|
||||
|
||||
# Flat budget across types so paragraph-length project memories
|
||||
# aren't starved by an even slice. Types are still walked in order
|
||||
# (identity/preference first when they're the input), so earlier
|
||||
# types still get first pick when the budget is tight.
|
||||
for mtype in memory_types:
|
||||
for mem in get_memories(
|
||||
# Raise the fetch limit above the budget slice so query-relevance
|
||||
# ordering has a real pool to rerank. Without a query, the extras
|
||||
# just fall off the end harmlessly.
|
||||
candidates = get_memories(
|
||||
memory_type=mtype,
|
||||
project=project,
|
||||
min_confidence=0.5,
|
||||
limit=10,
|
||||
):
|
||||
limit=30,
|
||||
)
|
||||
if query_tokens is not None:
|
||||
candidates = _rank_memories_for_query(candidates, query_tokens)
|
||||
for mem in candidates:
|
||||
entry = f"[{mem.memory_type}] {mem.content}"
|
||||
entry_len = len(entry) + 1
|
||||
if entry_len > available - used:
|
||||
@@ -399,6 +423,28 @@ def get_memories_for_context(
|
||||
return text, len(text)
|
||||
|
||||
|
||||
def _rank_memories_for_query(
    memories: list["Memory"],
    query_tokens: set[str],
) -> list["Memory"]:
    """Return ``memories`` reordered by lexical relevance to a query.

    Each memory's content is normalised and tokenised with the
    reinforcement helpers, and the number of tokens it shares with
    ``query_tokens`` becomes the primary sort key. ``confidence`` is
    the secondary key. Both keys sort descending, so memories with
    the same overlap (including zero overlap) are ordered by
    confidence alone. The sort is stable: memories that tie on both
    keys keep the relative order they had in ``memories``.

    ``query_tokens`` must already be tokenised by the caller. A new
    list is returned; the input list is left untouched.
    """
    from atocore.memory.reinforcement import _normalize, _tokenize

    def _relevance_key(candidate: "Memory") -> tuple[int, float]:
        content_tokens = _tokenize(_normalize(candidate.content))
        # Content that yields no tokens scores zero overlap.
        shared = len(content_tokens & query_tokens) if content_tokens else 0
        return shared, candidate.confidence

    # ``reverse=True`` gives the DESC/DESC ordering while preserving
    # the original relative order of fully tied entries.
    return sorted(memories, key=_relevance_key, reverse=True)
|
||||
|
||||
|
||||
def _row_to_memory(row) -> Memory:
|
||||
"""Convert a DB row to Memory dataclass."""
|
||||
keys = row.keys() if hasattr(row, "keys") else []
|
||||
|
||||
@@ -308,3 +308,41 @@ def test_project_memories_absent_without_project_hint(tmp_data_dir, sample_markd
|
||||
pack = build_context("tell me something", budget=3000)
|
||||
assert "--- Project Memories ---" not in pack.formatted_context
|
||||
assert pack.project_memory_chars == 0
|
||||
|
||||
|
||||
def test_project_memories_query_relevance_ordering(tmp_data_dir, sample_markdown):
    """Query-relevance ordering should prefer the memory the query is
    actually about, even if another memory has higher confidence.

    Regression for the 2026-04-11 p05-vendor-signal harness failure:
    memory selection was fixed-order by confidence, so a lower-ranked
    vendor memory got starved out of the budget when a query was
    specifically about vendors.

    Both memories deliberately share one memory type: candidates are
    reranked separately for each memory type, so memories of different
    types are never compared against each other by the reranker. The
    final ordering assertion makes the test meaningful even when the
    budget turns out to be large enough for both entries.
    """
    from atocore.memory.service import create_memory

    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    off_topic = "the folded-beam interferometer uses a CGH stage and fold mirror"
    on_topic = (
        "vendor signal: Zygo Verifire SV is the strongest value path "
        "for the interferometer"
    )

    # Higher confidence, but only shares "interferometer" with the query.
    create_memory(
        memory_type="knowledge",
        content=off_topic,
        project="p05-interferometer",
        confidence=0.97,
    )
    # Lower confidence, but shares the vendor/signal terms with the query.
    create_memory(
        memory_type="knowledge",
        content=on_topic,
        project="p05-interferometer",
        confidence=0.85,
    )

    pack = build_context(
        "what is the current vendor signal for the interferometer",
        project_hint="p05-interferometer",
        budget=1200,  # intended to be tight; the ordering check below does not rely on it
    )
    context = pack.formatted_context

    assert "Zygo Verifire SV" in context
    assert pack.project_memory_chars > 0
    # If the budget admitted both memories, presence alone proves
    # nothing: the query-relevant one must have been selected first.
    if off_topic in context:
        assert context.index("Zygo Verifire SV") < context.index(off_topic)
|
||||
|
||||
Reference in New Issue
Block a user