feat: query-relevance ordering for memory selection

get_memories_for_context now accepts an optional query string. When provided, candidate memories are reranked by lexical overlap with the query (stemmed token intersection, ties broken by confidence) before the budget walk. Without a query the order is unchanged — effectively "by confidence desc" as before — so non-builder callers see no behaviour change.

The fetch limit is raised from 10 to 30 so there's a real pool to rerank.

Token overlap reuses _normalize/_tokenize from reinforcement.py so ranking and reinforcement matching share the same notion of distinctive terms.

build_context passes the user_prompt through to both the identity/preference and project-memory calls.

The retrieval harness regression the fix is targeting:

- p05-vendor-signal FAIL @ 1161645: "Zygo" missing from the pack even though an active vendor memory contained it. Root cause: higher-confidence p05 memories filled the 25% budget slice before the vendor memory ever got a chance. Query-aware ordering puts the vendor memory first when the query is about vendors.

New regression test test_project_memories_query_relevance_ordering locks the behaviour in with two p05 memories and a tight budget.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 12:47:05 -04:00
parent 4da81c9e4e
commit 5aeeb1cad1
3 changed files with 89 additions and 3 deletions
--- a/src/atocore/memory/service.py
+++ b/src/atocore/memory/service.py
@@ -346,6 +346,7 @@ def get_memories_for_context(
    budget: int = 500,
    header: str = "--- AtoCore Memory ---",
    footer: str = "--- End Memory ---",
+    query: str | None = None,
 ) -> tuple[str, int]:
    """Get formatted memories for context injection.

@@ -357,6 +358,12 @@ def get_memories_for_context(
    The caller can override ``header`` / ``footer`` to distinguish
    multiple memory blocks in the same pack (e.g. identity/preference
    vs project/knowledge memories).
+
+    When ``query`` is provided, candidates within each memory type
+    are ranked by lexical overlap against the query (stemmed token
+    intersection, ties broken by confidence). Without a query,
+    candidates fall through in the order ``get_memories`` returns
+    them — which is effectively "by confidence desc".
    """
    if memory_types is None:
        memory_types = ["identity", "preference"]
@@ -371,17 +378,34 @@ def get_memories_for_context(
    selected_entries: list[str] = []
    used = 0

+    # Pre-tokenize the query once. ``_rank_memories_for_query`` is a
+    # free function below that reuses the reinforcement tokenizer so
+    # lexical scoring here matches the reinforcement matcher.
+    query_tokens: set[str] | None = None
+    if query:
+        from atocore.memory.reinforcement import _normalize, _tokenize
+
+        query_tokens = _tokenize(_normalize(query))
+        if not query_tokens:
+            query_tokens = None
+
    # Flat budget across types so paragraph-length project memories
    # aren't starved by an even slice. Types are still walked in order
    # (identity/preference first when they're the input), so earlier
    # types still get first pick when the budget is tight.
    for mtype in memory_types:
-        for mem in get_memories(
+        # Raise the fetch limit above the budget slice so query-relevance
+        # ordering has a real pool to rerank. Without a query, the extras
+        # just fall off the end harmlessly.
+        candidates = get_memories(
            memory_type=mtype,
            project=project,
            min_confidence=0.5,
-            limit=10,
-        ):
+            limit=30,
+        )
+        if query_tokens is not None:
+            candidates = _rank_memories_for_query(candidates, query_tokens)
+        for mem in candidates:
            entry = f"[{mem.memory_type}] {mem.content}"
            entry_len = len(entry) + 1
            if entry_len > available - used:
@@ -399,6 +423,28 @@ def get_memories_for_context(
    return text, len(text)


+def _rank_memories_for_query(
+    memories: list["Memory"],
+    query_tokens: set[str],
+) -> list["Memory"]:
+    """Rerank a memory list by lexical overlap with a pre-tokenized query.
+
+    Ordering key: (overlap_count DESC, confidence DESC). Memories that
+    share no tokens with the query score zero overlap and are ordered
+    by confidence alone; exact ties on both keys keep their input
+    order because the sort is stable. Returns a new list.
+    """
+    from atocore.memory.reinforcement import _normalize, _tokenize
+
+    scored: list[tuple[int, float, Memory]] = []
+    for mem in memories:
+        mem_tokens = _tokenize(_normalize(mem.content))
+        overlap = len(mem_tokens & query_tokens) if mem_tokens else 0
+        scored.append((overlap, mem.confidence, mem))
+    scored.sort(key=lambda t: (t[0], t[1]), reverse=True)
+    return [mem for _, _, mem in scored]
+
+
 def _row_to_memory(row) -> Memory:
    """Convert a DB row to Memory dataclass."""
    keys = row.keys() if hasattr(row, "keys") else []