fix(memory): rank project memories by query intent

2026-04-24 20:38:50 +00:00
parent 867a1abfaa
commit 4744c69d10
2 changed files with 113 additions and 6 deletions
--- a/src/atocore/memory/service.py
+++ b/src/atocore/memory/service.py
@@ -896,6 +896,7 @@ def get_memories_for_context(
        from atocore.memory.reinforcement import _normalize, _tokenize

        query_tokens = _tokenize(_normalize(query))
+        query_tokens = _prepare_memory_query_tokens(query_tokens, project=project)
        if not query_tokens:
            query_tokens = None

@@ -980,11 +981,11 @@ def _rank_memories_for_query(
 ) -> list["Memory"]:
    """Rerank a memory list by lexical overlap with a pre-tokenized query.

-    Primary key: overlap_density (overlap_count / memory_token_count),
-    which rewards short focused memories that match the query precisely
-    over long overview memories that incidentally share a few tokens.
-    Secondary: absolute overlap count. Tertiary: domain-tag match.
-    Quaternary: confidence.
+    Primary key: absolute overlap count, which keeps a richer memory
+    matching multiple query-intent terms ahead of a short memory that
+    only happens to share one term. Secondary: overlap_density
+    (overlap_count / memory_token_count), so ties still prefer short
+    focused memories. Tertiary: domain-tag match. Quaternary: confidence.

    Phase 3: domain_tags contribute a boost when they appear in the
    query text. A memory tagged [optics, thermal] for a query about
@@ -1010,10 +1011,46 @@ def _rank_memories_for_query(
                tag_hits += 1

        scored.append((density, overlap, tag_hits, mem.confidence, mem))
-    scored.sort(key=lambda t: (t[0], t[1], t[2], t[3]), reverse=True)
+    scored.sort(key=lambda t: (t[1], t[0], t[2], t[3]), reverse=True)
    return [mem for _, _, _, _, mem in scored]


+_MEMORY_QUERY_STOP_TOKENS = {  # stripped from the query by _prepare_memory_query_tokens
+    "how",
+    "what",
+    "when",
+    "where",
+    "which",
+    "who",
+    "why",
+    "current",
+    "status",
+    "project",
+    "machine",
+}
+
+_MEMORY_QUERY_TOKEN_EXPANSIONS = {  # query token -> extra tokens added next to it
+    "remotely": {"remote"},
+}
+
+
+def _prepare_memory_query_tokens(
+    query_tokens: set[str],
+    project: str | None = None,
+) -> set[str]:
+    """Remove project-scope noise and add tiny intent-preserving expansions."""
+    prepared = set(query_tokens)  # copy, so the caller's set is never mutated
+    for token in list(prepared):  # iterate a snapshot: the body grows the set
+        prepared.update(_MEMORY_QUERY_TOKEN_EXPANSIONS.get(token, set()))
+
+    prepared -= _MEMORY_QUERY_STOP_TOKENS  # after expansion, so added stop tokens go too
+    if project:  # drop each "-"/"_"-separated part of the lowercased project name
+        for part in project.lower().replace("_", "-").split("-"):
+            if part:  # split() yields "" for doubled or leading/trailing separators
+                prepared.discard(part)
+    return prepared  # may be empty; the caller visible at L14-L15 maps that to None
+
+
 def _row_to_memory(row) -> Memory:
    """Convert a DB row to Memory dataclass."""
    import json as _json