fix(memory): rank project memories by query intent

This commit is contained in:
Antoine Letarte
2026-04-24 20:38:50 +00:00
parent 867a1abfaa
commit 4744c69d10
2 changed files with 113 additions and 6 deletions

View File

@@ -896,6 +896,7 @@ def get_memories_for_context(
from atocore.memory.reinforcement import _normalize, _tokenize
query_tokens = _tokenize(_normalize(query))
query_tokens = _prepare_memory_query_tokens(query_tokens, project=project)
if not query_tokens:
query_tokens = None
@@ -980,11 +981,11 @@ def _rank_memories_for_query(
) -> list["Memory"]:
"""Rerank a memory list by lexical overlap with a pre-tokenized query.
Primary key: overlap_density (overlap_count / memory_token_count),
which rewards short focused memories that match the query precisely
over long overview memories that incidentally share a few tokens.
Secondary: absolute overlap count. Tertiary: domain-tag match.
Quaternary: confidence.
Primary key: absolute overlap count, which keeps a richer memory
matching multiple query-intent terms ahead of a short memory that
only happens to share one term. Secondary: overlap_density
(overlap_count / memory_token_count), so ties still prefer short
focused memories. Tertiary: domain-tag match. Quaternary: confidence.
Phase 3: domain_tags contribute a boost when they appear in the
query text. A memory tagged [optics, thermal] for a query about
@@ -1010,10 +1011,46 @@ def _rank_memories_for_query(
tag_hits += 1
scored.append((density, overlap, tag_hits, mem.confidence, mem))
scored.sort(key=lambda t: (t[0], t[1], t[2], t[3]), reverse=True)
scored.sort(key=lambda t: (t[1], t[0], t[2], t[3]), reverse=True)
return [mem for _, _, _, _, mem in scored]
_MEMORY_QUERY_STOP_TOKENS = {
"how",
"what",
"when",
"where",
"which",
"who",
"why",
"current",
"status",
"project",
"machine",
}
_MEMORY_QUERY_TOKEN_EXPANSIONS = {
"remotely": {"remote"},
}
def _prepare_memory_query_tokens(
query_tokens: set[str],
project: str | None = None,
) -> set[str]:
"""Remove project-scope noise and add tiny intent-preserving expansions."""
prepared = set(query_tokens)
for token in list(prepared):
prepared.update(_MEMORY_QUERY_TOKEN_EXPANSIONS.get(token, set()))
prepared -= _MEMORY_QUERY_STOP_TOKENS
if project:
for part in project.lower().replace("_", "-").split("-"):
if part:
prepared.discard(part)
return prepared
def _row_to_memory(row) -> Memory:
"""Convert a DB row to Memory dataclass."""
import json as _json

View File

@@ -428,6 +428,76 @@ def test_context_builder_tag_boost_orders_results(isolated_db):
assert idx_tagged < idx_untagged
def test_project_memory_ranking_ignores_scope_noise(isolated_db):
    """Project words should not crowd out the actual query intent.

    Seeds four ``project`` memories for ``p06-polisher``: two that mention
    the project or machine words without addressing remote access, and two
    that describe remote access. With ``budget=360`` the rendered context
    must contain both remote-access memories, the "remote access only"
    memory first, and must leave out the "manual mode" memory.
    """
    from atocore.memory.service import create_memory, get_memories_for_context

    create_memory(
        "project",
        "Norman is the end operator for p06-polisher and requires an explicit manual mode to operate the machine.",
        project="p06-polisher",
        confidence=0.7,
    )
    create_memory(
        "project",
        "Polisher Control firmware spec document titled 'Fulum Polisher Machine Control Firmware Spec v1' lives in PKM.",
        project="p06-polisher",
        confidence=0.7,
    )
    create_memory(
        "project",
        "Machine design principle: works fully offline and independently; network connection is for remote access only",
        project="p06-polisher",
        confidence=0.5,
    )
    create_memory(
        "project",
        "Use Tailscale mesh for RPi remote access to provide SSH, file transfer, and NAT traversal without port forwarding.",
        project="p06-polisher",
        confidence=0.5,
    )

    text, _ = get_memories_for_context(
        memory_types=["project"],
        project="p06-polisher",
        budget=360,
        query="how do we access the polisher machine remotely",
    )

    assert "Tailscale" in text
    # str.find returns -1 for a missing substring, which would make an
    # ordering comparison pass even when the first memory is absent.
    # Check presence explicitly, then compare positions with str.index.
    assert "remote access only" in text
    assert text.index("remote access only") < text.index("Tailscale")
    assert "manual mode" not in text
def test_project_memory_ranking_prefers_multiple_intent_hits(isolated_db):
    """A memory matching several query terms must survive a tight budget.

    Two memories are stored for ``p05-interferometer``: a short ``project``
    note and a longer ``knowledge`` summary. The context built with
    ``budget=220`` is expected to contain the longer summary, identified by
    its "4D" and "Zygo" markers.
    """
    from atocore.memory.service import create_memory, get_memories_for_context

    project_name = "p05-interferometer"

    # (memory type, content, confidence), created in this order.
    seeds = (
        (
            "project",
            "CGH vendor selected for p05. Active integration coordination with Katie/AOM.",
            0.7,
        ),
        (
            "knowledge",
            "Vendor-summary current signal: 4D is the strongest technical Twyman-Green candidate; "
            "a certified used Zygo Verifire SV around $55k emerged as a strong value path.",
            0.9,
        ),
    )
    for memory_type, content, confidence in seeds:
        create_memory(
            memory_type,
            content,
            project=project_name,
            confidence=confidence,
        )

    rendered, _ = get_memories_for_context(
        memory_types=["project", "knowledge"],
        project=project_name,
        budget=220,
        query="what is the current vendor signal for the interferometer procurement",
    )

    for marker in ("4D", "Zygo"):
        assert marker in rendered
def test_expire_stale_candidates_keeps_reinforced(isolated_db):
from atocore.memory.service import create_memory, expire_stale_candidates
from atocore.models.database import get_connection