diff --git a/src/atocore/memory/service.py b/src/atocore/memory/service.py index c59f133..81e73d0 100644 --- a/src/atocore/memory/service.py +++ b/src/atocore/memory/service.py @@ -896,6 +896,7 @@ def get_memories_for_context( from atocore.memory.reinforcement import _normalize, _tokenize query_tokens = _tokenize(_normalize(query)) + query_tokens = _prepare_memory_query_tokens(query_tokens, project=project) if not query_tokens: query_tokens = None @@ -980,11 +981,11 @@ def _rank_memories_for_query( ) -> list["Memory"]: """Rerank a memory list by lexical overlap with a pre-tokenized query. - Primary key: overlap_density (overlap_count / memory_token_count), - which rewards short focused memories that match the query precisely - over long overview memories that incidentally share a few tokens. - Secondary: absolute overlap count. Tertiary: domain-tag match. - Quaternary: confidence. + Primary key: absolute overlap count, which keeps a richer memory + matching multiple query-intent terms ahead of a short memory that + only happens to share one term. Secondary: overlap_density + (overlap_count / memory_token_count), so ties still prefer short + focused memories. Tertiary: domain-tag match. Quaternary: confidence. Phase 3: domain_tags contribute a boost when they appear in the query text. A memory tagged [optics, thermal] for a query about @@ -1010,10 +1011,46 @@ def _rank_memories_for_query( tag_hits += 1 scored.append((density, overlap, tag_hits, mem.confidence, mem)) - scored.sort(key=lambda t: (t[0], t[1], t[2], t[3]), reverse=True) + scored.sort(key=lambda t: (t[1], t[0], t[2], t[3]), reverse=True) return [mem for _, _, _, _, mem in scored] +_MEMORY_QUERY_STOP_TOKENS = { + "how", + "what", + "when", + "where", + "which", + "who", + "why", + "current", + "status", + "project", + "machine", +} + +_MEMORY_QUERY_TOKEN_EXPANSIONS = { + "remotely": {"remote"}, +} + + +def _prepare_memory_query_tokens( + query_tokens: set[str], + project: str | None = None, +) -> set[str]: + """Remove project-scope noise and add tiny intent-preserving expansions.""" + prepared = set(query_tokens) + for token in list(prepared): + prepared.update(_MEMORY_QUERY_TOKEN_EXPANSIONS.get(token, set())) + + prepared -= _MEMORY_QUERY_STOP_TOKENS + if project: + for part in project.lower().replace("_", "-").split("-"): + if part: + prepared.discard(part) + return prepared + + def _row_to_memory(row) -> Memory: """Convert a DB row to Memory dataclass.""" import json as _json diff --git a/tests/test_memory.py b/tests/test_memory.py index 71c7d1c..fd41692 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -428,6 +428,76 @@ def test_context_builder_tag_boost_orders_results(isolated_db): assert idx_tagged < idx_untagged +def test_project_memory_ranking_ignores_scope_noise(isolated_db): + """Project words should not crowd out the actual query intent.""" + from atocore.memory.service import create_memory, get_memories_for_context + + create_memory( + "project", + "Norman is the end operator for p06-polisher and requires an explicit manual mode to operate the machine.", + project="p06-polisher", + confidence=0.7, + ) + create_memory( + "project", + "Polisher Control firmware spec document titled 'Fulum Polisher Machine Control Firmware Spec v1' lives in PKM.", + project="p06-polisher", + confidence=0.7, + ) + create_memory( + "project", + "Machine design principle: works fully offline and independently; network connection is for remote access only", + project="p06-polisher", + confidence=0.5, + ) + create_memory( + "project", + "Use Tailscale mesh for RPi remote access to provide SSH, file transfer, and NAT traversal without port forwarding.", + project="p06-polisher", + confidence=0.5, + ) + + text, _ = get_memories_for_context( + memory_types=["project"], + project="p06-polisher", + budget=360, + query="how do we access the polisher machine remotely", + ) + + assert "Tailscale" in text + assert text.find("remote access only") < text.find("Tailscale") + assert "manual mode" not in text + + +def test_project_memory_ranking_prefers_multiple_intent_hits(isolated_db): + """A rich memory with several query hits should beat a terse one-hit memory.""" + from atocore.memory.service import create_memory, get_memories_for_context + + create_memory( + "project", + "CGH vendor selected for p05. Active integration coordination with Katie/AOM.", + project="p05-interferometer", + confidence=0.7, + ) + create_memory( + "knowledge", + "Vendor-summary current signal: 4D is the strongest technical Twyman-Green candidate; " + "a certified used Zygo Verifire SV around $55k emerged as a strong value path.", + project="p05-interferometer", + confidence=0.9, + ) + + text, _ = get_memories_for_context( + memory_types=["project", "knowledge"], + project="p05-interferometer", + budget=220, + query="what is the current vendor signal for the interferometer procurement", + ) + + assert "4D" in text + assert "Zygo" in text + + def test_expire_stale_candidates_keeps_reinforced(isolated_db): from atocore.memory.service import create_memory, expire_stale_candidates from atocore.models.database import get_connection