fix(memory): rank project memories by query intent
This commit is contained in:
@@ -896,6 +896,7 @@ def get_memories_for_context(
|
||||
from atocore.memory.reinforcement import _normalize, _tokenize
|
||||
|
||||
query_tokens = _tokenize(_normalize(query))
|
||||
query_tokens = _prepare_memory_query_tokens(query_tokens, project=project)
|
||||
if not query_tokens:
|
||||
query_tokens = None
|
||||
|
||||
@@ -980,11 +981,11 @@ def _rank_memories_for_query(
|
||||
) -> list["Memory"]:
|
||||
"""Rerank a memory list by lexical overlap with a pre-tokenized query.
|
||||
|
||||
Primary key: overlap_density (overlap_count / memory_token_count),
|
||||
which rewards short focused memories that match the query precisely
|
||||
over long overview memories that incidentally share a few tokens.
|
||||
Secondary: absolute overlap count. Tertiary: domain-tag match.
|
||||
Quaternary: confidence.
|
||||
Primary key: absolute overlap count, which keeps a richer memory
|
||||
matching multiple query-intent terms ahead of a short memory that
|
||||
only happens to share one term. Secondary: overlap_density
|
||||
(overlap_count / memory_token_count), so ties still prefer short
|
||||
focused memories. Tertiary: domain-tag match. Quaternary: confidence.
|
||||
|
||||
Phase 3: domain_tags contribute a boost when they appear in the
|
||||
query text. A memory tagged [optics, thermal] for a query about
|
||||
@@ -1010,10 +1011,46 @@ def _rank_memories_for_query(
|
||||
tag_hits += 1
|
||||
|
||||
scored.append((density, overlap, tag_hits, mem.confidence, mem))
|
||||
scored.sort(key=lambda t: (t[0], t[1], t[2], t[3]), reverse=True)
|
||||
scored.sort(key=lambda t: (t[1], t[0], t[2], t[3]), reverse=True)
|
||||
return [mem for _, _, _, _, mem in scored]
|
||||
|
||||
|
||||
_MEMORY_QUERY_STOP_TOKENS = {
|
||||
"how",
|
||||
"what",
|
||||
"when",
|
||||
"where",
|
||||
"which",
|
||||
"who",
|
||||
"why",
|
||||
"current",
|
||||
"status",
|
||||
"project",
|
||||
"machine",
|
||||
}
|
||||
|
||||
_MEMORY_QUERY_TOKEN_EXPANSIONS = {
|
||||
"remotely": {"remote"},
|
||||
}
|
||||
|
||||
|
||||
def _prepare_memory_query_tokens(
|
||||
query_tokens: set[str],
|
||||
project: str | None = None,
|
||||
) -> set[str]:
|
||||
"""Remove project-scope noise and add tiny intent-preserving expansions."""
|
||||
prepared = set(query_tokens)
|
||||
for token in list(prepared):
|
||||
prepared.update(_MEMORY_QUERY_TOKEN_EXPANSIONS.get(token, set()))
|
||||
|
||||
prepared -= _MEMORY_QUERY_STOP_TOKENS
|
||||
if project:
|
||||
for part in project.lower().replace("_", "-").split("-"):
|
||||
if part:
|
||||
prepared.discard(part)
|
||||
return prepared
|
||||
|
||||
|
||||
def _row_to_memory(row) -> Memory:
|
||||
"""Convert a DB row to Memory dataclass."""
|
||||
import json as _json
|
||||
|
||||
Reference in New Issue
Block a user