Compare commits
3 Commits
codex/proj
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| a87d9845a8 | |||
|
|
4744c69d10 | ||
| 867a1abfaa |
@@ -50,6 +50,9 @@ MEMORY_STATUSES = [
|
|||||||
"graduated", # Phase 5: memory has become an entity; content frozen, forward pointer in properties
|
"graduated", # Phase 5: memory has become an entity; content frozen, forward pointer in properties
|
||||||
]
|
]
|
||||||
|
|
||||||
|
DEFAULT_CONTEXT_MEMORY_LIMIT = 30
|
||||||
|
QUERY_CONTEXT_MEMORY_LIMIT = 120
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Memory:
|
class Memory:
|
||||||
@@ -896,6 +899,7 @@ def get_memories_for_context(
|
|||||||
from atocore.memory.reinforcement import _normalize, _tokenize
|
from atocore.memory.reinforcement import _normalize, _tokenize
|
||||||
|
|
||||||
query_tokens = _tokenize(_normalize(query))
|
query_tokens = _tokenize(_normalize(query))
|
||||||
|
query_tokens = _prepare_memory_query_tokens(query_tokens, project=project)
|
||||||
if not query_tokens:
|
if not query_tokens:
|
||||||
query_tokens = None
|
query_tokens = None
|
||||||
|
|
||||||
@@ -908,12 +912,13 @@ def get_memories_for_context(
|
|||||||
# ``_rank_memories_for_query`` via Python's stable sort.
|
# ``_rank_memories_for_query`` via Python's stable sort.
|
||||||
pool: list[Memory] = []
|
pool: list[Memory] = []
|
||||||
seen_ids: set[str] = set()
|
seen_ids: set[str] = set()
|
||||||
|
candidate_limit = QUERY_CONTEXT_MEMORY_LIMIT if query_tokens is not None else DEFAULT_CONTEXT_MEMORY_LIMIT
|
||||||
for mtype in memory_types:
|
for mtype in memory_types:
|
||||||
for mem in get_memories(
|
for mem in get_memories(
|
||||||
memory_type=mtype,
|
memory_type=mtype,
|
||||||
project=project,
|
project=project,
|
||||||
min_confidence=0.5,
|
min_confidence=0.5,
|
||||||
limit=30,
|
limit=candidate_limit,
|
||||||
):
|
):
|
||||||
if mem.id in seen_ids:
|
if mem.id in seen_ids:
|
||||||
continue
|
continue
|
||||||
@@ -980,11 +985,11 @@ def _rank_memories_for_query(
|
|||||||
) -> list["Memory"]:
|
) -> list["Memory"]:
|
||||||
"""Rerank a memory list by lexical overlap with a pre-tokenized query.
|
"""Rerank a memory list by lexical overlap with a pre-tokenized query.
|
||||||
|
|
||||||
Primary key: overlap_density (overlap_count / memory_token_count),
|
Primary key: absolute overlap count, which keeps a richer memory
|
||||||
which rewards short focused memories that match the query precisely
|
matching multiple query-intent terms ahead of a short memory that
|
||||||
over long overview memories that incidentally share a few tokens.
|
only happens to share one term. Secondary: overlap_density
|
||||||
Secondary: absolute overlap count. Tertiary: domain-tag match.
|
(overlap_count / memory_token_count), so ties still prefer short
|
||||||
Quaternary: confidence.
|
focused memories. Tertiary: domain-tag match. Quaternary: confidence.
|
||||||
|
|
||||||
Phase 3: domain_tags contribute a boost when they appear in the
|
Phase 3: domain_tags contribute a boost when they appear in the
|
||||||
query text. A memory tagged [optics, thermal] for a query about
|
query text. A memory tagged [optics, thermal] for a query about
|
||||||
@@ -1010,10 +1015,46 @@ def _rank_memories_for_query(
|
|||||||
tag_hits += 1
|
tag_hits += 1
|
||||||
|
|
||||||
scored.append((density, overlap, tag_hits, mem.confidence, mem))
|
scored.append((density, overlap, tag_hits, mem.confidence, mem))
|
||||||
scored.sort(key=lambda t: (t[0], t[1], t[2], t[3]), reverse=True)
|
scored.sort(key=lambda t: (t[1], t[0], t[2], t[3]), reverse=True)
|
||||||
return [mem for _, _, _, _, mem in scored]
|
return [mem for _, _, _, _, mem in scored]
|
||||||
|
|
||||||
|
|
||||||
|
_MEMORY_QUERY_STOP_TOKENS = {
|
||||||
|
"how",
|
||||||
|
"what",
|
||||||
|
"when",
|
||||||
|
"where",
|
||||||
|
"which",
|
||||||
|
"who",
|
||||||
|
"why",
|
||||||
|
"current",
|
||||||
|
"status",
|
||||||
|
"project",
|
||||||
|
"machine",
|
||||||
|
}
|
||||||
|
|
||||||
|
_MEMORY_QUERY_TOKEN_EXPANSIONS = {
|
||||||
|
"remotely": {"remote"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_memory_query_tokens(
|
||||||
|
query_tokens: set[str],
|
||||||
|
project: str | None = None,
|
||||||
|
) -> set[str]:
|
||||||
|
"""Remove project-scope noise and add tiny intent-preserving expansions."""
|
||||||
|
prepared = set(query_tokens)
|
||||||
|
for token in list(prepared):
|
||||||
|
prepared.update(_MEMORY_QUERY_TOKEN_EXPANSIONS.get(token, set()))
|
||||||
|
|
||||||
|
prepared -= _MEMORY_QUERY_STOP_TOKENS
|
||||||
|
if project:
|
||||||
|
for part in project.lower().replace("_", "-").split("-"):
|
||||||
|
if part:
|
||||||
|
prepared.discard(part)
|
||||||
|
return prepared
|
||||||
|
|
||||||
|
|
||||||
def _row_to_memory(row) -> Memory:
|
def _row_to_memory(row) -> Memory:
|
||||||
"""Convert a DB row to Memory dataclass."""
|
"""Convert a DB row to Memory dataclass."""
|
||||||
import json as _json
|
import json as _json
|
||||||
|
|||||||
@@ -428,6 +428,136 @@ def test_context_builder_tag_boost_orders_results(isolated_db):
|
|||||||
assert idx_tagged < idx_untagged
|
assert idx_tagged < idx_untagged
|
||||||
|
|
||||||
|
|
||||||
|
def test_project_memory_ranking_ignores_scope_noise(isolated_db):
    """Project words should not crowd out the actual query intent."""
    from atocore.memory.service import create_memory, get_memories_for_context

    # (content, confidence) pairs, created in this exact order.
    seeded = (
        (
            "Norman is the end operator for p06-polisher and requires an explicit manual mode to operate the machine.",
            0.7,
        ),
        (
            "Polisher Control firmware spec document titled 'Fulum Polisher Machine Control Firmware Spec v1' lives in PKM.",
            0.7,
        ),
        (
            "Machine design principle: works fully offline and independently; network connection is for remote access only",
            0.5,
        ),
        (
            "Use Tailscale mesh for RPi remote access to provide SSH, file transfer, and NAT traversal without port forwarding.",
            0.5,
        ),
    )
    for content, confidence in seeded:
        create_memory(
            "project",
            content,
            project="p06-polisher",
            confidence=confidence,
        )

    rendered, _ = get_memories_for_context(
        memory_types=["project"],
        project="p06-polisher",
        budget=360,
        query="how do we access the polisher machine remotely",
    )

    # Both remote-access memories are expected, the design principle first;
    # the operator memory must not appear in the rendered text.
    assert "Tailscale" in rendered
    assert rendered.find("remote access only") < rendered.find("Tailscale")
    assert "manual mode" not in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_project_memory_ranking_prefers_multiple_intent_hits(isolated_db):
    """A rich memory with several query hits should beat a terse one-hit memory."""
    from atocore.memory.service import create_memory, get_memories_for_context

    # (memory_type, content, confidence) triples, created in this order.
    seeded = (
        (
            "project",
            "CGH vendor selected for p05. Active integration coordination with Katie/AOM.",
            0.7,
        ),
        (
            "knowledge",
            "Vendor-summary current signal: 4D is the strongest technical Twyman-Green candidate; "
            "a certified used Zygo Verifire SV around $55k emerged as a strong value path.",
            0.9,
        ),
    )
    for memory_type, content, confidence in seeded:
        create_memory(
            memory_type,
            content,
            project="p05-interferometer",
            confidence=confidence,
        )

    rendered, _ = get_memories_for_context(
        memory_types=["project", "knowledge"],
        project="p05-interferometer",
        budget=220,
        query="what is the current vendor signal for the interferometer procurement",
    )

    # Both markers come from the longer vendor-summary memory.
    assert "4D" in rendered
    assert "Zygo" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_project_memory_query_ranks_beyond_confidence_prefilter(isolated_db):
    """Query-time ranking should see older low-confidence but exact-intent memories."""
    from atocore.memory.service import create_memory, get_memories_for_context

    # 35 high-confidence fillers first, then the single low-confidence
    # memory that actually answers the query.
    seeded = [
        (f"High confidence p06 filler memory {idx}: Polisher Control planning note.", 0.9)
        for idx in range(35)
    ]
    seeded.append(
        (
            "Use Tailscale mesh for RPi remote access to provide SSH, file transfer, and NAT traversal without port forwarding.",
            0.5,
        )
    )
    for content, confidence in seeded:
        create_memory(
            "project",
            content,
            project="p06-polisher",
            confidence=confidence,
        )

    rendered, _ = get_memories_for_context(
        memory_types=["project"],
        project="p06-polisher",
        budget=360,
        query="how do we access the polisher machine remotely",
    )

    assert "Tailscale" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_project_memory_query_prefers_exact_cam_fact(isolated_db):
    """The cam-amplitude memory should be rendered for a cam-amplitude query."""
    from atocore.memory.service import create_memory, get_memories_for_context

    # (content, confidence) pairs, created in this exact order; the memory
    # containing "encoders" has the lowest confidence of the three.
    seeded = (
        (
            "Polisher Control firmware spec document titled 'Fulum Polisher Machine Control Firmware Spec v1' lives in PKM.",
            0.9,
        ),
        (
            "Polisher Control doc must cover manual mode for Norman as a required deliverable per the plan.",
            0.9,
        ),
        (
            "Cam amplitude and offset are mechanically set by operator and read via encoders; no actuators control them.",
            0.5,
        ),
    )
    for content, confidence in seeded:
        create_memory(
            "project",
            content,
            project="p06-polisher",
            confidence=confidence,
        )

    rendered, _ = get_memories_for_context(
        memory_types=["project"],
        project="p06-polisher",
        budget=300,
        query="how is cam amplitude controlled on the polisher",
    )

    assert "encoders" in rendered
|
||||||
|
|
||||||
|
|
||||||
def test_expire_stale_candidates_keeps_reinforced(isolated_db):
|
def test_expire_stale_candidates_keeps_reinforced(isolated_db):
|
||||||
from atocore.memory.service import create_memory, expire_stale_candidates
|
from atocore.memory.service import create_memory, expire_stale_candidates
|
||||||
from atocore.models.database import get_connection
|
from atocore.models.database import get_connection
|
||||||
|
|||||||
Reference in New Issue
Block a user