wiki: hide low-signal memories + collapse ambient AKC sessions on domain/homepage
Real usage showed two failure modes on /wiki/domains/{tag} and the homepage:
1. Empty-transcript AKC sessions (mic-off) and synthetic E2E test memories
competed with real knowledge in topical listings. A user hitting the
'optics' domain page saw pages of '(no transcript)' and
'IMG integration test — synthetic session' before finding anything useful.
2. Every AKC voice session writes an episodic memory as provenance. At
one session per capture burst, these quickly dominate any domain page
carrying a common tag like 'p05' or 'optics'.
This change partitions memories into three buckets on the domain and
homepage surfaces:
- low-signal → hidden entirely (counted in a dim sub-line)
- AKC session → collapsed behind a single link to /wiki/activity
- real → rendered inline
Filter predicates are applied in priority order: a memory that's both an
empty-transcript session AND tagged as AKC lands in low-signal (low-signal wins).
Tests: 4 new tests in test_wiki_pages.py lock the partition behaviour.
All 15 pre-existing wiki tests still pass.
No schema change, no migration needed.
This commit is contained in:
@@ -161,3 +161,103 @@ def test_memory_detail_shows_superseded_sources(tmp_data_dir):
|
||||
assert html1 is not None
|
||||
assert "superseded" in html1
|
||||
assert "auto-dedup-tier1" in html1 # audit trail shows who merged
|
||||
|
||||
|
||||
# -------------------------------------------------- low-signal wiki filters
|
||||
# Ambient AKC session memories and test pollution shouldn't dominate domain
|
||||
# pages / homepage counts. These tests lock the partitioning behaviour.
|
||||
|
||||
def test_domain_page_hides_empty_transcript_sessions(tmp_data_dir):
    """A mic-off AKC session must not be listed inline on a domain page.

    A session whose transcript is the literal '(no transcript)' placeholder
    is ambient noise: the page should account for it in the hidden-count
    banner and keep its content out of the main listing.
    """
    _init_all()

    # Genuine knowledge memory tagged "optics" -- this one must be rendered.
    create_memory(
        "knowledge",
        "CGH null corrector supports F/1.2 asphere testing",
        project="p05",
        confidence=0.9,
        domain_tags=["optics", "cgh"],
    )

    # Silent AKC session sharing the "optics" tag -- this one must be hidden.
    silent_session = (
        "AKC voice session abc (gen-002)\nDuration: 60s, 2 captures\n"
        "\n## Transcript\n(no transcript)\n"
    )
    create_memory(
        "episodic",
        silent_session,
        project="p05",
        confidence=0.7,
        domain_tags=["optics", "session", "akc", "voice"],
    )

    page = render_domain("optics")

    assert "CGH null corrector" in page
    # Either wording of the hidden-count banner is accepted.
    banner_markers = ("low-signal", "Ambient provenance")
    assert any(marker in page for marker in banner_markers)
    # The placeholder transcript text itself never reaches the page.
    assert "(no transcript)" not in page
|
||||
|
||||
|
||||
def test_domain_page_collapses_akc_session_snapshots(tmp_data_dir):
    """AKC voice-session memories collapse into one summary on a domain page.

    They are provenance records, so the page is expected to mention them
    collectively rather than paste each transcript inline.
    """
    _init_all()

    session_tags = ("optics", "session", "akc", "voice")
    for idx in range(5):
        session_text = (
            f"AKC voice session session-{idx} (gen-00{idx})\nDuration: 120s, 3 captures\n"
            f"\n## Transcript\nReal transcript number {idx}"
        )
        create_memory(
            "episodic",
            session_text,
            project="p05",
            confidence=0.7,
            # Fresh list per call so no memory shares a tag list with another.
            domain_tags=list(session_tags),
        )

    page = render_domain("optics")

    # The collapsed summary line names the AKC session snapshots ...
    assert "AKC voice session snapshots" in page
    # ... while the individual transcripts stay off the domain page
    # (they are provenance, reachable via /wiki/activity).
    assert "Real transcript number 0" not in page
|
||||
|
||||
|
||||
def test_homepage_stats_exclude_ambient_memory(tmp_data_dir):
    """The homepage stats line counts only real memories in its main figure.

    Ambient AKC sessions and low-signal entries are expected to be reported
    separately, in their own sub-segment of the stats line.
    """
    _init_all()

    # Fixture mix: 2 real memories + 3 ambient sessions + 1 silent junk entry.
    for fact in ("Real fact 1", "Real fact 2"):
        create_memory("knowledge", fact, project="p05", confidence=0.8)

    akc_tags = ("session", "akc", "voice")
    for idx in range(3):
        create_memory(
            "episodic",
            f"AKC voice session s{idx} (gen-00{idx})\nReal transcript x",
            project="p05",
            confidence=0.7,
            domain_tags=list(akc_tags),
        )

    silent_session = (
        "AKC voice session silent (gen-099)\nDuration: 30s, 0 captures\n"
        "\n## Transcript\n(no transcript)\n"
    )
    create_memory(
        "episodic",
        silent_session,
        project="p05",
        confidence=0.7,
        domain_tags=list(akc_tags),
    )

    page = render_homepage()

    assert "3 AKC session snapshots" in page
    assert "low-signal hidden" in page
    # The headline count covers the two real knowledge memories only.
    assert "2 memories" in page
|
||||
|
||||
|
||||
def test_low_signal_predicate_catches_known_patterns():
    """Exercise the two wiki filter predicates directly on stand-in objects.

    A minimal dataclass with ``content`` and ``domain_tags`` is passed in
    place of a real memory record.
    """
    from dataclasses import dataclass

    from atocore.engineering.wiki import _is_akc_session_memory, _is_low_signal_memory

    @dataclass
    class M:
        content: str = ""
        domain_tags: list = None

    # Content that must be classified as low signal: an explicit empty
    # transcript, followed by two flavours of E2E test pollution.
    low_signal_contents = (
        "AKC voice session x\n## Transcript\n(no transcript)\n",
        "IMG integration test — synthetic session",
        "synthetic AKC integration session",
    )
    for text in low_signal_contents:
        assert _is_low_signal_memory(M(content=text))

    # Real knowledge must NOT be classified as low signal.
    real_knowledge = M(content="The CGH is mounted to the fold mirror via…")
    assert not _is_low_signal_memory(real_knowledge)

    # AKC session predicate: matched once with the session tags set, and
    # once with no tags but content that begins "AKC voice session".
    akc_samples = (
        M(content="anything", domain_tags=["session", "akc", "voice"]),
        M(content="AKC voice session abc"),
    )
    for sample in akc_samples:
        assert _is_akc_session_memory(sample)

    unrelated = M(content="Real fact", domain_tags=["optics"])
    assert not _is_akc_session_memory(unrelated)
|
||||
|
||||
Reference in New Issue
Block a user