diff --git a/src/atocore/engineering/wiki.py b/src/atocore/engineering/wiki.py
index 3ad8d36..057d32d 100644
--- a/src/atocore/engineering/wiki.py
+++ b/src/atocore/engineering/wiki.py
@@ -240,10 +240,30 @@ def render_homepage() -> str:
# Quick stats
all_entities = get_entities(limit=500)
- all_memories = get_memories(active_only=True, limit=500)
+ all_memories_raw = get_memories(active_only=True, limit=500)
+ # Partition real knowledge from ambient provenance so counts are honest.
+ # Each memory lands in exactly one bucket (low-signal takes priority).
+ all_memories: list = []
+ akc_session_count = 0
+ low_signal_count = 0
+ for _m in all_memories_raw:
+ if _is_low_signal_memory(_m):
+ low_signal_count += 1
+ elif _is_akc_session_memory(_m):
+ akc_session_count += 1
+ else:
+ all_memories.append(_m)
pending = get_memories(status="candidate", limit=500)
lines.append('
System
')
- lines.append(f'{len(all_entities)} entities · {len(all_memories)} active memories · {len(projects)} projects
')
+ lines.append(
+ f'{len(all_entities)} entities · {len(all_memories)} memories · '
+ f'{len(projects)} projects'
+ + (f' · {akc_session_count} AKC session snapshots'
+ + (f", {low_signal_count} low-signal hidden" if low_signal_count else "")
+ + ''
+ if akc_session_count or low_signal_count else '')
+ + '
'
+ )
# Triage queue prompt — surfaced prominently if non-empty
if pending:
@@ -286,6 +306,44 @@ import re as _re
_WIKILINK_PATTERN = _re.compile(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]+?))?\]\]")
+# ----------------------------------------------------------------- signal/noise
+# Memories whose content contains any of these substrings are low-signal
+# ambient artefacts — they inflate lists on the homepage and domain pages
+# without being informative. They remain in the DB (for provenance / audit)
+# but are filtered from default browsing surfaces.
+#
+# Matching is a plain, case-sensitive substring test against the memory's
+# content (see `_is_low_signal_memory`), so keep each entry specific enough
+# that it cannot occur in genuine knowledge text.
+#
+# The original note here said: pass `include_low_signal=True` on a page query
+# param to surface them.
+# NOTE(review): no handler for that flag is visible alongside these
+# constants — confirm it is wired up before relying on it.
+_LOW_SIGNAL_CONTENT_PATTERNS = (
+ "(no transcript)", # silent-mic AKC sessions
+ "synthetic AKC integration", # E2E test pollution
+ "AKC-E2E-", # E2E test prefix in content
+ "AKC-IMG-TEST-", # image-upload test prefix
+ "IMG integration test — synthetic", # E2E narrative header
+)
+
+# AKC voice-session ambient memories start with this header — they're
+# provenance records, not knowledge. Collapse them behind a link on domain
+# pages instead of rendering each inline.
+# Compared with `str.startswith`, so the trailing space is significant.
+_AKC_SESSION_HEADER = "AKC voice session "
+
+
+def _is_low_signal_memory(mem) -> bool:
+    """Return True when *mem* is ambient/test pollution rather than knowledge.
+
+    A memory is low-signal when its ``content`` attribute is missing,
+    ``None``, empty or whitespace-only, or when the content contains any of
+    the substrings listed in ``_LOW_SIGNAL_CONTENT_PATTERNS`` (case-sensitive
+    substring match).
+    """
+    content = getattr(mem, "content", "") or ""
+    # Blank content carries no information. Whitespace-only text is treated
+    # exactly like an empty string so it cannot slip into the "real" bucket.
+    if not content.strip():
+        return True
+    return any(pattern in content for pattern in _LOW_SIGNAL_CONTENT_PATTERNS)
+
+
+def _is_akc_session_memory(mem) -> bool:
+    """Return True for AKC voice-session ambient snapshots.
+
+    These records have value as provenance but should not clutter topical
+    listings. A memory qualifies when either:
+
+    * its ``domain_tags`` contain ``"voice"`` together with ``"session"`` or
+      ``"akc"``, or
+    * its ``content`` starts with ``_AKC_SESSION_HEADER``.
+
+    Tags are compared case-insensitively and ``None`` entries are ignored,
+    mirroring how ``render_domain`` normalises tags before comparing them.
+    """
+    content = getattr(mem, "content", "") or ""
+    raw_tags = getattr(mem, "domain_tags", None) or []
+    # Normalise once so "AKC" / "Voice" are recognised the same way the
+    # domain page lower-cases tags when it filters memories.
+    tags = {(tag or "").lower() for tag in raw_tags}
+    if "voice" in tags and not tags.isdisjoint(("session", "akc")):
+        return True
+    return content.startswith(_AKC_SESSION_HEADER)
+
+
def _resolve_wikilink(target: str, current_project: str | None) -> tuple[str, str, str]:
"""Resolve a ``[[Name]]`` target to ``(href, css_class, extra_suffix)``.
@@ -874,8 +932,22 @@ def render_domain(tag: str) -> str:
breadcrumbs=[("Wiki", "/wiki"), ("Domains", "")])
all_mems = get_memories(active_only=True, limit=500)
- matching = [m for m in all_mems
- if any((t or "").lower() == tag for t in (m.domain_tags or []))]
+ matching_all = [m for m in all_mems
+ if any((t or "").lower() == tag for t in (m.domain_tags or []))]
+
+ # Partition: low-signal test pollution is hidden entirely, ambient AKC
+ # session memories are collapsed (shown as a count + link to
+ # /wiki/activity). Priority: low-signal > session > real.
+ matching: list = []
+ akc_sessions: list = []
+ hidden_low_signal = 0
+ for m in matching_all:
+ if _is_low_signal_memory(m):
+ hidden_low_signal += 1
+ elif _is_akc_session_memory(m):
+ akc_sessions.append(m)
+ else:
+ matching.append(m)
# Group by project
by_project: dict[str, list] = {}
@@ -884,6 +956,18 @@ def render_domain(tag: str) -> str:
lines = [f'Domain: {tag}
']
lines.append(f'{len(matching)} active memories across {len(by_project)} projects
')
+ if akc_sessions or hidden_low_signal:
+ noise_bits = []
+ if akc_sessions:
+ noise_bits.append(
+ f'{len(akc_sessions)} AKC voice session snapshots'
+ )
+ if hidden_low_signal:
+ noise_bits.append(f"{hidden_low_signal} low-signal memories hidden")
+ lines.append(
+ f''
+ f'Ambient provenance not listed: {" · ".join(noise_bits)}.
'
+ )
if not matching:
lines.append(
diff --git a/tests/test_wiki_pages.py b/tests/test_wiki_pages.py
index 64b50c6..935d05a 100644
--- a/tests/test_wiki_pages.py
+++ b/tests/test_wiki_pages.py
@@ -161,3 +161,103 @@ def test_memory_detail_shows_superseded_sources(tmp_data_dir):
assert html1 is not None
assert "superseded" in html1
assert "auto-dedup-tier1" in html1 # audit trail shows who merged
+
+
+# -------------------------------------------------- low-signal wiki filters
+# Ambient AKC session memories and test pollution shouldn't dominate domain
+# pages / homepage counts. These tests lock the partitioning behaviour.
+
+def test_domain_page_hides_empty_transcript_sessions(tmp_data_dir):
+    """A silent-mic AKC session (content contains '(no transcript)') is
+    ambient noise: it feeds the hidden count and never the main list."""
+    _init_all()
+
+    # Genuine knowledge tagged "optics" — this one must be rendered.
+    create_memory(
+        "knowledge",
+        "CGH null corrector supports F/1.2 asphere testing",
+        project="p05",
+        confidence=0.9,
+        domain_tags=["optics", "cgh"],
+    )
+
+    # Silent AKC session sharing the same tag — must stay out of the list.
+    silent_session = (
+        "AKC voice session abc (gen-002)\nDuration: 60s, 2 captures\n"
+        "\n## Transcript\n(no transcript)\n"
+    )
+    create_memory(
+        "episodic",
+        silent_session,
+        project="p05",
+        confidence=0.7,
+        domain_tags=["optics", "session", "akc", "voice"],
+    )
+
+    page = render_domain("optics")
+
+    assert "CGH null corrector" in page
+    # Some form of the hidden-count banner has to be present.
+    assert any(marker in page for marker in ("low-signal", "Ambient provenance"))
+    # The empty-transcript body itself is never rendered inline.
+    assert "(no transcript)" not in page
+
+
+def test_domain_page_collapses_akc_session_snapshots(tmp_data_dir):
+    """AKC voice-session memories are provenance records: the domain page
+    reports them as one collapsed count rather than inlining each one."""
+    _init_all()
+
+    # Five sessions, each with a distinct and recognisable transcript line.
+    transcripts = [
+        f"AKC voice session session-{i} (gen-00{i})\nDuration: 120s, 3 captures\n"
+        f"\n## Transcript\nReal transcript number {i}"
+        for i in range(5)
+    ]
+    for transcript in transcripts:
+        create_memory(
+            "episodic",
+            transcript,
+            project="p05",
+            confidence=0.7,
+            domain_tags=["optics", "session", "akc", "voice"],
+        )
+
+    page = render_domain("optics")
+
+    # The summary line mentions the collapsed session snapshots...
+    assert "AKC voice session snapshots" in page
+    # ...while no transcript is pasted inline (they are provenance, reachable
+    # via /wiki/activity).
+    assert "Real transcript number 0" not in page
+
+
+def test_homepage_stats_exclude_ambient_memory(tmp_data_dir):
+    """The homepage stats line reports the real memory count and moves the
+    ambient counts into their own sub-segment."""
+    _init_all()
+    ambient_tags = ("session", "akc", "voice")
+
+    # Fixture mix: 2 real memories + 3 ambient sessions + 1 silent junk entry.
+    for fact in ("Real fact 1", "Real fact 2"):
+        create_memory("knowledge", fact, project="p05", confidence=0.8)
+
+    for i in range(3):
+        create_memory(
+            "episodic",
+            f"AKC voice session s{i} (gen-00{i})\nReal transcript x",
+            project="p05",
+            confidence=0.7,
+            domain_tags=list(ambient_tags),
+        )
+
+    silent_session = (
+        "AKC voice session silent (gen-099)\nDuration: 30s, 0 captures\n"
+        "\n## Transcript\n(no transcript)\n"
+    )
+    create_memory(
+        "episodic",
+        silent_session,
+        project="p05",
+        confidence=0.7,
+        domain_tags=list(ambient_tags),
+    )
+
+    page = render_homepage()
+
+    assert "3 AKC session snapshots" in page
+    assert "low-signal hidden" in page
+    # The headline count reflects real knowledge only.
+    assert "2 memories" in page
+
+
+def test_low_signal_predicate_catches_known_patterns():
+    """Pin both partition predicates against representative inputs."""
+    from atocore.engineering.wiki import _is_low_signal_memory, _is_akc_session_memory
+    from dataclasses import dataclass
+
+    # Minimal stand-in exposing only the attributes the predicates read.
+    @dataclass
+    class M:
+        content: str = ""
+        domain_tags: list = None
+
+    # Explicit empty transcript, then two flavours of E2E test pollution.
+    noisy_contents = (
+        "AKC voice session x\n## Transcript\n(no transcript)\n",
+        "IMG integration test — synthetic session",
+        "synthetic AKC integration session",
+    )
+    for text in noisy_contents:
+        assert _is_low_signal_memory(M(content=text))
+
+    # Real knowledge must not be flagged as low signal.
+    real_memory = M(content="The CGH is mounted to the fold mirror via…")
+    assert not _is_low_signal_memory(real_memory)
+
+    # Session predicate: matched by tags, matched by header, and not matched.
+    tagged_session = M(content="anything", domain_tags=["session", "akc", "voice"])
+    header_session = M(content="AKC voice session abc")
+    plain_fact = M(content="Real fact", domain_tags=["optics"])
+    assert _is_akc_session_memory(tagged_session)
+    assert _is_akc_session_memory(header_session)
+    assert not _is_akc_session_memory(plain_fact)