Files
ATOCore/tests/test_wiki_pages.py
Anto01 fbf3e9c806 wiki: hide low-signal memories + collapse ambient AKC sessions on domain/homepage
Real usage showed two failure modes on /wiki/domains/{tag} and the homepage:

  1. Empty-transcript AKC sessions (mic-off) and synthetic E2E test memories
     competed with real knowledge in topical listings. A user hitting the
     'optics' domain page saw pages of '(no transcript)' and
     'IMG integration test — synthetic session' before finding anything useful.

  2. Every AKC voice session writes an episodic memory as provenance. At
     one session per capture burst these quickly dominate any domain page
     carrying a common tag like 'p05' or 'optics'.

This change partitions memories into three buckets on the domain and
homepage surfaces:
  - low-signal  → hidden entirely (counted in a dim sub-line)
  - AKC session → collapsed behind a single link to /wiki/activity
  - real        → rendered inline

Filter predicates are additive: a memory that's both an empty-transcript
session AND tagged as AKC lands in low-signal (priority).

Tests: 4 new tests in test_wiki_pages.py lock the partition behaviour.
All 15 pre-existing wiki tests still pass.

No schema change, no migration needed.
2026-04-22 13:51:40 -04:00

264 lines
9.5 KiB
Python

"""Tests for the new wiki pages shipped in the UI refresh:
- /wiki/capture (7I follow-up)
- /wiki/memories/{id} (7E)
- /wiki/domains/{tag} (7F)
- /wiki/activity (activity feed)
- home refresh (topnav + activity snippet)
"""
from __future__ import annotations
import pytest
from atocore.engineering.wiki import (
render_activity,
render_capture,
render_domain,
render_homepage,
render_memory_detail,
)
from atocore.engineering.service import init_engineering_schema
from atocore.memory.service import create_memory
from atocore.models.database import init_db
def _init_all():
    """Initialise every schema the wiki renderers read from.

    Wiki pages pull from the memory schema as well as the engineering
    schema, and the engineering one is set up through its own
    init_engineering_schema() call, so tests have to run both.
    """
    init_db()
    init_engineering_schema()
def test_capture_page_renders_as_fallback(tmp_data_dir):
    """The capture page still renders, but is labelled as a fallback."""
    _init_all()
    page = render_capture()

    expected_fragments = (
        # Reachable, yet presented as a fallback rather than promoted
        "fallback only",
        "sanctioned capture surfaces are Claude Code",
        # The form inputs remain available for emergency use
        "cap-prompt",
        "cap-response",
    )
    for fragment in expected_fragments:
        assert fragment in page
def test_capture_not_in_topnav(tmp_data_dir):
    """The paste form is not the sanctioned path, so the topnav omits it."""
    _init_all()
    homepage = render_homepage()

    for forbidden in ("/wiki/capture", "📥 Capture"):
        assert forbidden not in homepage
def test_memory_detail_renders(tmp_data_dir):
    """A memory's detail page shows its content, audit trail and links."""
    _init_all()
    memory = create_memory(
        "knowledge",
        "APM uses NX bridge for DXF → STL",
        project="apm",
        confidence=0.7,
        domain_tags=["apm", "nx", "cad"],
    )

    page = render_memory_detail(memory.id)
    assert page is not None

    expected_fragments = (
        "APM uses NX",
        "Audit trail",
        # Tags link through to their domain pages
        '/wiki/domains/apm',
        '/wiki/domains/nx',
        # The owning project is linked as well
        '/wiki/projects/apm',
    )
    for fragment in expected_fragments:
        assert fragment in page
def test_memory_detail_404(tmp_data_dir):
    """Rendering the detail page of an unknown memory id yields None."""
    _init_all()
    missing_page = render_memory_detail("nonexistent-id")
    assert missing_page is None
def test_domain_page_lists_memories(tmp_data_dir):
    """The domain page lists memories carrying the tag, and only those.

    Two memories tagged ``optics`` (in different projects) must appear
    with their project names; a third memory tagged only ``firmware``
    must be absent from the ``optics`` page.
    """
    _init_all()
    create_memory("knowledge", "optics fact 1", project="p04-gigabit",
                  domain_tags=["optics"])
    create_memory("knowledge", "optics fact 2", project="p05-interferometer",
                  domain_tags=["optics", "metrology"])
    # Distinctive content so the negative check below cannot be satisfied
    # (or broken) by a generic word that happens to occur in page chrome.
    unrelated_content = "polisher spindle ramp profile"
    create_memory("knowledge", unrelated_content, project="p06-polisher",
                  domain_tags=["firmware"])

    html = render_domain("optics")

    assert "Domain: <code>optics</code>" in html
    assert "p04-gigabit" in html
    assert "p05-interferometer" in html
    assert "optics fact 1" in html
    assert "optics fact 2" in html
    # Unrelated memory should NOT appear. The previous form
    # (`"other" not in html or "firmware" not in html`) passed whenever
    # either word was missing, so it never proved the memory was excluded.
    assert unrelated_content not in html
def test_domain_page_empty(tmp_data_dir):
    """A tag that no memory carries renders the empty-state message."""
    _init_all()
    page = render_domain("definitely-not-a-tag")
    assert "No memories currently carry" in page
def test_domain_page_normalizes_tag(tmp_data_dir):
    """Tag lookup ignores case and surrounding whitespace.

    The memory is stored under ``firmware``; requesting the page with a
    differently-cased or padded tag must still list that memory.
    """
    _init_all()
    # Distinctive content: the old fixture content ("x") could not be
    # detected in the page, so the test only looked for the tag string.
    content = "bootloader watchdog timeout is 500 ms"
    create_memory("knowledge", content, domain_tags=["firmware"])

    # Case-insensitive
    upper_html = render_domain("FIRMWARE")
    assert "firmware" in upper_html
    assert content in upper_html

    # Whitespace tolerant. Checking only for "firmware" here would be
    # vacuous, because the padded input " firmware " already contains it.
    padded_html = render_domain(" firmware ")
    assert "firmware" in padded_html
    assert content in padded_html
def test_activity_feed_renders(tmp_data_dir):
    """The activity feed shows a freshly created memory."""
    _init_all()
    memory = create_memory("knowledge", "activity test")
    feed = render_activity()

    short_id = memory.id[:8]
    expected_fragments = (
        "Activity Feed",
        # The new memory is expected to surface as a "created" event
        "created",
        # The feed refers to the memory by the first 8 characters of its id
        short_id,
    )
    for fragment in expected_fragments:
        assert fragment in feed
def test_activity_feed_groups_by_action_and_actor(tmp_data_dir):
    """The feed mentions both the action and the actor of recent events."""
    _init_all()
    for content in ("m0", "m1", "m2"):
        create_memory("knowledge", content, actor="test-actor")

    feed = render_activity()

    # The summary row is expected to read something like "created: 3";
    # only the action name and the actor name are pinned here.
    for fragment in ("created", "test-actor"):
        assert fragment in feed
def test_homepage_has_topnav_and_activity(tmp_data_dir):
    """The homepage carries the topnav entries and the activity snippet."""
    _init_all()
    create_memory("knowledge", "homepage test")
    homepage = render_homepage()

    # Topnav entries (Capture is left out: it is not sanctioned capture)
    for nav_fragment in ("🏠 Home", "📡 Activity", "/wiki/activity"):
        assert nav_fragment in homepage
    assert "/wiki/capture" not in homepage

    # Activity snippet heading
    assert "What the brain is doing" in homepage
def test_memory_detail_shows_superseded_sources(tmp_data_dir):
    """Merged-away source memories keep a working detail page.

    Once a merge runs, the sources move to status=superseded; their
    detail page should continue to render.
    """
    from atocore.memory.service import create_merge_candidate, merge_memories

    _init_all()
    sources = [
        create_memory("knowledge", "alpha variant 1", project="test"),
        create_memory("knowledge", "alpha variant 2", project="test"),
    ]
    candidate_id = create_merge_candidate(
        memory_ids=[source.id for source in sources],
        similarity=0.9,
        proposed_content="alpha merged",
        proposed_memory_type="knowledge",
        proposed_project="test",
    )
    merge_memories(candidate_id, actor="auto-dedup-tier1")

    # The first source's page must render and report the superseded status
    source_page = render_memory_detail(sources[0].id)
    assert source_page is not None
    assert "superseded" in source_page
    # The audit trail names the actor that performed the merge
    assert "auto-dedup-tier1" in source_page
# -------------------------------------------------- low-signal wiki filters
# Ambient AKC session memories and test pollution shouldn't dominate domain
# pages / homepage counts. These tests lock the partitioning behaviour.
def test_domain_page_hides_empty_transcript_sessions(tmp_data_dir):
    """Empty-transcript AKC sessions are counted as hidden, not listed.

    A silent-mic session (its content contains '(no transcript)') is
    ambient noise: it belongs in the hidden count, not the main list.
    """
    _init_all()

    # A genuine knowledge memory tagged "optics"
    create_memory(
        "knowledge",
        "CGH null corrector supports F/1.2 asphere testing",
        project="p05",
        confidence=0.9,
        domain_tags=["optics", "cgh"],
    )

    # A silent AKC session sharing the tag; it must stay out of the list
    silent_session = (
        "AKC voice session abc (gen-002)\nDuration: 60s, 2 captures\n"
        "\n## Transcript\n(no transcript)\n"
    )
    create_memory(
        "episodic",
        silent_session,
        project="p05",
        confidence=0.7,
        domain_tags=["optics", "session", "akc", "voice"],
    )

    page = render_domain("optics")

    assert "CGH null corrector" in page
    # Either wording of the hidden-count banner is accepted
    banner_markers = ("low-signal", "Ambient provenance")
    assert any(marker in page for marker in banner_markers)
    # The empty-transcript body itself is never rendered inline
    assert "(no transcript)" not in page
def test_domain_page_collapses_akc_session_snapshots(tmp_data_dir):
    """AKC voice-session memories are collapsed, not inlined.

    They are provenance records: the domain page should count them
    behind a single collapsed link instead of rendering each one.
    """
    _init_all()
    session_count = 5
    for i in range(session_count):
        create_memory(
            "episodic",
            f"AKC voice session session-{i} (gen-00{i})\nDuration: 120s, 3 captures\n"
            f"\n## Transcript\nReal transcript number {i}",
            project="p05", confidence=0.7,
            domain_tags=["optics", "session", "akc", "voice"],
        )

    html = render_domain("optics")

    # Inline count should mention AKC session snapshots
    assert "AKC voice session snapshots" in html
    # None of the session transcripts should be pasted inline on the domain
    # page (they're provenance, linked via /wiki/activity). Check every
    # session rather than only the first, so a renderer that leaks some of
    # them is still caught.
    for i in range(session_count):
        assert f"Real transcript number {i}" not in html
def test_homepage_stats_exclude_ambient_memory(tmp_data_dir):
    """The homepage stats count real memories and report ambient ones apart.

    Fixture: 2 real memories, 3 ambient AKC sessions with a transcript,
    and 1 silent session with no transcript.
    """
    _init_all()

    # Real knowledge
    for n in (1, 2):
        create_memory("knowledge", f"Real fact {n}", project="p05", confidence=0.8)

    # Ambient sessions that do carry a transcript
    session_tags = ["session", "akc", "voice"]
    for i in range(3):
        create_memory(
            "episodic",
            f"AKC voice session s{i} (gen-00{i})\nReal transcript x",
            project="p05",
            confidence=0.7,
            domain_tags=list(session_tags),
        )

    # Silent session: empty transcript
    create_memory(
        "episodic",
        "AKC voice session silent (gen-099)\nDuration: 30s, 0 captures\n"
        "\n## Transcript\n(no transcript)\n",
        project="p05",
        confidence=0.7,
        domain_tags=list(session_tags),
    )

    homepage = render_homepage()

    expected_fragments = (
        "3 AKC session snapshots",
        "low-signal hidden",
        # The main count covers the real knowledge only
        "2 memories",
    )
    for fragment in expected_fragments:
        assert fragment in homepage
def test_low_signal_predicate_catches_known_patterns():
    """Exercise the two partition predicates against a minimal stand-in.

    The stand-in exposes only the ``content`` and ``domain_tags``
    attributes; ``domain_tags`` defaults to None.
    """
    from dataclasses import dataclass

    from atocore.engineering.wiki import _is_akc_session_memory, _is_low_signal_memory

    @dataclass
    class FakeMemory:
        content: str = ""
        domain_tags: list = None

    low_signal_contents = (
        # Explicit empty transcript
        "AKC voice session x\n## Transcript\n(no transcript)\n",
        # E2E test pollution
        "IMG integration test — synthetic session",
        "synthetic AKC integration session",
    )
    for text in low_signal_contents:
        assert _is_low_signal_memory(FakeMemory(content=text))

    # Real knowledge is not low signal
    real_knowledge = FakeMemory(content="The CGH is mounted to the fold mirror via…")
    assert not _is_low_signal_memory(real_knowledge)

    # AKC session predicate: matched through the tags or through the content
    tagged_session = FakeMemory(content="anything", domain_tags=["session", "akc", "voice"])
    assert _is_akc_session_memory(tagged_session)
    assert _is_akc_session_memory(FakeMemory(content="AKC voice session abc"))

    plain_fact = FakeMemory(content="Real fact", domain_tags=["optics"])
    assert not _is_akc_session_memory(plain_fact)