Compare commits
1 Commits
claude/v1-
...
akc-wiki-h
| Author | SHA1 | Date | |
|---|---|---|---|
| fbf3e9c806 |
@@ -240,10 +240,30 @@ def render_homepage() -> str:
|
|||||||
|
|
||||||
# Quick stats
|
# Quick stats
|
||||||
all_entities = get_entities(limit=500)
|
all_entities = get_entities(limit=500)
|
||||||
all_memories = get_memories(active_only=True, limit=500)
|
all_memories_raw = get_memories(active_only=True, limit=500)
|
||||||
|
# Partition real knowledge from ambient provenance so counts are honest.
|
||||||
|
# Each memory lands in exactly one bucket (low-signal takes priority).
|
||||||
|
all_memories: list = []
|
||||||
|
akc_session_count = 0
|
||||||
|
low_signal_count = 0
|
||||||
|
for _m in all_memories_raw:
|
||||||
|
if _is_low_signal_memory(_m):
|
||||||
|
low_signal_count += 1
|
||||||
|
elif _is_akc_session_memory(_m):
|
||||||
|
akc_session_count += 1
|
||||||
|
else:
|
||||||
|
all_memories.append(_m)
|
||||||
pending = get_memories(status="candidate", limit=500)
|
pending = get_memories(status="candidate", limit=500)
|
||||||
lines.append('<h2>System</h2>')
|
lines.append('<h2>System</h2>')
|
||||||
lines.append(f'<p>{len(all_entities)} entities · {len(all_memories)} active memories · {len(projects)} projects</p>')
|
lines.append(
|
||||||
|
f'<p>{len(all_entities)} entities · {len(all_memories)} memories · '
|
||||||
|
f'{len(projects)} projects'
|
||||||
|
+ (f' · <span style="color:#888;">{akc_session_count} AKC session snapshots'
|
||||||
|
+ (f", {low_signal_count} low-signal hidden" if low_signal_count else "")
|
||||||
|
+ '</span>'
|
||||||
|
if akc_session_count or low_signal_count else '')
|
||||||
|
+ '</p>'
|
||||||
|
)
|
||||||
|
|
||||||
# Triage queue prompt — surfaced prominently if non-empty
|
# Triage queue prompt — surfaced prominently if non-empty
|
||||||
if pending:
|
if pending:
|
||||||
@@ -286,6 +306,44 @@ import re as _re
|
|||||||
_WIKILINK_PATTERN = _re.compile(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]+?))?\]\]")
|
_WIKILINK_PATTERN = _re.compile(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]+?))?\]\]")
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------- signal/noise
# Substrings that identify a memory as low-signal ambient or test residue.
# Such memories bloat the homepage and domain listings without telling the
# reader anything. They stay in the DB (provenance / audit) and are only
# filtered out of the default browsing surfaces.
# Intended opt-in: `include_low_signal=True` as a page query param.
# NOTE(review): confirm the page renderers actually honour that switch.
_LOW_SIGNAL_CONTENT_PATTERNS: tuple[str, ...] = (
    # AKC sessions recorded with a silent microphone
    "(no transcript)",
    # residue left behind by E2E test runs
    "synthetic AKC integration",
    # E2E test prefix appearing inside content
    "AKC-E2E-",
    # prefix used by the image-upload tests
    "AKC-IMG-TEST-",
    # narrative header written by the E2E image test
    "IMG integration test — synthetic",
)

# Ambient memories produced by AKC voice sessions begin with this header.
# They are provenance records rather than knowledge, so domain pages collapse
# them behind a link instead of rendering each one inline.
_AKC_SESSION_HEADER: str = "AKC voice session "
|
||||||
|
|
||||||
|
|
||||||
|
def _is_low_signal_memory(mem) -> bool:
    """Return True when *mem* is known ambient/test pollution.

    A memory is low-signal when its ``content`` attribute is missing,
    ``None``, empty or whitespace-only, or when the content contains any of
    the substrings listed in ``_LOW_SIGNAL_CONTENT_PATTERNS``.

    Args:
        mem: Any object; only its ``content`` attribute is read.

    Returns:
        True if the memory matches one of the low-signal conditions above.
    """
    content = getattr(mem, "content", "") or ""
    # Whitespace-only content is no more informative than an empty string,
    # so it must not slip through and be counted as real knowledge.
    if not content.strip():
        return True
    return any(pattern in content for pattern in _LOW_SIGNAL_CONTENT_PATTERNS)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_akc_session_memory(mem) -> bool:
    """Return True for AKC voice-session ambient snapshots.

    These memories have value as provenance but should not clutter topical
    listings. A memory qualifies when either:

    * its ``domain_tags`` contain ``"voice"`` together with ``"session"`` or
      ``"akc"`` (compared case-insensitively), or
    * its ``content`` starts with ``_AKC_SESSION_HEADER``.

    Args:
        mem: Any object; only ``content`` and ``domain_tags`` are read, and
            both may be missing or ``None``.

    Returns:
        True if the memory looks like an AKC voice-session snapshot.
    """
    raw_tags = getattr(mem, "domain_tags", None) or []
    # Normalise tags the same way render_domain matches them: lower-cased,
    # with None/empty entries ignored, so "AKC"/"Voice" are recognised too.
    tags = {str(tag).lower() for tag in raw_tags if tag}
    if "voice" in tags and not tags.isdisjoint(("session", "akc")):
        return True
    content = getattr(mem, "content", "") or ""
    return content.startswith(_AKC_SESSION_HEADER)
|
||||||
|
|
||||||
|
|
||||||
def _resolve_wikilink(target: str, current_project: str | None) -> tuple[str, str, str]:
|
def _resolve_wikilink(target: str, current_project: str | None) -> tuple[str, str, str]:
|
||||||
"""Resolve a ``[[Name]]`` target to ``(href, css_class, extra_suffix)``.
|
"""Resolve a ``[[Name]]`` target to ``(href, css_class, extra_suffix)``.
|
||||||
|
|
||||||
@@ -874,9 +932,23 @@ def render_domain(tag: str) -> str:
|
|||||||
breadcrumbs=[("Wiki", "/wiki"), ("Domains", "")])
|
breadcrumbs=[("Wiki", "/wiki"), ("Domains", "")])
|
||||||
|
|
||||||
all_mems = get_memories(active_only=True, limit=500)
|
all_mems = get_memories(active_only=True, limit=500)
|
||||||
matching = [m for m in all_mems
|
matching_all = [m for m in all_mems
|
||||||
if any((t or "").lower() == tag for t in (m.domain_tags or []))]
|
if any((t or "").lower() == tag for t in (m.domain_tags or []))]
|
||||||
|
|
||||||
|
# Partition: low-signal test pollution is hidden entirely, ambient AKC
|
||||||
|
# session memories are collapsed (shown as a count + link to
|
||||||
|
# /wiki/activity). Priority: low-signal > session > real.
|
||||||
|
matching: list = []
|
||||||
|
akc_sessions: list = []
|
||||||
|
hidden_low_signal = 0
|
||||||
|
for m in matching_all:
|
||||||
|
if _is_low_signal_memory(m):
|
||||||
|
hidden_low_signal += 1
|
||||||
|
elif _is_akc_session_memory(m):
|
||||||
|
akc_sessions.append(m)
|
||||||
|
else:
|
||||||
|
matching.append(m)
|
||||||
|
|
||||||
# Group by project
|
# Group by project
|
||||||
by_project: dict[str, list] = {}
|
by_project: dict[str, list] = {}
|
||||||
for m in matching:
|
for m in matching:
|
||||||
@@ -884,6 +956,18 @@ def render_domain(tag: str) -> str:
|
|||||||
|
|
||||||
lines = [f'<h1>Domain: <code>{tag}</code></h1>']
|
lines = [f'<h1>Domain: <code>{tag}</code></h1>']
|
||||||
lines.append(f'<p class="meta">{len(matching)} active memories across {len(by_project)} projects</p>')
|
lines.append(f'<p class="meta">{len(matching)} active memories across {len(by_project)} projects</p>')
|
||||||
|
if akc_sessions or hidden_low_signal:
|
||||||
|
noise_bits = []
|
||||||
|
if akc_sessions:
|
||||||
|
noise_bits.append(
|
||||||
|
f'<a href="/wiki/activity">{len(akc_sessions)} AKC voice session snapshots</a>'
|
||||||
|
)
|
||||||
|
if hidden_low_signal:
|
||||||
|
noise_bits.append(f"{hidden_low_signal} low-signal memories hidden")
|
||||||
|
lines.append(
|
||||||
|
f'<p class="meta" style="font-size:0.85rem; color:#888;">'
|
||||||
|
f'Ambient provenance not listed: {" · ".join(noise_bits)}.</p>'
|
||||||
|
)
|
||||||
|
|
||||||
if not matching:
|
if not matching:
|
||||||
lines.append(
|
lines.append(
|
||||||
|
|||||||
@@ -161,3 +161,103 @@ def test_memory_detail_shows_superseded_sources(tmp_data_dir):
|
|||||||
assert html1 is not None
|
assert html1 is not None
|
||||||
assert "superseded" in html1
|
assert "superseded" in html1
|
||||||
assert "auto-dedup-tier1" in html1 # audit trail shows who merged
|
assert "auto-dedup-tier1" in html1 # audit trail shows who merged
|
||||||
|
|
||||||
|
|
||||||
|
# -------------------------------------------------- low-signal wiki filters
|
||||||
|
# Ambient AKC session memories and test pollution shouldn't dominate domain
|
||||||
|
# pages / homepage counts. These tests lock the partitioning behaviour.
|
||||||
|
|
||||||
|
def test_domain_page_hides_empty_transcript_sessions(tmp_data_dir):
    """Silent-mic AKC sessions are counted as hidden, never listed.

    A session whose content contains '(no transcript)' is ambient noise: it
    must land in the low-signal count (which takes priority over the
    AKC-session bucket) and its content must not be rendered inline.
    """
    _init_all()
    # One real knowledge memory with tag "optics"
    create_memory(
        "knowledge",
        "CGH null corrector supports F/1.2 asphere testing",
        project="p05", confidence=0.9, domain_tags=["optics", "cgh"],
    )
    # One silent AKC session with the same tag — should NOT appear
    create_memory(
        "episodic",
        "AKC voice session abc (gen-002)\nDuration: 60s, 2 captures\n"
        "\n## Transcript\n(no transcript)\n",
        project="p05", confidence=0.7,
        domain_tags=["optics", "session", "akc", "voice"],
    )
    html = render_domain("optics")
    assert "CGH null corrector" in html
    # The banner must report exactly the one hidden memory
    assert "1 low-signal memories hidden" in html
    # Low-signal wins over the session bucket, so no session link is shown
    assert "AKC voice session snapshots" not in html
    # And the empty-transcript content itself is not rendered inline
    assert "(no transcript)" not in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_page_collapses_akc_session_snapshots(tmp_data_dir):
    """AKC voice-session memories collapse into one counted link.

    They are provenance records: the domain page shows how many exist and
    never pastes any of their transcripts inline.
    """
    _init_all()
    session_total = 5
    for i in range(session_total):
        create_memory(
            "episodic",
            f"AKC voice session session-{i} (gen-00{i})\nDuration: 120s, 3 captures\n"
            f"\n## Transcript\nReal transcript number {i}",
            project="p05", confidence=0.7,
            domain_tags=["optics", "session", "akc", "voice"],
        )
    html = render_domain("optics")
    # The collapsed link must carry the exact number of sessions
    assert f"{session_total} AKC voice session snapshots" in html
    # None of the session transcripts should be pasted inline on the domain
    # page (they're provenance, linked via /wiki/activity)
    for i in range(session_total):
        assert f"Real transcript number {i}" not in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_homepage_stats_exclude_ambient_memory(tmp_data_dir):
    """Homepage stats count real memories only.

    Ambient AKC sessions and low-signal memories are reported separately in
    the dimmed sub-segment, each memory landing in exactly one bucket.
    """
    _init_all()
    # 2 real memories + 3 ambient sessions + 1 silent junk
    create_memory("knowledge", "Real fact 1", project="p05", confidence=0.8)
    create_memory("knowledge", "Real fact 2", project="p05", confidence=0.8)
    for i in range(3):
        create_memory(
            "episodic",
            f"AKC voice session s{i} (gen-00{i})\nReal transcript x",
            project="p05", confidence=0.7,
            domain_tags=["session", "akc", "voice"],
        )
    create_memory(
        "episodic",
        "AKC voice session silent (gen-099)\nDuration: 30s, 0 captures\n"
        "\n## Transcript\n(no transcript)\n",
        project="p05", confidence=0.7,
        domain_tags=["session", "akc", "voice"],
    )
    html = render_homepage()
    # Dimmed segment: exact counts, with the silent session counted as
    # low-signal rather than as a fourth session snapshot
    assert "3 AKC session snapshots, 1 low-signal hidden" in html
    # Main count reflects only real knowledge; the surrounding separators
    # stop "12 memories" or similar from satisfying the check
    assert "entities · 2 memories · " in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_low_signal_predicate_catches_known_patterns():
    """Exercise both predicates directly against a minimal stand-in object."""
    from dataclasses import dataclass

    from atocore.engineering.wiki import _is_low_signal_memory, _is_akc_session_memory

    @dataclass
    class M:
        # Only the two attributes the predicates read
        content: str = ""
        domain_tags: list | None = None

    # Explicit empty-transcript — low signal
    assert _is_low_signal_memory(M(content="AKC voice session x\n## Transcript\n(no transcript)\n"))
    # Empty content — low signal
    assert _is_low_signal_memory(M(content=""))
    # E2E test pollution — low signal
    assert _is_low_signal_memory(M(content="IMG integration test — synthetic session"))
    assert _is_low_signal_memory(M(content="synthetic AKC integration session"))
    assert _is_low_signal_memory(M(content="AKC-E2E-0001 capture"))
    assert _is_low_signal_memory(M(content="AKC-IMG-TEST-7 upload"))
    # Real knowledge — NOT low signal
    assert not _is_low_signal_memory(M(content="The CGH is mounted to the fold mirror via…"))
    # AKC session tag predicate
    assert _is_akc_session_memory(M(content="anything", domain_tags=["session", "akc", "voice"]))
    # Header match works without any tags
    assert _is_akc_session_memory(M(content="AKC voice session abc"))
    assert not _is_akc_session_memory(M(content="Real fact", domain_tags=["optics"]))
    assert not _is_akc_session_memory(M(content="Real fact"))
|
||||||
|
|||||||
Reference in New Issue
Block a user