feat: "Make It Actually Useful" sprint — observability + Phase 10

Pipeline observability:
- Retrieval harness runs nightly (Step E in batch-extract.sh)
- Pipeline summary persisted to project state after each run
  (pipeline_last_run, pipeline_summary, retrieval_harness_result)
- Dashboard enhanced: interaction total + by_client, pipeline health
  (last_run, hours_since_last_run, harness results, triage stats),
  dynamic project list from registry

Phase 10 — reinforcement-based auto-promotion:
- auto_promote_reinforced(): candidates with reference_count >= 3,
  confidence >= 0.7, and a reference within the last 14 days
  auto-graduate to active
- expire_stale_candidates(): candidates unreinforced for 14+ days
  auto-rejected to prevent unbounded queue growth
- Both wired into nightly cron (Step B2)
- Batch script: scripts/auto_promote_reinforced.py (--dry-run support)

Knowledge seeding:
- scripts/seed_project_state.py: 26 curated Trusted Project State
  entries across p04-gigabit, p05-interferometer, p06-polisher,
  atomizer-v2, abb-space, atocore (decisions, requirements, facts,
  contacts, milestones)

Tests: 299 → 303 (4 new Phase 10 tests)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 13:59:12 -04:00
parent b687e7fa6f
commit 775960c8c8
6 changed files with 570 additions and 20 deletions

View File

@@ -929,11 +929,14 @@ def api_dashboard() -> dict:
"""One-shot system observability dashboard.
Returns memory counts by type/project/status, project state
entry counts, recent interaction volume, and extraction pipeline
entry counts, interaction volume by client, pipeline health
(harness, triage stats, last run), and extraction pipeline
status — everything an operator needs to understand AtoCore's
health beyond the basic /health endpoint.
"""
import json as _json
from collections import Counter
from datetime import datetime as _dt, timezone as _tz
all_memories = get_memories(active_only=False, limit=500)
active = [m for m in all_memories if m.status == "active"]
@@ -943,27 +946,81 @@ def api_dashboard() -> dict:
project_counts = dict(Counter(m.project or "(none)" for m in active))
reinforced = [m for m in active if m.reference_count > 0]
interactions = list_interactions(limit=1)
recent_interaction = interactions[0].created_at if interactions else None
# Interaction stats — total + by_client from DB directly
interaction_stats: dict = {"most_recent": None, "total": 0, "by_client": {}}
try:
from atocore.models.database import get_connection as _gc
# Extraction pipeline status
extract_state = {}
with _gc() as conn:
row = conn.execute("SELECT count(*) FROM interactions").fetchone()
interaction_stats["total"] = row[0] if row else 0
rows = conn.execute(
"SELECT client, count(*) FROM interactions GROUP BY client"
).fetchall()
interaction_stats["by_client"] = {r[0]: r[1] for r in rows}
row = conn.execute(
"SELECT created_at FROM interactions ORDER BY created_at DESC LIMIT 1"
).fetchone()
interaction_stats["most_recent"] = row[0] if row else None
except Exception:
interactions = list_interactions(limit=1)
interaction_stats["most_recent"] = (
interactions[0].created_at if interactions else None
)
# Pipeline health from project state
pipeline: dict = {}
extract_state: dict = {}
try:
state_entries = get_state("atocore")
for entry in state_entries:
if entry.category == "status" and entry.key == "last_extract_batch_run":
if entry.category != "status":
continue
if entry.key == "last_extract_batch_run":
extract_state["last_run"] = entry.value
elif entry.key == "pipeline_last_run":
pipeline["last_run"] = entry.value
try:
last = _dt.fromisoformat(entry.value.replace("Z", "+00:00"))
delta = _dt.now(_tz.utc) - last
pipeline["hours_since_last_run"] = round(
delta.total_seconds() / 3600, 1
)
except Exception:
pass
elif entry.key == "pipeline_summary":
try:
pipeline["summary"] = _json.loads(entry.value)
except Exception:
pipeline["summary_raw"] = entry.value
elif entry.key == "retrieval_harness_result":
try:
pipeline["harness"] = _json.loads(entry.value)
except Exception:
pipeline["harness_raw"] = entry.value
except Exception:
pass
# Project state counts
# Project state counts — include all registered projects
ps_counts = {}
for proj_id in ["p04-gigabit", "p05-interferometer", "p06-polisher", "atocore"]:
try:
entries = get_state(proj_id)
ps_counts[proj_id] = len(entries)
except Exception:
pass
try:
from atocore.projects.registry import load_project_registry as _lpr
for proj in _lpr():
try:
entries = get_state(proj.project_id)
ps_counts[proj.project_id] = len(entries)
except Exception:
pass
except Exception:
for proj_id in [
"p04-gigabit", "p05-interferometer", "p06-polisher", "atocore",
]:
try:
entries = get_state(proj_id)
ps_counts[proj_id] = len(entries)
except Exception:
pass
return {
"memories": {
@@ -977,10 +1034,9 @@ def api_dashboard() -> dict:
"counts": ps_counts,
"total": sum(ps_counts.values()),
},
"interactions": {
"most_recent": recent_interaction,
},
"interactions": interaction_stats,
"extraction_pipeline": extract_state,
"pipeline": pipeline,
}

View File

@@ -340,6 +340,84 @@ def reinforce_memory(
return True, old_confidence, new_confidence
def auto_promote_reinforced(
    min_reference_count: int = 3,
    min_confidence: float = 0.7,
    max_age_days: int = 14,
) -> list[str]:
    """Graduate well-reinforced candidate memories to active status.

    Phase 10: a candidate that keeps being referenced is promoted
    without human review. A candidate is selected when its reference
    count is at least ``min_reference_count``, its confidence is at
    least ``min_confidence``, and its ``last_referenced_at`` is not
    older than ``max_age_days`` days before now (UTC).

    Returns the IDs of the memories for which ``promote_memory``
    returned a truthy result.
    """
    from datetime import timedelta

    # The timestamp is compared as text in SQL, so it is rendered in the
    # "YYYY-MM-DD HH:MM:SS" layout before being bound as a parameter.
    oldest_allowed = datetime.now(timezone.utc) - timedelta(days=max_age_days)
    cutoff = oldest_allowed.strftime("%Y-%m-%d %H:%M:%S")

    query = (
        "SELECT id, content, memory_type, project, confidence, "
        "reference_count FROM memories "
        "WHERE status = 'candidate' "
        "AND COALESCE(reference_count, 0) >= ? "
        "AND confidence >= ? "
        "AND last_referenced_at >= ?"
    )
    with get_connection() as conn:
        candidates = conn.execute(
            query,
            (min_reference_count, min_confidence, cutoff),
        ).fetchall()

    promoted: list[str] = []
    for candidate in candidates:
        memory_id = candidate["id"]
        if not promote_memory(memory_id):
            # Promotion was refused; leave the candidate untouched.
            continue
        promoted.append(memory_id)
        log.info(
            "memory_auto_promoted",
            memory_id=memory_id,
            memory_type=candidate["memory_type"],
            project=candidate["project"] or "(global)",
            reference_count=candidate["reference_count"],
            confidence=round(candidate["confidence"], 3),
        )
    return promoted
def expire_stale_candidates(
    max_age_days: int = 14,
) -> list[str]:
    """Reject candidates that aged out of the queue without reinforcement.

    A candidate is selected when its reference count is zero (or NULL)
    and its ``created_at`` is earlier than ``max_age_days`` days before
    now (UTC). Rejecting these keeps the candidate queue from growing
    without bound.

    Returns the IDs of the memories for which
    ``reject_candidate_memory`` returned a truthy result.
    """
    from datetime import timedelta

    # Rendered as "YYYY-MM-DD HH:MM:SS" because the SQL comparison
    # against created_at is done on the bound text value.
    oldest_allowed = datetime.now(timezone.utc) - timedelta(days=max_age_days)
    cutoff = oldest_allowed.strftime("%Y-%m-%d %H:%M:%S")

    query = (
        "SELECT id FROM memories "
        "WHERE status = 'candidate' "
        "AND COALESCE(reference_count, 0) = 0 "
        "AND created_at < ?"
    )
    with get_connection() as conn:
        stale_rows = conn.execute(query, (cutoff,)).fetchall()

    expired: list[str] = []
    for stale in stale_rows:
        memory_id = stale["id"]
        if not reject_candidate_memory(memory_id):
            # Rejection was refused; skip without logging.
            continue
        expired.append(memory_id)
        log.info("memory_expired", memory_id=memory_id)
    return expired
def get_memories_for_context(
memory_types: list[str] | None = None,
project: str | None = None,