feat: "Make It Actually Useful" sprint — observability + Phase 10

Pipeline observability:
- Retrieval harness runs nightly (Step E in batch-extract.sh)
- Pipeline summary persisted to project state after each run
  (pipeline_last_run, pipeline_summary, retrieval_harness_result)
- Dashboard enhanced: interaction total + by_client, pipeline health
  (last_run, hours_since, harness results, triage stats), dynamic
  project list from registry

Phase 10 — reinforcement-based auto-promotion:
- auto_promote_reinforced(): candidates with reference_count >= 3 and
  confidence >= 0.7 auto-graduate to active
- expire_stale_candidates(): candidates unreinforced for 14+ days
  auto-rejected to prevent unbounded queue growth
- Both wired into nightly cron (Step B2)
- Batch script: scripts/auto_promote_reinforced.py (--dry-run support)

Knowledge seeding:
- scripts/seed_project_state.py: 26 curated Trusted Project State
  entries across p04-gigabit, p05-interferometer, p06-polisher,
  atomizer-v2, abb-space, atocore (decisions, requirements, facts,
  contacts, milestones)

Tests: 299 → 303 (4 new Phase 10 tests)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 13:59:12 -04:00
parent b687e7fa6f
commit 775960c8c8
6 changed files with 570 additions and 20 deletions

View File

@@ -929,11 +929,14 @@ def api_dashboard() -> dict:
"""One-shot system observability dashboard.
Returns memory counts by type/project/status, project state
entry counts, recent interaction volume, and extraction pipeline
entry counts, interaction volume by client, pipeline health
(harness, triage stats, last run), and extraction pipeline
status — everything an operator needs to understand AtoCore's
health beyond the basic /health endpoint.
"""
import json as _json
from collections import Counter
from datetime import datetime as _dt, timezone as _tz
all_memories = get_memories(active_only=False, limit=500)
active = [m for m in all_memories if m.status == "active"]
@@ -943,27 +946,81 @@ def api_dashboard() -> dict:
project_counts = dict(Counter(m.project or "(none)" for m in active))
reinforced = [m for m in active if m.reference_count > 0]
interactions = list_interactions(limit=1)
recent_interaction = interactions[0].created_at if interactions else None
# Interaction stats — total + by_client from DB directly
interaction_stats: dict = {"most_recent": None, "total": 0, "by_client": {}}
try:
from atocore.models.database import get_connection as _gc
# Extraction pipeline status
extract_state = {}
with _gc() as conn:
row = conn.execute("SELECT count(*) FROM interactions").fetchone()
interaction_stats["total"] = row[0] if row else 0
rows = conn.execute(
"SELECT client, count(*) FROM interactions GROUP BY client"
).fetchall()
interaction_stats["by_client"] = {r[0]: r[1] for r in rows}
row = conn.execute(
"SELECT created_at FROM interactions ORDER BY created_at DESC LIMIT 1"
).fetchone()
interaction_stats["most_recent"] = row[0] if row else None
except Exception:
interactions = list_interactions(limit=1)
interaction_stats["most_recent"] = (
interactions[0].created_at if interactions else None
)
# Pipeline health from project state
pipeline: dict = {}
extract_state: dict = {}
try:
state_entries = get_state("atocore")
for entry in state_entries:
if entry.category == "status" and entry.key == "last_extract_batch_run":
if entry.category != "status":
continue
if entry.key == "last_extract_batch_run":
extract_state["last_run"] = entry.value
elif entry.key == "pipeline_last_run":
pipeline["last_run"] = entry.value
try:
last = _dt.fromisoformat(entry.value.replace("Z", "+00:00"))
delta = _dt.now(_tz.utc) - last
pipeline["hours_since_last_run"] = round(
delta.total_seconds() / 3600, 1
)
except Exception:
pass
elif entry.key == "pipeline_summary":
try:
pipeline["summary"] = _json.loads(entry.value)
except Exception:
pipeline["summary_raw"] = entry.value
elif entry.key == "retrieval_harness_result":
try:
pipeline["harness"] = _json.loads(entry.value)
except Exception:
pipeline["harness_raw"] = entry.value
except Exception:
pass
# Project state counts
# Project state counts — include all registered projects
ps_counts = {}
for proj_id in ["p04-gigabit", "p05-interferometer", "p06-polisher", "atocore"]:
try:
entries = get_state(proj_id)
ps_counts[proj_id] = len(entries)
except Exception:
pass
try:
from atocore.projects.registry import load_project_registry as _lpr
for proj in _lpr():
try:
entries = get_state(proj.project_id)
ps_counts[proj.project_id] = len(entries)
except Exception:
pass
except Exception:
for proj_id in [
"p04-gigabit", "p05-interferometer", "p06-polisher", "atocore",
]:
try:
entries = get_state(proj_id)
ps_counts[proj_id] = len(entries)
except Exception:
pass
return {
"memories": {
@@ -977,10 +1034,9 @@ def api_dashboard() -> dict:
"counts": ps_counts,
"total": sum(ps_counts.values()),
},
"interactions": {
"most_recent": recent_interaction,
},
"interactions": interaction_stats,
"extraction_pipeline": extract_state,
"pipeline": pipeline,
}