feat: "Make It Actually Useful" sprint — observability + Phase 10

Pipeline observability:
- Retrieval harness runs nightly (Step E in batch-extract.sh)
- Pipeline summary persisted to project state after each run
  (pipeline_last_run, pipeline_summary, retrieval_harness_result)
- Dashboard enhanced: interaction total + by_client, pipeline health
  (last_run, hours_since, harness results, triage stats), dynamic
  project list from registry

Phase 10 — reinforcement-based auto-promotion:
- auto_promote_reinforced(): candidates with reference_count >= 3 and
  confidence >= 0.7 auto-graduate to active
- expire_stale_candidates(): candidates unreinforced for 14+ days
  auto-rejected to prevent unbounded queue growth
- Both wired into nightly cron (Step B2)
- Batch script: scripts/auto_promote_reinforced.py (--dry-run support)

Knowledge seeding:
- scripts/seed_project_state.py: 26 curated Trusted Project State
  entries across p04-gigabit, p05-interferometer, p06-polisher,
  atomizer-v2, abb-space, atocore (decisions, requirements, facts,
  contacts, milestones)

Tests: 299 → 303 (4 new Phase 10 tests)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 13:59:12 -04:00
parent b687e7fa6f
commit 775960c8c8
6 changed files with 570 additions and 20 deletions

View File

@@ -34,22 +34,36 @@ export PYTHONPATH="$APP_DIR/src:${PYTHONPATH:-}"
log "=== AtoCore batch extraction + triage starting ==="
log "URL=$ATOCORE_URL LIMIT=$LIMIT"
# --- Pipeline stats accumulator ---
EXTRACT_OUT=""
TRIAGE_OUT=""
HARNESS_OUT=""
# Step A: Extract candidates from recent interactions
log "Step A: LLM extraction"
python3 "$APP_DIR/scripts/batch_llm_extract_live.py" \
EXTRACT_OUT=$(python3 "$APP_DIR/scripts/batch_llm_extract_live.py" \
--base-url "$ATOCORE_URL" \
--limit "$LIMIT" \
2>&1 || {
2>&1) || {
log "WARN: batch extraction failed (non-blocking)"
}
echo "$EXTRACT_OUT"
# Step B: Auto-triage candidates in the queue
log "Step B: auto-triage"
python3 "$APP_DIR/scripts/auto_triage.py" \
TRIAGE_OUT=$(python3 "$APP_DIR/scripts/auto_triage.py" \
--base-url "$ATOCORE_URL" \
2>&1 || {
2>&1) || {
log "WARN: auto-triage failed (non-blocking)"
}
echo "$TRIAGE_OUT"
# Step B2: Auto-promote reinforced candidates + expire stale ones
log "Step B2: auto-promote + expire"
python3 "$APP_DIR/scripts/auto_promote_reinforced.py" \
2>&1 || {
log "WARN: auto-promote/expire failed (non-blocking)"
}
# Step C: Weekly synthesis (Sundays only)
if [[ "$(date -u +%u)" == "7" ]]; then
@@ -66,4 +80,73 @@ if [[ "$(date -u +%u)" == "7" ]]; then
2>&1 || true
fi
# Step E: Retrieval harness (daily)
log "Step E: retrieval harness"
HARNESS_OUT=$(python3 "$APP_DIR/scripts/retrieval_eval.py" \
--json \
--base-url "$ATOCORE_URL" \
2>&1) || {
log "WARN: retrieval harness failed (non-blocking)"
}
echo "$HARNESS_OUT"
# Step F: Persist pipeline summary to project state
log "Step F: pipeline summary"
python3 -c "
import json, urllib.request, re, sys
base = '$ATOCORE_URL'
ts = '$TIMESTAMP'
def post_state(key, value):
body = json.dumps({
'project': 'atocore', 'category': 'status',
'key': key, 'value': value, 'source': 'nightly pipeline',
}).encode()
req = urllib.request.Request(
f'{base}/project/state', data=body,
headers={'Content-Type': 'application/json'}, method='POST',
)
try:
urllib.request.urlopen(req, timeout=10)
except Exception as e:
print(f'WARN: failed to persist {key}: {e}', file=sys.stderr)
# Parse harness JSON
harness = {}
try:
harness = json.loads('''$HARNESS_OUT''')
post_state('retrieval_harness_result', json.dumps({
'passed': harness.get('passed', 0),
'total': harness.get('total', 0),
'failures': [f['name'] for f in harness.get('fixtures', []) if not f.get('ok')],
'run_at': ts,
}))
p, t = harness.get('passed', '?'), harness.get('total', '?')
print(f'Harness: {p}/{t}')
except Exception:
print('WARN: could not parse harness output')
# Parse triage counts from stdout
triage_out = '''$TRIAGE_OUT'''
promoted = len(re.findall(r'promoted', triage_out, re.IGNORECASE))
rejected = len(re.findall(r'rejected', triage_out, re.IGNORECASE))
needs_human = len(re.findall(r'needs.human', triage_out, re.IGNORECASE))
# Build summary
summary = {
'run_at': ts,
'harness_passed': harness.get('passed', -1),
'harness_total': harness.get('total', -1),
'triage_promoted': promoted,
'triage_rejected': rejected,
'triage_needs_human': needs_human,
}
post_state('pipeline_last_run', ts)
post_state('pipeline_summary', json.dumps(summary))
print(f'Pipeline summary persisted: {json.dumps(summary)}')
" 2>&1 || {
log "WARN: pipeline summary persistence failed (non-blocking)"
}
log "=== AtoCore batch extraction + triage complete ==="

View File

@@ -0,0 +1,79 @@
#!/usr/bin/env python3
"""Auto-promote reinforced candidates + expire stale ones.
Phase 10: reinforcement-based auto-promotion. Candidates referenced
by 3+ interactions with confidence >= 0.7 graduate to active.
Candidates unreinforced for 14+ days are auto-rejected.
Usage:
python3 scripts/auto_promote_reinforced.py [--base-url URL] [--dry-run]
"""
from __future__ import annotations
import argparse
import json
import os
import sys
# Allow importing from src/ when run from repo root
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from atocore.memory.service import auto_promote_reinforced, expire_stale_candidates
def main() -> None:
parser = argparse.ArgumentParser(description="Auto-promote + expire candidates")
parser.add_argument("--dry-run", action="store_true", help="Report only, don't change anything")
parser.add_argument("--min-refs", type=int, default=3, help="Min reference_count for promotion")
parser.add_argument("--min-confidence", type=float, default=0.7, help="Min confidence for promotion")
parser.add_argument("--expire-days", type=int, default=14, help="Days before unreinforced candidates expire")
args = parser.parse_args()
if args.dry_run:
print("DRY RUN — no changes will be made")
# For dry-run, query directly and report
from atocore.models.database import get_connection
from datetime import datetime, timedelta, timezone
cutoff_promote = (datetime.now(timezone.utc) - timedelta(days=args.expire_days)).strftime("%Y-%m-%d %H:%M:%S")
cutoff_expire = cutoff_promote
with get_connection() as conn:
promotable = conn.execute(
"SELECT id, content, memory_type, project, confidence, reference_count "
"FROM memories WHERE status = 'candidate' "
"AND COALESCE(reference_count, 0) >= ? AND confidence >= ? "
"AND last_referenced_at >= ?",
(args.min_refs, args.min_confidence, cutoff_promote),
).fetchall()
expirable = conn.execute(
"SELECT id, content, memory_type, project "
"FROM memories WHERE status = 'candidate' "
"AND COALESCE(reference_count, 0) = 0 AND created_at < ?",
(cutoff_expire,),
).fetchall()
print(f"\nWould promote {len(promotable)} candidates:")
for r in promotable:
print(f" [{r['memory_type']}] refs={r['reference_count']} conf={r['confidence']:.2f} | {r['content'][:80]}...")
print(f"\nWould expire {len(expirable)} stale candidates:")
for r in expirable:
print(f" [{r['memory_type']}] {r['project'] or 'global'} | {r['content'][:80]}...")
return
promoted = auto_promote_reinforced(
min_reference_count=args.min_refs,
min_confidence=args.min_confidence,
)
expired = expire_stale_candidates(max_age_days=args.expire_days)
print(f"promoted={len(promoted)} expired={len(expired)}")
if promoted:
print(f"Promoted IDs: {promoted}")
if expired:
print(f"Expired IDs: {expired}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""Seed Trusted Project State entries for all active projects.
Populates the project_state table with curated decisions, requirements,
facts, contacts, and milestones so context packs have real content
in the highest-trust tier.
Usage:
python3 scripts/seed_project_state.py --base-url http://dalidou:8100
python3 scripts/seed_project_state.py --base-url http://dalidou:8100 --dry-run
"""
from __future__ import annotations
import argparse
import json
import urllib.request
import sys
# Each entry: (project, category, key, value, source)
SEED_ENTRIES: list[tuple[str, str, str, str, str]] = [
# ---- p04-gigabit (GigaBIT M1 1.2m Primary Mirror) ----
("p04-gigabit", "fact", "mirror-spec",
"1.2m borosilicate primary mirror for GigaBIT telescope. F/1.5, lightweight isogrid back structure.",
"CDR docs + vault"),
("p04-gigabit", "decision", "back-structure",
"Option B selected: conical isogrid back structure with variable rib density. Chosen over flat-back for stiffness-to-weight ratio.",
"CDR 2026-01"),
("p04-gigabit", "decision", "polishing-vendor",
"ABB Space (formerly INO) selected as polishing vendor. Contract includes computer-controlled polishing (CCP) and ion beam figuring (IBF).",
"Entente de service 2026-01"),
("p04-gigabit", "requirement", "surface-quality",
"Surface figure accuracy: < 25nm RMS after final figuring. Microroughness: < 2nm RMS.",
"CDR requirements"),
("p04-gigabit", "contact", "abb-space",
"ABB Space (INO), Quebec City. Primary contact for mirror polishing, CCP, and IBF. Project lead: coordinating FDR deliverables.",
"vendor records"),
("p04-gigabit", "milestone", "fdr",
"Final Design Review (FDR) in preparation. Deliverables include interface drawings, thermal analysis, and updated error budget.",
"project timeline"),
# ---- p05-interferometer (Fullum Interferometer) ----
("p05-interferometer", "fact", "system-overview",
"Custom Fizeau interferometer for in-situ metrology of large optics. Designed for the Fullum observatory polishing facility.",
"vault docs"),
("p05-interferometer", "decision", "cgh-design",
"Computer-generated hologram (CGH) selected for null testing of the 1.2m mirror. Vendor: Diffraction International.",
"vendor correspondence"),
("p05-interferometer", "requirement", "measurement-accuracy",
"Measurement accuracy target: lambda/20 (< 30nm PV) for surface figure verification.",
"system requirements"),
("p05-interferometer", "fact", "laser-source",
"HeNe laser source at 632.8nm. Beam expansion to cover full 1.2m aperture via diverger + CGH.",
"optical design docs"),
("p05-interferometer", "contact", "diffraction-intl",
"Diffraction International: CGH vendor. Fabricates the computer-generated hologram for null testing.",
"vendor records"),
# ---- p06-polisher (Polisher Suite / P11-Polisher-Fullum) ----
("p06-polisher", "fact", "suite-overview",
"Integrated CNC polishing suite for the Fullum observatory. Includes 3-axis polishing machine, metrology integration, and real-time process control.",
"vault docs"),
("p06-polisher", "decision", "control-architecture",
"Beckhoff TwinCAT 3 selected for real-time motion control. EtherCAT fieldbus for servo drives and I/O.",
"architecture docs"),
("p06-polisher", "decision", "firmware-split",
"Firmware split into safety layer (PLC-level interlocks) and application layer (trajectory generation, adaptive dwell-time).",
"architecture docs"),
("p06-polisher", "requirement", "axis-travel",
"Z-axis: 200mm travel for tool engagement. X/Y: covers 1.2m mirror diameter plus overshoot margin.",
"mechanical requirements"),
("p06-polisher", "fact", "telemetry",
"Real-time telemetry via MQTT. Metrics: spindle RPM, force sensor, temperature probes, position feedback at 1kHz.",
"control design docs"),
("p06-polisher", "contact", "fullum-observatory",
"Fullum Observatory: site where the polishing suite will be installed. Provides infrastructure (power, vibration isolation, clean environment).",
"project records"),
# ---- atomizer-v2 ----
("atomizer-v2", "fact", "product-overview",
"Atomizer V2: internal project management and multi-agent orchestration platform. War-room based task coordination.",
"repo docs"),
("atomizer-v2", "decision", "projects-first-architecture",
"Migration to projects-first architecture: each project is a workspace with its own agents, tasks, and knowledge.",
"war-room-migration-plan-v2.md"),
# ---- abb-space (P08) ----
("abb-space", "fact", "contract-overview",
"ABB Space mirror polishing contract. Phase 1: spherical mirror polishing (200mm). Schott Zerodur substrate.",
"quotes + correspondence"),
("abb-space", "contact", "schott",
"Schott AG: substrate supplier for Zerodur mirror blanks. Quote received for 200mm blank.",
"vendor records"),
# ---- atocore ----
("atocore", "fact", "architecture",
"AtoCore: runtime memory and knowledge layer. FastAPI + SQLite + ChromaDB. Hosted on Dalidou (Docker). Nightly pipeline: backup, extract, triage, synthesis.",
"codebase"),
("atocore", "decision", "no-api-keys",
"No API keys allowed in AtoCore. LLM-assisted features use OAuth via 'claude -p' CLI or equivalent CLI-authenticated paths.",
"DEV-LEDGER 2026-04-12"),
("atocore", "decision", "storage-separation",
"Human-readable sources (vault, drive) and machine operational storage (SQLite, ChromaDB) must remain separate. Machine DB is derived state.",
"AGENTS.md"),
("atocore", "decision", "extraction-off-hot-path",
"Extraction stays off the capture hot path. Batch/manual only. Never block interaction recording with extraction.",
"DEV-LEDGER 2026-04-11"),
]
def main() -> None:
parser = argparse.ArgumentParser(description="Seed Trusted Project State")
parser.add_argument("--base-url", default="http://dalidou:8100")
parser.add_argument("--dry-run", action="store_true")
args = parser.parse_args()
base = args.base_url.rstrip("/")
created = 0
skipped = 0
errors = 0
for project, category, key, value, source in SEED_ENTRIES:
if args.dry_run:
print(f" [DRY] {project}/{category}/{key}: {value[:60]}...")
created += 1
continue
body = json.dumps({
"project": project,
"category": category,
"key": key,
"value": value,
"source": source,
"confidence": 1.0,
}).encode()
req = urllib.request.Request(
f"{base}/project/state",
data=body,
headers={"Content-Type": "application/json"},
method="POST",
)
try:
resp = urllib.request.urlopen(req, timeout=10)
result = json.loads(resp.read())
if result.get("created"):
created += 1
print(f" + {project}/{category}/{key}")
else:
skipped += 1
print(f" = {project}/{category}/{key} (already exists)")
except Exception as e:
errors += 1
print(f" ! {project}/{category}/{key}: {e}", file=sys.stderr)
print(f"\nDone: {created} created, {skipped} skipped, {errors} errors")
if __name__ == "__main__":
main()

View File

@@ -929,11 +929,14 @@ def api_dashboard() -> dict:
"""One-shot system observability dashboard.
Returns memory counts by type/project/status, project state
entry counts, recent interaction volume, and extraction pipeline
entry counts, interaction volume by client, pipeline health
(harness, triage stats, last run), and extraction pipeline
status — everything an operator needs to understand AtoCore's
health beyond the basic /health endpoint.
"""
import json as _json
from collections import Counter
from datetime import datetime as _dt, timezone as _tz
all_memories = get_memories(active_only=False, limit=500)
active = [m for m in all_memories if m.status == "active"]
@@ -943,27 +946,81 @@ def api_dashboard() -> dict:
project_counts = dict(Counter(m.project or "(none)" for m in active))
reinforced = [m for m in active if m.reference_count > 0]
interactions = list_interactions(limit=1)
recent_interaction = interactions[0].created_at if interactions else None
# Interaction stats — total + by_client from DB directly
interaction_stats: dict = {"most_recent": None, "total": 0, "by_client": {}}
try:
from atocore.models.database import get_connection as _gc
# Extraction pipeline status
extract_state = {}
with _gc() as conn:
row = conn.execute("SELECT count(*) FROM interactions").fetchone()
interaction_stats["total"] = row[0] if row else 0
rows = conn.execute(
"SELECT client, count(*) FROM interactions GROUP BY client"
).fetchall()
interaction_stats["by_client"] = {r[0]: r[1] for r in rows}
row = conn.execute(
"SELECT created_at FROM interactions ORDER BY created_at DESC LIMIT 1"
).fetchone()
interaction_stats["most_recent"] = row[0] if row else None
except Exception:
interactions = list_interactions(limit=1)
interaction_stats["most_recent"] = (
interactions[0].created_at if interactions else None
)
# Pipeline health from project state
pipeline: dict = {}
extract_state: dict = {}
try:
state_entries = get_state("atocore")
for entry in state_entries:
if entry.category == "status" and entry.key == "last_extract_batch_run":
if entry.category != "status":
continue
if entry.key == "last_extract_batch_run":
extract_state["last_run"] = entry.value
elif entry.key == "pipeline_last_run":
pipeline["last_run"] = entry.value
try:
last = _dt.fromisoformat(entry.value.replace("Z", "+00:00"))
delta = _dt.now(_tz.utc) - last
pipeline["hours_since_last_run"] = round(
delta.total_seconds() / 3600, 1
)
except Exception:
pass
elif entry.key == "pipeline_summary":
try:
pipeline["summary"] = _json.loads(entry.value)
except Exception:
pipeline["summary_raw"] = entry.value
elif entry.key == "retrieval_harness_result":
try:
pipeline["harness"] = _json.loads(entry.value)
except Exception:
pipeline["harness_raw"] = entry.value
except Exception:
pass
# Project state counts
# Project state counts — include all registered projects
ps_counts = {}
for proj_id in ["p04-gigabit", "p05-interferometer", "p06-polisher", "atocore"]:
try:
entries = get_state(proj_id)
ps_counts[proj_id] = len(entries)
except Exception:
pass
try:
from atocore.projects.registry import load_project_registry as _lpr
for proj in _lpr():
try:
entries = get_state(proj.project_id)
ps_counts[proj.project_id] = len(entries)
except Exception:
pass
except Exception:
for proj_id in [
"p04-gigabit", "p05-interferometer", "p06-polisher", "atocore",
]:
try:
entries = get_state(proj_id)
ps_counts[proj_id] = len(entries)
except Exception:
pass
return {
"memories": {
@@ -977,10 +1034,9 @@ def api_dashboard() -> dict:
"counts": ps_counts,
"total": sum(ps_counts.values()),
},
"interactions": {
"most_recent": recent_interaction,
},
"interactions": interaction_stats,
"extraction_pipeline": extract_state,
"pipeline": pipeline,
}

View File

@@ -340,6 +340,84 @@ def reinforce_memory(
return True, old_confidence, new_confidence
def auto_promote_reinforced(
min_reference_count: int = 3,
min_confidence: float = 0.7,
max_age_days: int = 14,
) -> list[str]:
"""Auto-promote candidate memories with strong reinforcement signals.
Phase 10: memories that have been reinforced by multiple interactions
graduate from candidate to active without human review. This rewards
knowledge that the system keeps referencing organically.
Returns a list of promoted memory IDs.
"""
from datetime import timedelta
cutoff = (
datetime.now(timezone.utc) - timedelta(days=max_age_days)
).strftime("%Y-%m-%d %H:%M:%S")
promoted: list[str] = []
with get_connection() as conn:
rows = conn.execute(
"SELECT id, content, memory_type, project, confidence, "
"reference_count FROM memories "
"WHERE status = 'candidate' "
"AND COALESCE(reference_count, 0) >= ? "
"AND confidence >= ? "
"AND last_referenced_at >= ?",
(min_reference_count, min_confidence, cutoff),
).fetchall()
for row in rows:
mid = row["id"]
ok = promote_memory(mid)
if ok:
promoted.append(mid)
log.info(
"memory_auto_promoted",
memory_id=mid,
memory_type=row["memory_type"],
project=row["project"] or "(global)",
reference_count=row["reference_count"],
confidence=round(row["confidence"], 3),
)
return promoted
def expire_stale_candidates(
max_age_days: int = 14,
) -> list[str]:
"""Reject candidate memories that sat in queue too long unreinforced.
Candidates older than ``max_age_days`` with zero reinforcement are
auto-rejected to prevent unbounded queue growth. Returns rejected IDs.
"""
from datetime import timedelta
cutoff = (
datetime.now(timezone.utc) - timedelta(days=max_age_days)
).strftime("%Y-%m-%d %H:%M:%S")
expired: list[str] = []
with get_connection() as conn:
rows = conn.execute(
"SELECT id FROM memories "
"WHERE status = 'candidate' "
"AND COALESCE(reference_count, 0) = 0 "
"AND created_at < ?",
(cutoff,),
).fetchall()
for row in rows:
mid = row["id"]
ok = reject_candidate_memory(mid)
if ok:
expired.append(mid)
log.info("memory_expired", memory_id=mid)
return expired
def get_memories_for_context(
memory_types: list[str] | None = None,
project: str | None = None,

View File

@@ -186,3 +186,98 @@ def test_memories_for_context_empty(isolated_db):
text, chars = get_memories_for_context()
assert text == ""
assert chars == 0
# --- Phase 10: auto-promotion + candidate expiry ---
def _get_memory_by_id(memory_id):
"""Helper: fetch a single memory by ID."""
from atocore.models.database import get_connection
with get_connection() as conn:
row = conn.execute("SELECT * FROM memories WHERE id = ?", (memory_id,)).fetchone()
return dict(row) if row else None
def test_auto_promote_reinforced_basic(isolated_db):
from atocore.memory.service import (
auto_promote_reinforced,
create_memory,
reinforce_memory,
)
mem_obj = create_memory("knowledge", "Zerodur has near-zero CTE", status="candidate", confidence=0.7)
mid = mem_obj.id
# reinforce_memory only touches active memories, so we need to
# promote first to reinforce, then demote back to candidate —
# OR just bump reference_count + last_referenced_at directly
from atocore.models.database import get_connection
from datetime import datetime, timezone
now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
with get_connection() as conn:
conn.execute(
"UPDATE memories SET reference_count = 3, last_referenced_at = ? WHERE id = ?",
(now, mid),
)
promoted = auto_promote_reinforced(min_reference_count=3, min_confidence=0.7)
assert mid in promoted
mem = _get_memory_by_id(mid)
assert mem["status"] == "active"
def test_auto_promote_reinforced_ignores_low_refs(isolated_db):
from atocore.memory.service import auto_promote_reinforced, create_memory
from atocore.models.database import get_connection
from datetime import datetime, timezone
mem_obj = create_memory("knowledge", "Some knowledge", status="candidate", confidence=0.7)
mid = mem_obj.id
now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
with get_connection() as conn:
conn.execute(
"UPDATE memories SET reference_count = 1, last_referenced_at = ? WHERE id = ?",
(now, mid),
)
promoted = auto_promote_reinforced(min_reference_count=3, min_confidence=0.7)
assert mid not in promoted
mem = _get_memory_by_id(mid)
assert mem["status"] == "candidate"
def test_expire_stale_candidates(isolated_db):
from atocore.memory.service import create_memory, expire_stale_candidates
from atocore.models.database import get_connection
mem_obj = create_memory("knowledge", "Old unreferenced fact", status="candidate")
mid = mem_obj.id
with get_connection() as conn:
conn.execute(
"UPDATE memories SET created_at = datetime('now', '-30 days') WHERE id = ?",
(mid,),
)
expired = expire_stale_candidates(max_age_days=14)
assert mid in expired
mem = _get_memory_by_id(mid)
assert mem["status"] == "invalid"
def test_expire_stale_candidates_keeps_reinforced(isolated_db):
from atocore.memory.service import create_memory, expire_stale_candidates
from atocore.models.database import get_connection
mem_obj = create_memory("knowledge", "Referenced fact", status="candidate")
mid = mem_obj.id
with get_connection() as conn:
conn.execute(
"UPDATE memories SET reference_count = 1, "
"created_at = datetime('now', '-30 days') WHERE id = ?",
(mid,),
)
expired = expire_stale_candidates(max_age_days=14)
assert mid not in expired
mem = _get_memory_by_id(mid)
assert mem["status"] == "candidate"