feat: Phase 7D — confidence decay on unreferenced cold memories

Daily job multiplies confidence by 0.97 for active memories with reference_count=0 AND idle > 30 days. At 0.97 per run, confidence halves in ~23 daily runs — roughly 2 months after last activity once the 30-day idle window is counted. Below 0.3 → auto-supersede with audit. Reversible via reinforcement (which already bumps confidence back up).

Rationale: stale memories currently rank equal to fresh ones in retrieval. Without decay, the brain accumulates obsolete facts that compete with fresh knowledge for context-pack slots. With decay, memories earn their longevity via reference.

- decay_unreferenced_memories() in service.py (stdlib-only, no cron infra needed)
- POST /admin/memory/decay-run endpoint
- Nightly Step F4 in batch-extract.sh
- Exempt: reinforced (refcount > 0), graduated, superseded, invalid
- Audit row per supersession ("decayed below floor, no references"), actor="confidence-decay". Per-decay rows skipped (chatty, no human value — status change is the meaningful signal).
- Configurable via env: ATOCORE_DECAY_* (exposed through endpoint body)

Tests: +13 (basic decay, reinforcement protection, supersede at floor, audit trail, graduated/superseded exemption, reinforcement reversibility, threshold tuning, parameter validation, cross-run stacking). 401 → 414.

Next in Phase 7: 7C tag canonicalization (weekly), then 7B contradiction detection.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 16:50:20 -04:00
parent 56d5df0ab4
commit e840ef4be3
4 changed files with 406 additions and 0 deletions
--- a/src/atocore/memory/service.py
+++ b/src/atocore/memory/service.py
@@ -691,6 +691,117 @@ def extend_reinforced_valid_until(
    return extended


+def decay_unreferenced_memories(
+    idle_days_threshold: int = 30,
+    daily_decay_factor: float = 0.97,
+    supersede_confidence_floor: float = 0.30,
+    actor: str = "confidence-decay",
+) -> dict[str, list]:
+    """Phase 7D — daily confidence decay on cold memories.
+
+    For every ``status='active'`` memory with ``reference_count == 0``
+    whose last activity (``last_referenced_at`` if set, else
+    ``created_at``) is older than ``idle_days_threshold`` days: multiply
+    confidence by ``daily_decay_factor`` (0.97/run halves in ~23 runs).
+
+    If the decayed confidence falls below ``supersede_confidence_floor``,
+    the row is set to ``status='superseded'`` and audited as ``actor``
+    with note "decayed below floor <floor>, no references". Supersession
+    is non-destructive — the row is updated in place, never deleted.
+
+    Reinforcement already bumps confidence back up, so a decayed memory
+    that later gets referenced reverses its trajectory naturally.
+
+    The job is NOT idempotent: every run applies one more decay step, so
+    running it twice in one day decays twice. The cron runs once/day,
+    which keeps it on-policy; a skipped day under-decays (safe direction).
+    Raises ValueError when the factor or the floor is out of range.
+
+    Returns {"decayed": [...], "superseded": [...]}; each entry holds
+    ``memory_id``, ``old_confidence`` and ``new_confidence``.
+    """
+    from datetime import timedelta
+
+    if not (0.0 < daily_decay_factor < 1.0):
+        raise ValueError("daily_decay_factor must be between 0 and 1 (exclusive)")
+    if not (0.0 <= supersede_confidence_floor <= 1.0):
+        raise ValueError("supersede_confidence_floor must be in [0,1]")
+
+    cutoff_dt = datetime.now(timezone.utc) - timedelta(days=idle_days_threshold)
+    cutoff_str = cutoff_dt.strftime("%Y-%m-%d %H:%M:%S")  # NOTE(review): assumes stored timestamps use this same UTC text format — confirm
+    now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+
+    decayed: list[dict] = []
+    superseded: list[dict] = []
+
+    with get_connection() as conn:
+        # COALESCE(last_referenced_at, created_at) is the effective "last
+        # activity" — if a memory was never reinforced, we measure age
+        # from creation. The status = 'active' filter is what keeps
+        # graduated memories (which are frozen pointers to entities)
+        # and every other non-active row out of the decay pool.
+        rows = conn.execute(
+            "SELECT id, confidence, last_referenced_at, created_at "
+            "FROM memories "
+            "WHERE status = 'active' "
+            "AND COALESCE(reference_count, 0) = 0 "
+            "AND COALESCE(last_referenced_at, created_at) < ?",
+            (cutoff_str,),
+        ).fetchall()
+
+        for r in rows:
+            mid = r["id"]
+            old_conf = float(r["confidence"])
+            new_conf = max(0.0, old_conf * daily_decay_factor)
+
+            if new_conf < supersede_confidence_floor:
+                # Below the floor: retire the row, storing the decayed value
+                conn.execute(
+                    "UPDATE memories SET status = 'superseded', "
+                    "confidence = ?, updated_at = ? WHERE id = ?",
+                    (new_conf, now_str, mid),
+                )
+                superseded.append({
+                    "memory_id": mid,
+                    "old_confidence": old_conf,
+                    "new_confidence": new_conf,
+                })
+            else:
+                conn.execute(
+                    "UPDATE memories SET confidence = ?, updated_at = ? WHERE id = ?",
+                    (new_conf, now_str, mid),
+                )
+                decayed.append({
+                    "memory_id": mid,
+                    "old_confidence": old_conf,
+                    "new_confidence": new_conf,
+                })
+
+    # Audit rows are written after the connection block exits. Per-decay
+    # audits are skipped because they would be too chatty (potentially
+    # hundreds of rows/day for no human value); supersessions ARE audited
+    # because those are status-changing events humans may want to review.
+    for entry in superseded:
+        _audit_memory(
+            memory_id=entry["memory_id"],
+            action="superseded",
+            actor=actor,
+            before={"status": "active", "confidence": entry["old_confidence"]},
+            after={"status": "superseded", "confidence": entry["new_confidence"]},
+            note=f"decayed below floor {supersede_confidence_floor}, no references",
+        )
+
+    if decayed or superseded:
+        log.info(
+            "confidence_decay_run",
+            decayed=len(decayed),
+            superseded=len(superseded),
+            idle_days_threshold=idle_days_threshold,
+            daily_decay_factor=daily_decay_factor,
+        )
+    return {"decayed": decayed, "superseded": superseded}
+
+
 def expire_stale_candidates(
    max_age_days: int = 14,
 ) -> list[str]: