feat: Phase 7D — confidence decay on unreferenced cold memories
Daily job multiplies confidence by 0.97 for active memories with
reference_count=0 AND idle > 30 days. At that rate confidence halves
about every 23 days of decay (~2 months after the last reference once
the 30-day idle window is counted). Below 0.3 → auto-supersede with
audit. Reversible via reinforcement (which already bumps confidence
back up).
Rationale: stale memories currently rank equal to fresh ones in
retrieval. Without decay, the brain accumulates obsolete facts
that compete with fresh knowledge for context-pack slots. With
decay, memories earn their longevity via reference.
- decay_unreferenced_memories() in service.py (stdlib-only, no cron
infra needed)
- POST /admin/memory/decay-run endpoint
- Nightly Step F4 in batch-extract.sh
- Exempt: reinforced (refcount > 0), graduated, superseded, invalid
- Audit row per supersession ("decayed below floor, no references"),
actor="confidence-decay". Per-decay rows skipped (chatty, no
human value — status change is the meaningful signal).
- Configurable via env: ATOCORE_DECAY_* (exposed through endpoint body)
Tests: +13 (basic decay, reinforcement protection, supersede at floor,
audit trail, graduated/superseded exemption, reinforcement reversibility,
threshold tuning, parameter validation, cross-run stacking).
Total test count: 401 → 414.
Next in Phase 7: 7C tag canonicalization (weekly), then 7B contradiction
detection.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
251
tests/test_confidence_decay.py
Normal file
251
tests/test_confidence_decay.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""Phase 7D — confidence decay tests.
|
||||
|
||||
Covers:
|
||||
- idle unreferenced memories decay at the expected rate
|
||||
- fresh / reinforced memories are untouched
|
||||
- below floor → auto-supersede with audit
|
||||
- graduated memories exempt
|
||||
- reinforcement reverses decay (integration with Phase 9 Commit B)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from atocore.memory.service import (
|
||||
create_memory,
|
||||
decay_unreferenced_memories,
|
||||
get_memory_audit,
|
||||
reinforce_memory,
|
||||
)
|
||||
from atocore.models.database import get_connection, init_db
|
||||
|
||||
|
||||
def _force_old(mem_id: str, days_ago: int) -> None:
    """Backdate a memory so it looks ``days_ago`` days old.

    The same UTC timestamp, formatted as ``YYYY-MM-DD HH:MM:SS``, is written
    to both ``last_referenced_at`` and ``created_at`` of the matching row.
    """
    backdated = datetime.now(timezone.utc) - timedelta(days=days_ago)
    stamp = backdated.strftime("%Y-%m-%d %H:%M:%S")
    sql = "UPDATE memories SET last_referenced_at = ?, created_at = ? WHERE id = ?"
    with get_connection() as conn:
        conn.execute(sql, (stamp, stamp, mem_id))
|
||||
|
||||
|
||||
def _set_confidence(mem_id: str, c: float) -> None:
    """Overwrite the ``confidence`` column of one memory row via raw SQL."""
    params = (c, mem_id)
    with get_connection() as conn:
        conn.execute("UPDATE memories SET confidence = ? WHERE id = ?", params)
|
||||
|
||||
|
||||
def _set_reference_count(mem_id: str, n: int) -> None:
    """Overwrite the ``reference_count`` column of one memory row via raw SQL."""
    params = (n, mem_id)
    with get_connection() as conn:
        conn.execute("UPDATE memories SET reference_count = ? WHERE id = ?", params)
|
||||
|
||||
|
||||
def _get(mem_id: str) -> dict:
    """Return the ``memories`` row for ``mem_id`` as a dict.

    An empty dict is returned when the lookup yields no (truthy) row.
    """
    with get_connection() as conn:
        cursor = conn.execute("SELECT * FROM memories WHERE id = ?", (mem_id,))
        record = cursor.fetchone()
        found = dict(record) if record else {}
    return found
|
||||
|
||||
|
||||
def _set_status(mem_id: str, status: str) -> None:
    """Overwrite the ``status`` column of one memory row via raw SQL."""
    params = (status, mem_id)
    with get_connection() as conn:
        conn.execute("UPDATE memories SET status = ? WHERE id = ?", params)
|
||||
|
||||
|
||||
# --- Basic decay mechanics ---
|
||||
|
||||
|
||||
def test_decay_applies_to_idle_unreferenced(tmp_data_dir):
    """A 60-day-idle memory with zero references takes one decay step."""
    init_db()
    mem = create_memory("knowledge", "cold fact", confidence=0.8)
    _force_old(mem.id, days_ago=60)
    _set_reference_count(mem.id, 0)

    outcome = decay_unreferenced_memories()
    decayed = outcome["decayed"]
    assert len(decayed) == 1
    assert decayed[0]["memory_id"] == mem.id

    stored = _get(mem.id)
    # One step with the default factor: 0.8 * 0.97 = 0.776.
    assert stored["confidence"] == pytest.approx(0.776)
    # 0.776 is still above the supersede floor, so the status is untouched.
    assert stored["status"] == "active"
|
||||
|
||||
|
||||
def test_decay_skips_fresh_memory(tmp_data_dir):
    """A just-created memory is left alone even though nothing references it."""
    init_db()
    # Deliberately no _force_old call: the memory keeps its creation-time stamps.
    mem = create_memory("knowledge", "just-created fact", confidence=0.8)

    outcome = decay_unreferenced_memories()
    assert all(entry["memory_id"] != mem.id for entry in outcome["decayed"])
    assert all(entry["memory_id"] != mem.id for entry in outcome["superseded"])

    stored = _get(mem.id)
    assert stored["confidence"] == pytest.approx(0.8)
|
||||
|
||||
|
||||
def test_decay_skips_reinforced_memory(tmp_data_dir):
    """A non-zero reference_count shields an old memory from decay."""
    init_db()
    mem = create_memory("knowledge", "referenced fact", confidence=0.8)
    _force_old(mem.id, days_ago=90)
    # A single reference is expected to be sufficient protection.
    _set_reference_count(mem.id, 1)

    outcome = decay_unreferenced_memories()
    assert all(entry["memory_id"] != mem.id for entry in outcome["decayed"])

    stored = _get(mem.id)
    assert stored["confidence"] == pytest.approx(0.8)
|
||||
|
||||
|
||||
# --- Auto-supersede at floor ---
|
||||
|
||||
|
||||
def test_decay_supersedes_below_floor(tmp_data_dir):
    """A decay step that lands under the floor flips the memory to superseded."""
    init_db()
    mem = create_memory("knowledge", "very cold fact", confidence=0.31)
    _force_old(mem.id, days_ago=60)
    _set_reference_count(mem.id, 0)

    # Starting from 0.31 a single step gives 0.3007, still above 0.30, so
    # nudge the stored value down so that one step crosses the floor.
    _set_confidence(mem.id, 0.305)

    outcome = decay_unreferenced_memories(supersede_confidence_floor=0.30)
    # 0.305 * 0.97 = 0.29585, which is below 0.30.
    superseded = outcome["superseded"]
    assert len(superseded) == 1
    assert superseded[0]["memory_id"] == mem.id

    stored = _get(mem.id)
    assert stored["status"] == "superseded"
    assert stored["confidence"] < 0.30
|
||||
|
||||
|
||||
def test_supersede_writes_audit_row(tmp_data_dir):
    """Superseding through decay records an audit entry attributed to the job."""
    init_db()
    mem = create_memory("knowledge", "will decay out", confidence=0.305)
    _force_old(mem.id, days_ago=60)
    _set_reference_count(mem.id, 0)

    decay_unreferenced_memories(supersede_confidence_floor=0.30)

    audit = get_memory_audit(mem.id)
    assert "superseded" in [item["action"] for item in audit]

    # Inspect the first audit entry whose action is "superseded".
    entry = next(filter(lambda item: item["action"] == "superseded", audit))
    assert entry["actor"] == "confidence-decay"
    assert "decayed below floor" in entry["note"]
|
||||
|
||||
|
||||
# --- Exemptions ---
|
||||
|
||||
|
||||
def test_decay_skips_graduated_memory(tmp_data_dir):
    """Memories in the ``graduated`` status must never be decayed."""
    init_db()
    mem = create_memory("knowledge", "graduated fact", confidence=0.8)
    _force_old(mem.id, days_ago=90)
    _set_reference_count(mem.id, 0)
    _set_status(mem.id, "graduated")

    outcome = decay_unreferenced_memories()
    assert all(entry["memory_id"] != mem.id for entry in outcome["decayed"])

    stored = _get(mem.id)
    # Confidence must be exactly what the memory was created with.
    assert stored["confidence"] == pytest.approx(0.8)
|
||||
|
||||
|
||||
def test_decay_skips_superseded_memory(tmp_data_dir):
    """Already superseded memories don't decay further.

    Besides checking the returned ``decayed`` list, this also verifies that
    the memory is not reported as newly superseded and that its stored
    confidence is untouched, mirroring the graduated-memory exemption test.
    """
    init_db()
    m = create_memory("knowledge", "old news", confidence=0.5)
    _force_old(m.id, days_ago=90)
    _set_reference_count(m.id, 0)
    _set_status(m.id, "superseded")

    result = decay_unreferenced_memories()
    assert not any(e["memory_id"] == m.id for e in result["decayed"])
    # It is already superseded, so the run must not supersede it a second time.
    assert not any(e["memory_id"] == m.id for e in result["superseded"])

    # The report alone is not enough: the row itself must be unchanged.
    row = _get(m.id)
    assert row["confidence"] == pytest.approx(0.5)
    assert row["status"] == "superseded"
|
||||
|
||||
|
||||
# --- Reversibility ---
|
||||
|
||||
|
||||
def test_reinforcement_reverses_decay(tmp_data_dir):
    """Reinforcing a decayed memory raises its confidence again."""
    init_db()
    mem = create_memory("knowledge", "will come back", confidence=0.8)
    _force_old(mem.id, days_ago=60)
    _set_reference_count(mem.id, 0)

    # One decay step takes the confidence from 0.8 to 0.776.
    decay_unreferenced_memories()
    reinforce_memory(mem.id, confidence_delta=0.05)

    stored = _get(mem.id)
    # 0.776 + 0.05 = 0.826.
    assert stored["confidence"] == pytest.approx(0.826)
    assert stored["reference_count"] >= 1
|
||||
|
||||
|
||||
def test_reinforced_memory_no_longer_decays(tmp_data_dir):
    """After a real reinforce_memory call, the decay run leaves the memory alone."""
    init_db()
    mem = create_memory("knowledge", "protected", confidence=0.8)
    _force_old(mem.id, days_ago=90)
    # Go through the real reinforcement path instead of editing the row.
    # NOTE(review): reinforce_memory may also refresh last_referenced_at, in
    # which case recency rather than reference_count protects it; confirm.
    reinforce_memory(mem.id)

    outcome = decay_unreferenced_memories()
    assert all(entry["memory_id"] != mem.id for entry in outcome["decayed"])
|
||||
|
||||
|
||||
# --- Parameter validation ---
|
||||
|
||||
|
||||
def test_decay_rejects_invalid_factor(tmp_data_dir):
    """daily_decay_factor values of 1.0, 0.0 and -0.5 each raise ValueError."""
    init_db()
    for bad_factor in (1.0, 0.0, -0.5):
        with pytest.raises(ValueError):
            decay_unreferenced_memories(daily_decay_factor=bad_factor)
|
||||
|
||||
|
||||
def test_decay_rejects_invalid_floor(tmp_data_dir):
    """supersede_confidence_floor values of 1.5 and -0.1 each raise ValueError."""
    init_db()
    for bad_floor in (1.5, -0.1):
        with pytest.raises(ValueError):
            decay_unreferenced_memories(supersede_confidence_floor=bad_floor)
|
||||
|
||||
|
||||
# --- Threshold tuning ---
|
||||
|
||||
|
||||
def test_decay_threshold_tight_excludes_newer(tmp_data_dir):
    """A 60-day-old memory is not decayed when idle_days_threshold is 90."""
    init_db()
    mem = create_memory("knowledge", "60-day-old", confidence=0.8)
    _force_old(mem.id, days_ago=60)
    _set_reference_count(mem.id, 0)

    outcome = decay_unreferenced_memories(idle_days_threshold=90)
    assert all(entry["memory_id"] != mem.id for entry in outcome["decayed"])
|
||||
|
||||
|
||||
# --- Idempotency-ish (multiple runs apply additional decay) ---
|
||||
|
||||
|
||||
def test_decay_stacks_across_runs(tmp_data_dir):
    """Two consecutive runs (standing in for two days) compound the decay."""
    init_db()
    mem = create_memory("knowledge", "aging fact", confidence=0.8)
    _force_old(mem.id, days_ago=60)
    _set_reference_count(mem.id, 0)

    for _ in range(2):
        decay_unreferenced_memories()

    stored = _get(mem.id)
    # Two default steps: 0.8 * 0.97 * 0.97 = 0.75272.
    assert stored["confidence"] == pytest.approx(0.75272, rel=1e-4)
|
||||
Reference in New Issue
Block a user