fix(memory): Wave 1 — SQL-aggregate dashboard counts + memory write-path fixes

Closes three live-affecting bugs surfaced by the 2026-04-29 Codex review,
all in the memory write/read path. Pre-deploy on Dalidou the live
discrepancy was dashboard.memories.active=315 vs integrity active=1091.

1. /admin/dashboard counts now SQL-aggregate (no sampling).
   New get_memory_count_summary() helper. Dashboard memories.{active,
   candidates,by_type,by_project,reinforced,by_status,total} all derive
   from full-table SQL, not a confidence-sorted limit=500 sample. Post
   deploy the dashboard active count must match the integrity panel.

2. PUT /memory/{id} accepts project; auto-triage now applies it.
   Added project to MemoryUpdateRequest and update_memory() with
   resolve_project_name canonicalization, before/after audit, and
   duplicate-active check scoped to the new project. scripts/auto_triage.py
   suggested-project correction now PUTs {"project": suggested} so
   misattribution flags actually retarget the memory.

3. POST /memory/{id}/invalidate uses direct id lookup.
   New get_memory(id) helper. Replaces the old
   _get_memories(status="active", limit=1) lookup, which only saw the
   highest-confidence active row. Active memories outside slot 0 no
   longer 404. Same status-guard structure applied to
   POST /memory/{id}/supersede so candidates can't silently flip to
   superseded.

14 regression tests added (572 -> 586 locally). Reviewed by Codex twice:
verdict GO on tip 9604c3e.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-28 21:57:08 -04:00
parent 7042eaea46
commit 4c7075650c
6 changed files with 412 additions and 64 deletions

View File

@@ -192,3 +192,120 @@ def test_v1_aliases_present(env):
"/v1/memory/{memory_id}/supersede",
):
assert p in paths, f"{p} missing"
# ---------------------------------------------------------------------------
# Wave 1 (2026-04-29) — invalidation route used to do
# `_get_memories(status='active', limit=1)` and look for the target id
# inside that single highest-confidence row, so any active memory
# outside slot 0 fell through as 404. Direct id lookup fixes it.
# ---------------------------------------------------------------------------
def test_api_invalidate_finds_active_memory_outside_top_one(env):
    """An active memory not at the top of the confidence sort must still
    be invalidatable via POST /memory/{id}/invalidate."""
    # Two active rows: the target deliberately sorts below slot 0 so a
    # top-1-sample lookup (the old bug) would miss it.
    top_row = create_memory(
        memory_type="knowledge",
        content="high-confidence top row",
        confidence=0.99,
    )
    target = create_memory(
        memory_type="knowledge",
        content="lower-confidence target",
        confidence=0.55,
    )
    resp = TestClient(app).post(
        f"/memory/{target.id}/invalidate", json={"reason": "wave1 regression"}
    )
    assert resp.status_code == 200, resp.text
    assert resp.json()["status"] == "invalidated"
    # The target flips to invalid; the high-confidence row is untouched.
    assert _get_memory(top_row.id).status == "active"
    assert _get_memory(target.id).status == "invalid"
def test_api_invalidate_already_invalid_is_idempotent(env):
    """A repeat invalidate on the same id must succeed (200) and report
    already_invalid rather than erroring or re-applying."""
    m = create_memory(memory_type="knowledge", content="already invalid")
    client = TestClient(app)
    r1 = client.post(f"/memory/{m.id}/invalidate", json={"reason": "first"})
    assert r1.status_code == 200
    # Fix: pin the first call's payload too — without this, a route that
    # answered "already_invalid" even on a fresh memory would pass.
    assert r1.json()["status"] == "invalidated"
    r2 = client.post(f"/memory/{m.id}/invalidate", json={"reason": "again"})
    assert r2.status_code == 200
    assert r2.json()["status"] == "already_invalid"
def test_api_invalidate_candidate_returns_409(env):
    """Candidates must be rejected by the active-only invalidate route."""
    m = create_memory(
        memory_type="knowledge", content="candidate route", status="candidate"
    )
    client = TestClient(app)
    r = client.post(f"/memory/{m.id}/invalidate", json={"reason": "wrong route"})
    assert r.status_code == 409
    # Fix (consistency with the supersede-guard test): the 409 must also
    # leave the row untouched, not flip it before rejecting.
    assert _get_memory(m.id).status == "candidate"
def test_api_invalidate_unknown_id_is_404(env):
    """Invalidating a nonexistent id must yield a clean 404."""
    response = TestClient(app).post(
        "/memory/no-such-id/invalidate", json={"reason": "ghost"}
    )
    assert response.status_code == 404
def test_api_supersede_candidate_returns_409(env):
    """Mirror of the invalidate guard: candidates must not silently flip
    to superseded via the active-only supersede route."""
    candidate = create_memory(
        memory_type="knowledge", content="candidate target", status="candidate"
    )
    result = TestClient(app).post(
        f"/memory/{candidate.id}/supersede", json={"reason": "wrong route"}
    )
    assert result.status_code == 409
    # The guard must reject without mutating the row.
    assert _get_memory(candidate.id).status == "candidate"
def test_api_supersede_already_superseded_is_idempotent(env):
    """A repeat supersede on the same id must succeed (200) and report
    already_superseded rather than double-applying."""
    m = create_memory(memory_type="knowledge", content="will be superseded")
    client = TestClient(app)
    r1 = client.post(f"/memory/{m.id}/supersede", json={"reason": "first"})
    assert r1.status_code == 200
    # Fix: the first call's payload was never asserted, so a route that
    # always answered "already_superseded" would have passed. The exact
    # fresh-supersede payload string isn't pinned here (not visible in
    # this file); the invariant is that it differs from the repeat case.
    assert r1.json()["status"] != "already_superseded"
    r2 = client.post(f"/memory/{m.id}/supersede", json={"reason": "again"})
    assert r2.status_code == 200
    assert r2.json()["status"] == "already_superseded"
def test_api_supersede_unknown_id_is_404(env):
    """Superseding a nonexistent id must yield a clean 404."""
    response = TestClient(app).post(
        "/memory/no-such-id/supersede", json={"reason": "ghost"}
    )
    assert response.status_code == 404
def test_admin_dashboard_active_count_matches_full_table(env):
    """/admin/dashboard memories.active must match the SQL aggregate even
    when there are more active memories than the legacy sample limit (500).
    This guards the Codex finding that the dashboard was deriving counts
    from a confidence-sorted limit=500 fetch, hiding rows past the cap.
    We don't need 500 rows in the test — a small corpus that exercises
    the SQL-aggregate path is enough; the integrity-vs-dashboard equality
    is the invariant being asserted.
    """
    # Mix of statuses to exercise the by_status aggregate
    create_memory(memory_type="knowledge", content="a")
    create_memory(memory_type="knowledge", content="b", project="p06-polisher")
    create_memory(memory_type="project", content="c-cand", status="candidate")
    cand = create_memory(memory_type="project", content="d-cand", status="candidate")
    # Seed an "invalid" bucket via the service layer. Fix: the old comment
    # claimed the candidate was "promoted first via direct DB" — no such
    # promotion ever happened; invalidate_memory() is called on the
    # candidate directly, which is what the counts below assume.
    from atocore.memory.service import invalidate_memory

    invalidate_memory(cand.id)
    client = TestClient(app)
    dash = client.get("/admin/dashboard").json()
    assert dash["memories"]["active"] == 2
    assert dash["memories"]["candidates"] == 1
    assert dash["memories"]["by_status"]["invalid"] == 1
    assert dash["memories"]["total"] == 4
    assert dash["memories"]["by_project"].get("p06-polisher") == 1
    # "(none)" bucket is the COALESCE label for empty/null project
    assert "(none)" in dash["memories"]["by_project"]

View File

@@ -575,3 +575,121 @@ def test_expire_stale_candidates_keeps_reinforced(isolated_db):
assert mid not in expired
mem = _get_memory_by_id(mid)
assert mem["status"] == "candidate"
# ---------------------------------------------------------------------------
# Wave 1 (2026-04-29) — counts come from SQL, not from the top-N sample.
# Exposed by Codex audit when prod /admin/dashboard reported 315 active
# while /admin/integrity-check reported 1091. The dashboard was building
# its counts from a confidence-sorted limit=500 fetch.
# ---------------------------------------------------------------------------
def test_get_memory_count_summary_returns_full_table_aggregates(isolated_db):
    """Counts come from SQL aggregates, not a sampled fetch."""
    from atocore.memory.service import (
        create_memory,
        get_memory_count_summary,
        invalidate_memory,
    )

    n_active, n_candidates = 120, 7
    # More rows than any reasonable sampling LIMIT, so a LIMIT-based
    # counter would visibly disagree with the true totals.
    for i in range(n_active):
        create_memory(
            "knowledge",
            f"fact-{i}",
            project="p04-gigabit",
            confidence=0.9,
            status="active",
        )
    for i in range(n_candidates):
        create_memory("knowledge", f"cand-{i}", status="candidate")
    # One extra active row that we immediately invalidate, to seed the
    # "invalid" bucket without touching the 120-row active population.
    doomed = create_memory("knowledge", "to-invalidate", status="active")
    invalidate_memory(doomed.id)

    summary = get_memory_count_summary()
    assert summary["total"] == n_active + n_candidates + 1
    assert summary["by_status"]["active"] == n_active
    assert summary["by_status"]["candidate"] == n_candidates
    assert summary["by_status"]["invalid"] == 1
    assert summary["active"]["total"] == n_active
    assert summary["active"]["by_type"] == {"knowledge": n_active}
    assert summary["active"]["by_project"] == {"p04-gigabit": n_active}
def test_get_memory_returns_single_row_or_none(isolated_db):
    """get_memory(id) fetches exactly the matching row; unknown ids -> None."""
    from atocore.memory.service import create_memory, get_memory

    created = create_memory("knowledge", "single-row test")
    found = get_memory(created.id)
    assert found is not None
    assert found.id == created.id
    assert get_memory("non-existent-id") is None
def test_update_memory_can_change_project_with_canonicalization(
    isolated_db, project_registry
):
    """update_memory(project=...) canonicalizes aliases and writes audit."""
    project_registry(("p04-gigabit", ("p04", "gigabit")))
    from atocore.memory.service import (
        create_memory,
        get_memory,
        get_memory_audit,
        update_memory,
    )

    mem = create_memory("knowledge", "retargetable fact", project="atocore")
    # Retarget using the alias form; the service must resolve it.
    ok = update_memory(mem.id, project="p04")
    assert ok is True
    refreshed = get_memory(mem.id)
    assert refreshed.project == "p04-gigabit"  # canonical, not "p04"
    # The audit trail must carry the before/after project pair.
    audit_rows = get_memory_audit(mem.id, limit=10)
    update_rows = [r for r in audit_rows if r.get("action") == "updated"]
    assert update_rows, f"expected an updated audit row, got {audit_rows}"
    # NOTE(review): takes the first "updated" row — assumes the audit is
    # returned newest-first; confirm against get_memory_audit's ordering.
    head = update_rows[0]
    assert head["before"]["project"] == "atocore"
    assert head["after"]["project"] == "p04-gigabit"
def test_update_memory_project_unchanged_when_not_passed(isolated_db):
    """Omitting project= from update_memory must leave the project as-is."""
    from atocore.memory.service import create_memory, get_memory, update_memory

    row = create_memory("knowledge", "untouched project", project="p06-polisher")
    update_memory(row.id, content="edited content")
    assert get_memory(row.id).project == "p06-polisher"
def test_update_memory_to_empty_project_detects_global_duplicate(isolated_db):
    """Codex P3: when retargeting to project='' (global), the duplicate
    check must scope to the new project. If a global active memory with
    the same content already exists, the update must raise."""
    import pytest as _pytest
    from atocore.memory.service import create_memory, update_memory

    shared_text = "shared global fact"
    create_memory("knowledge", shared_text, project="")  # global original
    project_scoped = create_memory("knowledge", shared_text, project="p04-gigabit")
    # Retargeting the scoped copy into the global namespace collides.
    with _pytest.raises(ValueError, match="duplicate active memory"):
        update_memory(project_scoped.id, project="")
def test_auto_triage_suggested_project_put_body_uses_project_key():
    """Regression: the auto_triage caller used to PUT {"content": ...}
    which silently dropped the suggested project change. The fix sends
    {"project": suggested}. Inspect the script source so we don't have
    to spin up a live triage run."""
    from pathlib import Path

    script_path = Path(__file__).resolve().parents[1] / "scripts" / "auto_triage.py"
    script_text = script_path.read_text(encoding="utf-8")
    # The block that PUTs to /memory/{mid} for a suggested_project fix
    assert 'json.dumps({"project": suggested})' in script_text, (
        "auto_triage.py must PUT {\"project\": suggested} so the "
        "suggested-project correction actually applies. See Wave 1."
    )
    # And must not be back to the old shape
    assert 'json.dumps({"content": cand["content"]})' not in script_text