Daily job multiplies confidence by 0.97 (~2-month half-life) for
active memories with reference_count=0 AND idle > 30 days. Below
0.3 → auto-supersede with audit. Reversible via reinforcement
(which already bumps confidence back up).
Rationale: stale memories currently rank equal to fresh ones in
retrieval. Without decay, the brain accumulates obsolete facts
that compete with fresh knowledge for context-pack slots. With
decay, memories earn their longevity via reference.
- decay_unreferenced_memories() in service.py (stdlib-only, no cron
infra needed)
- POST /admin/memory/decay-run endpoint
- Nightly Step F4 in batch-extract.sh
- Exempt: reinforced (refcount > 0), graduated, superseded, invalid
- Audit row per supersession ("decayed below floor, no references"),
actor="confidence-decay". Per-decay rows skipped (chatty, no
human value — status change is the meaningful signal).
- Configurable via env: ATOCORE_DECAY_* (exposed through endpoint body)
Tests: +13 (basic decay, reinforcement protection, supersede at floor,
audit trail, graduated/superseded exemption, reinforcement reversibility,
threshold tuning, parameter validation, cross-run stacking).
Test suite total: 401 → 414.
Next in Phase 7: 7C tag canonicalization (weekly), then 7B contradiction
detection.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1363 lines · 49 KiB · Python
"""Memory Core — structured memory management.
|
||
|
||
Memory types (per Master Plan):
|
||
- identity: who the user is, role, background
|
||
- preference: how they like to work, style, tools
|
||
- project: project-specific knowledge and context
|
||
- episodic: what happened, conversations, events
|
||
- knowledge: verified facts, technical knowledge
|
||
- adaptation: learned corrections, behavioral adjustments
|
||
|
||
Memories have:
|
||
- confidence (0.0–1.0): how certain we are
|
||
- status: lifecycle state, one of MEMORY_STATUSES
|
||
* candidate: extracted from an interaction, awaiting human review
|
||
(Phase 9 Commit C). Candidates are NEVER included in
|
||
context packs.
|
||
* active: promoted/curated, visible to retrieval and context
|
||
* superseded: replaced by a newer entry
|
||
* invalid: rejected / error-corrected
|
||
- last_referenced_at / reference_count: reinforcement signal
|
||
(Phase 9 Commit B). Bumped whenever a captured interaction's
|
||
response content echoes this memory.
|
||
- optional link to source chunk: traceability
|
||
"""
|
||
|
||
import uuid
|
||
from dataclasses import dataclass
|
||
from datetime import datetime, timezone
|
||
|
||
from atocore.models.database import get_connection
|
||
from atocore.observability.logger import get_logger
|
||
from atocore.projects.registry import resolve_project_name
|
||
|
||
log = get_logger("memory")
|
||
|
||
# Closed vocabulary of memory categories (per Master Plan).
# create_memory / update_memory reject any memory_type outside this list.
MEMORY_TYPES = [
    "identity",    # who the user is, role, background
    "preference",  # how they like to work, style, tools
    "project",     # project-specific knowledge and context
    "episodic",    # what happened: conversations, events
    "knowledge",   # verified facts, technical knowledge
    "adaptation",  # learned corrections, behavioral adjustments
]
|
||
|
||
# Lifecycle states a memory can be in; validated on every create/update.
MEMORY_STATUSES = [
    "candidate",   # extracted from an interaction, awaiting review; never enters context packs
    "active",      # promoted/curated; visible to retrieval and context
    "superseded",  # replaced by a newer entry (non-destructive; row stays queryable)
    "invalid",     # rejected / error-corrected
    "graduated",  # Phase 5: memory has become an entity; content frozen, forward pointer in properties
]
|
||
|
||
|
||
@dataclass
class Memory:
    """One row of the ``memories`` table as a typed value object."""

    id: str                 # UUID4 primary key
    memory_type: str        # one of MEMORY_TYPES
    content: str            # the remembered fact/statement itself
    project: str            # canonical project id ("" = global scope)
    source_chunk_id: str    # traceability link to the source chunk ("" if none)
    confidence: float       # 0.0–1.0 certainty score
    status: str             # lifecycle state, one of MEMORY_STATUSES
    created_at: str         # timestamp string as stored in SQLite
    updated_at: str         # timestamp string as stored in SQLite
    last_referenced_at: str = ""   # reinforcement signal: last echo time ("" = never)
    reference_count: int = 0       # reinforcement signal: number of echoes
    domain_tags: list[str] | None = None  # lowercase domain strings (Phase 3)
    valid_until: str = ""  # ISO UTC; empty = permanent
|
||
|
||
|
||
def _audit_memory(
    memory_id: str,
    action: str,
    actor: str = "api",
    before: dict | None = None,
    after: dict | None = None,
    note: str = "",
) -> None:
    """Append one row to the memory_audit trail (fail-open).

    Phase 4 Robustness V1: every memory mutation flows through here so
    "how did this memory reach its current state?" and "when did we
    learn X?" stay answerable.

    ``action`` is a short verb: created, updated, promoted, rejected,
    superseded, invalidated, reinforced, auto_promoted, expired.
    ``actor`` identifies the caller: api (default), auto-triage,
    human-triage, host-cron, reinforcement, phase10-auto-promote, etc.
    ``before`` / ``after`` are field snapshots, stored JSON-serialized.
    Fail-open: any failure (including serialization) is logged and
    swallowed, so auditing can never break the mutation it describes.
    """
    import json as _json

    try:
        # Built inside the try so a non-serializable snapshot also
        # falls through to the warning instead of propagating.
        payload = (
            str(uuid.uuid4()),
            memory_id,
            action,
            actor or "api",
            _json.dumps(before or {}),
            _json.dumps(after or {}),
            (note or "")[:500],  # keep the free-text column bounded
        )
        with get_connection() as conn:
            conn.execute(
                "INSERT INTO memory_audit (id, memory_id, action, actor, "
                "before_json, after_json, note) VALUES (?, ?, ?, ?, ?, ?, ?)",
                payload,
            )
    except Exception as e:
        log.warning("memory_audit_failed", memory_id=memory_id, action=action, error=str(e))
|
||
|
||
|
||
def get_memory_audit(memory_id: str, limit: int = 100) -> list[dict]:
    """Return the audit trail for one memory, newest entry first."""
    import json as _json

    def _parse(blob) -> dict:
        # Tolerate malformed/legacy snapshots: unreadable JSON becomes {}.
        try:
            return _json.loads(blob or "{}")
        except Exception:
            return {}

    with get_connection() as conn:
        rows = conn.execute(
            "SELECT id, memory_id, action, actor, before_json, after_json, note, timestamp "
            "FROM memory_audit WHERE memory_id = ? ORDER BY timestamp DESC LIMIT ?",
            (memory_id, limit),
        ).fetchall()

    return [
        {
            "id": r["id"],
            "memory_id": r["memory_id"],
            "action": r["action"],
            "actor": r["actor"] or "api",
            "before": _parse(r["before_json"]),
            "after": _parse(r["after_json"]),
            "note": r["note"] or "",
            "timestamp": r["timestamp"],
        }
        for r in rows
    ]
|
||
|
||
|
||
def get_recent_audit(limit: int = 50) -> list[dict]:
    """Return the newest memory_audit rows across all memories."""
    import json as _json

    with get_connection() as conn:
        rows = conn.execute(
            "SELECT id, memory_id, action, actor, before_json, after_json, note, timestamp "
            "FROM memory_audit ORDER BY timestamp DESC LIMIT ?",
            (limit,),
        ).fetchall()

    entries: list[dict] = []
    for r in rows:
        # Only the after-snapshot is surfaced here (for content_preview);
        # malformed JSON degrades to an empty preview rather than failing.
        try:
            snapshot = _json.loads(r["after_json"] or "{}")
        except Exception:
            snapshot = {}
        entries.append({
            "id": r["id"],
            "memory_id": r["memory_id"],
            "action": r["action"],
            "actor": r["actor"] or "api",
            "note": r["note"] or "",
            "timestamp": r["timestamp"],
            "content_preview": (snapshot.get("content") or "")[:120],
        })
    return entries
|
||
|
||
|
||
def _normalize_tags(tags) -> list[str]:
|
||
"""Coerce a tags value (list, JSON string, None) to a clean lowercase list."""
|
||
import json as _json
|
||
if tags is None:
|
||
return []
|
||
if isinstance(tags, str):
|
||
try:
|
||
tags = _json.loads(tags) if tags.strip().startswith("[") else []
|
||
except Exception:
|
||
tags = []
|
||
if not isinstance(tags, list):
|
||
return []
|
||
out = []
|
||
for t in tags:
|
||
if not isinstance(t, str):
|
||
continue
|
||
t = t.strip().lower()
|
||
if t and t not in out:
|
||
out.append(t)
|
||
return out
|
||
|
||
|
||
def create_memory(
    memory_type: str,
    content: str,
    project: str = "",
    source_chunk_id: str = "",
    confidence: float = 1.0,
    status: str = "active",
    domain_tags: list[str] | None = None,
    valid_until: str = "",
    actor: str = "api",
) -> Memory:
    """Create a new memory entry, deduplicating against existing rows.

    ``status`` defaults to ``active`` for backward compatibility. Pass
    ``candidate`` when the memory is being proposed by the Phase 9 Commit C
    extractor and still needs human review before it can influence context.

    Phase 3: ``domain_tags`` is a list of lowercase domain strings
    (optics, mechanics, firmware, ...) for cross-project retrieval.
    ``valid_until`` is an ISO UTC timestamp; memories with valid_until
    in the past are excluded from context packs (but remain queryable).

    Raises ValueError for an unknown ``memory_type``/``status`` (and
    ``_validate_confidence`` rejects out-of-range confidence). If an
    identical (type, content, project, status) row already exists, no
    new row is inserted and the existing row is returned instead —
    without writing a "created" audit entry.
    """
    import json as _json

    if memory_type not in MEMORY_TYPES:
        raise ValueError(f"Invalid memory type '{memory_type}'. Must be one of: {MEMORY_TYPES}")
    if status not in MEMORY_STATUSES:
        raise ValueError(f"Invalid status '{status}'. Must be one of: {MEMORY_STATUSES}")
    _validate_confidence(confidence)

    # Canonicalize the project alias and normalize tags before the
    # dedup check so equivalent inputs collapse onto the same row.
    project = resolve_project_name(project)
    tags = _normalize_tags(domain_tags)
    tags_json = _json.dumps(tags)
    # Empty/whitespace-only valid_until is stored as NULL (= permanent).
    valid_until = (valid_until or "").strip() or None

    memory_id = str(uuid.uuid4())
    now = datetime.now(timezone.utc).isoformat()

    with get_connection() as conn:
        # Exact-duplicate guard: same type + content + project + status
        # short-circuits to the existing row.
        existing = conn.execute(
            "SELECT id FROM memories "
            "WHERE memory_type = ? AND content = ? AND project = ? AND status = ?",
            (memory_type, content, project, status),
        ).fetchone()
        if existing:
            log.info(
                "memory_duplicate_skipped",
                memory_type=memory_type,
                status=status,
                content_preview=content[:80],
            )
            return _row_to_memory(
                conn.execute("SELECT * FROM memories WHERE id = ?", (existing["id"],)).fetchone()
            )

        # created_at / updated_at are not set here — presumably filled
        # by table defaults; TODO confirm against the schema.
        conn.execute(
            "INSERT INTO memories (id, memory_type, content, project, source_chunk_id, "
            "confidence, status, domain_tags, valid_until) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (memory_id, memory_type, content, project, source_chunk_id or None,
             confidence, status, tags_json, valid_until),
        )

    log.info(
        "memory_created",
        memory_type=memory_type,
        status=status,
        content_preview=content[:80],
        tags=tags,
        valid_until=valid_until or "",
    )

    _audit_memory(
        memory_id=memory_id,
        action="created",
        actor=actor,
        after={
            "memory_type": memory_type,
            "content": content,
            "project": project,
            "status": status,
            "confidence": confidence,
            "domain_tags": tags,
            "valid_until": valid_until or "",
        },
    )

    # NOTE(review): the returned Memory carries ``now`` for both
    # timestamps while the stored row gets the DB's own values — the two
    # can differ in format/precision; confirm callers don't compare them.
    return Memory(
        id=memory_id,
        memory_type=memory_type,
        content=content,
        project=project,
        source_chunk_id=source_chunk_id,
        confidence=confidence,
        status=status,
        created_at=now,
        updated_at=now,
        last_referenced_at="",
        reference_count=0,
        domain_tags=tags,
        valid_until=valid_until or "",
    )
|
||
|
||
|
||
def get_memories(
    memory_type: str | None = None,
    project: str | None = None,
    active_only: bool = True,
    min_confidence: float = 0.0,
    limit: int = 50,
    status: str | None = None,
) -> list[Memory]:
    """Retrieve memories, optionally filtered.

    When ``status`` is provided explicitly, it takes precedence over
    ``active_only`` so callers can list the candidate review queue via
    ``get_memories(status='candidate')``. When ``status`` is omitted the
    legacy ``active_only`` behaviour still applies. Results are ordered
    by confidence, then recency.
    """
    if status is not None and status not in MEMORY_STATUSES:
        raise ValueError(f"Invalid status '{status}'. Must be one of: {MEMORY_STATUSES}")

    clauses: list[str] = []
    params: list = []

    if memory_type:
        clauses.append(" AND memory_type = ?")
        params.append(memory_type)
    if project is not None:
        # Canonicalize on the read side so a caller passing an alias
        # finds rows stored under the canonical id (and vice versa).
        # resolve_project_name returns the input unchanged for
        # unregistered names, so empty-string queries for
        # "no project scope" still work.
        clauses.append(" AND project = ?")
        params.append(resolve_project_name(project))
    if status is not None:
        clauses.append(" AND status = ?")
        params.append(status)
    elif active_only:
        clauses.append(" AND status = 'active'")
    if min_confidence > 0:
        clauses.append(" AND confidence >= ?")
        params.append(min_confidence)

    query = (
        "SELECT * FROM memories WHERE 1=1"
        + "".join(clauses)
        + " ORDER BY confidence DESC, updated_at DESC LIMIT ?"
    )
    params.append(limit)

    with get_connection() as conn:
        rows = conn.execute(query, params).fetchall()

    return [_row_to_memory(r) for r in rows]
|
||
|
||
|
||
def update_memory(
    memory_id: str,
    content: str | None = None,
    confidence: float | None = None,
    status: str | None = None,
    memory_type: str | None = None,
    domain_tags: list[str] | None = None,
    valid_until: str | None = None,
    actor: str = "api",
    note: str = "",
) -> bool:
    """Update an existing memory; only fields passed as non-None change.

    Returns True when a row was modified, False when the memory does not
    exist or no updatable field was supplied. Raises ValueError for an
    unknown status/memory_type, an out-of-range confidence, or when the
    update would leave two identical active memories.

    Every successful update writes a memory_audit row. The audit action
    verb is derived from the status transition (promoted / rejected /
    invalidated / superseded), falling back to "updated".
    """
    import json as _json

    with get_connection() as conn:
        existing = conn.execute("SELECT * FROM memories WHERE id = ?", (memory_id,)).fetchone()
        if existing is None:
            return False

        # Effective post-update values, used by the duplicate guard below.
        next_content = content if content is not None else existing["content"]
        next_status = status if status is not None else existing["status"]
        if confidence is not None:
            _validate_confidence(confidence)

        if next_status == "active":
            # Uniqueness invariant of the active set: no two active rows
            # may share (memory_type, content, project).
            duplicate = conn.execute(
                "SELECT id FROM memories "
                "WHERE memory_type = ? AND content = ? AND project = ? AND status = 'active' AND id != ?",
                (existing["memory_type"], next_content, existing["project"] or "", memory_id),
            ).fetchone()
            if duplicate:
                raise ValueError("Update would create a duplicate active memory")

        # Capture before-state for audit
        before_snapshot = {
            "content": existing["content"],
            "status": existing["status"],
            "confidence": existing["confidence"],
            "memory_type": existing["memory_type"],
        }
        after_snapshot = dict(before_snapshot)

        updates = []
        params: list = []

        if content is not None:
            updates.append("content = ?")
            params.append(content)
            after_snapshot["content"] = content
        if confidence is not None:
            updates.append("confidence = ?")
            params.append(confidence)
            after_snapshot["confidence"] = confidence
        if status is not None:
            if status not in MEMORY_STATUSES:
                raise ValueError(f"Invalid status '{status}'. Must be one of: {MEMORY_STATUSES}")
            updates.append("status = ?")
            params.append(status)
            after_snapshot["status"] = status
        if memory_type is not None:
            if memory_type not in MEMORY_TYPES:
                raise ValueError(f"Invalid memory type '{memory_type}'. Must be one of: {MEMORY_TYPES}")
            updates.append("memory_type = ?")
            params.append(memory_type)
            after_snapshot["memory_type"] = memory_type
        if domain_tags is not None:
            norm_tags = _normalize_tags(domain_tags)
            updates.append("domain_tags = ?")
            params.append(_json.dumps(norm_tags))
            after_snapshot["domain_tags"] = norm_tags
        if valid_until is not None:
            # Empty string clears the expiry (stored as NULL = permanent).
            vu = valid_until.strip() or None
            updates.append("valid_until = ?")
            params.append(vu)
            after_snapshot["valid_until"] = vu or ""

        if not updates:
            return False

        updates.append("updated_at = CURRENT_TIMESTAMP")
        params.append(memory_id)

        result = conn.execute(
            f"UPDATE memories SET {', '.join(updates)} WHERE id = ?",
            params,
        )

        if result.rowcount > 0:
            log.info("memory_updated", memory_id=memory_id)
            # Action verb is driven by status change when applicable; otherwise "updated"
            if status == "active" and before_snapshot["status"] == "candidate":
                action = "promoted"
            elif status == "invalid" and before_snapshot["status"] == "candidate":
                action = "rejected"
            elif status == "invalid":
                action = "invalidated"
            elif status == "superseded":
                action = "superseded"
            else:
                action = "updated"
            _audit_memory(
                memory_id=memory_id,
                action=action,
                actor=actor,
                before=before_snapshot,
                after=after_snapshot,
                note=note,
            )
            return True
        return False
|
||
|
||
|
||
def invalidate_memory(memory_id: str, actor: str = "api") -> bool:
    """Mark a memory as invalid (error correction).

    Thin wrapper over update_memory; returns False for an unknown id.
    """
    return update_memory(memory_id, status="invalid", actor=actor)
|
||
|
||
|
||
def supersede_memory(memory_id: str, actor: str = "api") -> bool:
    """Mark a memory as superseded (replaced by newer info).

    Thin wrapper over update_memory; returns False for an unknown id.
    """
    return update_memory(memory_id, status="superseded", actor=actor)
|
||
|
||
|
||
def promote_memory(memory_id: str, actor: str = "api", note: str = "") -> bool:
    """Promote a candidate memory to active (Phase 9 Commit C review queue).

    Returns False if the memory does not exist or is not currently a
    candidate. Raises ValueError only if the promotion would create a
    duplicate active memory (delegates to update_memory's existing check).
    """
    with get_connection() as conn:
        row = conn.execute(
            "SELECT status FROM memories WHERE id = ?", (memory_id,)
        ).fetchone()
    # Only candidates are promotable; everything else is a no-op.
    if row is None or row["status"] != "candidate":
        return False
    return update_memory(memory_id, status="active", actor=actor, note=note)
|
||
|
||
|
||
def reject_candidate_memory(memory_id: str, actor: str = "api", note: str = "") -> bool:
    """Reject a candidate memory (Phase 9 Commit C).

    Sets the candidate's status to ``invalid`` so it drops out of the
    review queue without polluting the active set. Returns False if the
    memory does not exist or is not currently a candidate.
    """
    with get_connection() as conn:
        row = conn.execute(
            "SELECT status FROM memories WHERE id = ?", (memory_id,)
        ).fetchone()
    # Only candidates can be rejected; other statuses are left untouched.
    if row is None or row["status"] != "candidate":
        return False
    return update_memory(memory_id, status="invalid", actor=actor, note=note)
|
||
|
||
|
||
def reinforce_memory(
    memory_id: str,
    confidence_delta: float = 0.02,
) -> tuple[bool, float, float]:
    """Bump a memory's confidence and reference count (Phase 9 Commit B).

    Returns ``(applied, old_confidence, new_confidence)``. ``applied``
    is False if the memory does not exist or is not ``active`` —
    reinforcement only touches live memories so the candidate queue and
    invalidated history are never silently revived.

    Confidence is capped at 1.0. last_referenced_at is set to the
    current UTC time in SQLite-comparable format. reference_count goes
    up by one per call (not per delta amount).
    """
    if confidence_delta < 0:
        raise ValueError("confidence_delta must be non-negative for reinforcement")

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    with get_connection() as conn:
        row = conn.execute(
            "SELECT confidence, status FROM memories WHERE id = ?", (memory_id,)
        ).fetchone()
        if row is None or row["status"] != "active":
            return False, 0.0, 0.0

        prev_conf = float(row["confidence"])
        next_conf = min(1.0, prev_conf + confidence_delta)
        conn.execute(
            "UPDATE memories SET confidence = ?, last_referenced_at = ?, "
            "reference_count = COALESCE(reference_count, 0) + 1 "
            "WHERE id = ?",
            (next_conf, stamp, memory_id),
        )

    log.info(
        "memory_reinforced",
        memory_id=memory_id,
        old_confidence=round(prev_conf, 4),
        new_confidence=round(next_conf, 4),
    )
    # One audit row per bump. Reinforcement fires on every captured
    # interaction, so this can get chatty — but it is what lets us trace
    # which interactions kept which memories alive (decay/cold-memory
    # analysis). Throttle here if volume ever becomes a problem.
    _audit_memory(
        memory_id=memory_id,
        action="reinforced",
        actor="reinforcement",
        before={"confidence": prev_conf},
        after={"confidence": next_conf},
    )
    return True, prev_conf, next_conf
|
||
|
||
|
||
def auto_promote_reinforced(
    min_reference_count: int = 3,
    min_confidence: float = 0.7,
    max_age_days: int = 14,
) -> list[str]:
    """Auto-promote candidate memories with strong reinforcement signals.

    Phase 10: candidates reinforced at least ``min_reference_count``
    times, holding ``min_confidence`` or more, and last referenced
    within ``max_age_days`` graduate from candidate to active without
    human review. This rewards knowledge that the system keeps
    referencing organically.

    Returns a list of promoted memory IDs.
    """
    from datetime import timedelta

    # SQLite-comparable "seconds" format, matching last_referenced_at
    # as written by reinforce_memory.
    cutoff = (
        datetime.now(timezone.utc) - timedelta(days=max_age_days)
    ).strftime("%Y-%m-%d %H:%M:%S")
    promoted: list[str] = []
    with get_connection() as conn:
        rows = conn.execute(
            "SELECT id, content, memory_type, project, confidence, "
            "reference_count FROM memories "
            "WHERE status = 'candidate' "
            "AND COALESCE(reference_count, 0) >= ? "
            "AND confidence >= ? "
            "AND last_referenced_at >= ?",
            (min_reference_count, min_confidence, cutoff),
        ).fetchall()

    for row in rows:
        mid = row["id"]
        # promote_memory re-checks candidate status itself, so the
        # SELECT above is only a pre-filter; a concurrent status change
        # simply makes this a no-op (ok = False).
        ok = promote_memory(
            mid,
            actor="phase10-auto-promote",
            note=f"ref_count={row['reference_count']} confidence={row['confidence']:.2f}",
        )
        if ok:
            promoted.append(mid)
            log.info(
                "memory_auto_promoted",
                memory_id=mid,
                memory_type=row["memory_type"],
                project=row["project"] or "(global)",
                reference_count=row["reference_count"],
                confidence=round(row["confidence"], 3),
            )
    return promoted
|
||
|
||
|
||
def extend_reinforced_valid_until(
    min_reference_count: int = 5,
    permanent_reference_count: int = 10,
    extension_days: int = 90,
    imminent_expiry_days: int = 30,
) -> list[dict]:
    """Phase 6 C.3 — transient-to-durable auto-extension.

    For active memories with valid_until within the next
    ``imminent_expiry_days`` AND reference_count >= min_reference_count:
    extend valid_until by ``extension_days``. If reference_count >=
    ``permanent_reference_count``, clear valid_until entirely (becomes
    permanent).

    Matches the user's intuition: "something transient becomes important
    if you keep coming back to it". The system watches reinforcement
    signals and extends expiry so context packs keep seeing durable
    facts instead of letting them decay out.

    Returns a list of {memory_id, action, old, new} dicts for each
    memory touched.
    """
    from datetime import timedelta

    now = datetime.now(timezone.utc)
    # Date-only (YYYY-MM-DD) strings: compared against the first 10
    # chars of valid_until via substr() in the query below.
    horizon = (now + timedelta(days=imminent_expiry_days)).strftime("%Y-%m-%d")
    new_expiry = (now + timedelta(days=extension_days)).strftime("%Y-%m-%d")
    now_str = now.strftime("%Y-%m-%d %H:%M:%S")

    extended: list[dict] = []

    with get_connection() as conn:
        rows = conn.execute(
            "SELECT id, valid_until, reference_count FROM memories "
            "WHERE status = 'active' "
            "AND valid_until IS NOT NULL AND valid_until != '' "
            "AND substr(valid_until, 1, 10) <= ? "
            "AND COALESCE(reference_count, 0) >= ?",
            (horizon, min_reference_count),
        ).fetchall()

        for r in rows:
            mid = r["id"]
            old_vu = r["valid_until"]
            ref_count = int(r["reference_count"] or 0)

            if ref_count >= permanent_reference_count:
                # Permanent promotion: NULL valid_until = never expires.
                conn.execute(
                    "UPDATE memories SET valid_until = NULL, updated_at = ? WHERE id = ?",
                    (now_str, mid),
                )
                extended.append({
                    "memory_id": mid, "action": "made_permanent",
                    "old_valid_until": old_vu, "new_valid_until": None,
                    "reference_count": ref_count,
                })
            else:
                # Finite extension (extension_days from now, not from
                # the old expiry).
                conn.execute(
                    "UPDATE memories SET valid_until = ?, updated_at = ? WHERE id = ?",
                    (new_expiry, now_str, mid),
                )
                extended.append({
                    "memory_id": mid, "action": "extended",
                    "old_valid_until": old_vu, "new_valid_until": new_expiry,
                    "reference_count": ref_count,
                })

    # Audit rows via the shared framework (fail-open). Note the action
    # string is "valid_until_extended" for both branches; the note text
    # carries the made_permanent/extended distinction.
    for ex in extended:
        try:
            _audit_memory(
                memory_id=ex["memory_id"],
                action="valid_until_extended",
                actor="transient-to-durable",
                before={"valid_until": ex["old_valid_until"]},
                after={"valid_until": ex["new_valid_until"]},
                note=f"reinforced {ex['reference_count']}x; {ex['action']}",
            )
        except Exception:
            pass

    if extended:
        log.info("reinforced_valid_until_extended", count=len(extended))
    return extended
|
||
|
||
|
||
def decay_unreferenced_memories(
    idle_days_threshold: int = 30,
    daily_decay_factor: float = 0.97,
    supersede_confidence_floor: float = 0.30,
    actor: str = "confidence-decay",
) -> dict[str, list]:
    """Phase 7D — daily confidence decay on cold memories.

    For every active memory with ``reference_count == 0`` AND whose last
    activity (``last_referenced_at`` if set, else ``created_at``) is
    older than ``idle_days_threshold`` days: multiply confidence by
    ``daily_decay_factor`` (0.97/day ≈ 2-month half-life).

    If the decayed confidence falls below ``supersede_confidence_floor``,
    auto-supersede the memory. Supersession is non-destructive — the row
    stays queryable via ``status='superseded'`` for audit.

    Reinforcement already bumps confidence back up, so a decayed memory
    that later gets referenced reverses its trajectory naturally.

    The job is idempotent-per-day in intent: running it multiple times
    in one day decays extra, but the cron runs once/day so this stays
    on-policy. A skipped day under-decays (the safe direction — memories
    age slower, not faster, than the policy).

    Raises ValueError when any tuning parameter is out of range.
    Returns {"decayed": [...], "superseded": [...]} with per-memory
    before/after confidence snapshots for audit/observability.
    """
    from datetime import timedelta

    # Validate all three tuning knobs up front — they are exposed through
    # the admin endpoint's request body, so they arrive as untrusted
    # input. A negative idle threshold would put the cutoff in the
    # future and decay even freshly created memories.
    if idle_days_threshold < 0:
        raise ValueError("idle_days_threshold must be non-negative")
    if not (0.0 < daily_decay_factor < 1.0):
        raise ValueError("daily_decay_factor must be between 0 and 1 (exclusive)")
    if not (0.0 <= supersede_confidence_floor <= 1.0):
        raise ValueError("supersede_confidence_floor must be in [0,1]")

    # Single clock read so the cutoff and updated_at stamps agree exactly.
    now = datetime.now(timezone.utc)
    cutoff_str = (now - timedelta(days=idle_days_threshold)).strftime("%Y-%m-%d %H:%M:%S")
    now_str = now.strftime("%Y-%m-%d %H:%M:%S")

    decayed: list[dict] = []
    superseded: list[dict] = []

    with get_connection() as conn:
        # COALESCE(last_referenced_at, created_at) is the effective
        # "last activity" — a never-reinforced memory ages from its
        # creation. Restricting to status='active' implicitly exempts
        # graduated, superseded, invalid, and candidate rows from decay.
        rows = conn.execute(
            "SELECT id, confidence, last_referenced_at, created_at "
            "FROM memories "
            "WHERE status = 'active' "
            "AND COALESCE(reference_count, 0) = 0 "
            "AND COALESCE(last_referenced_at, created_at) < ?",
            (cutoff_str,),
        ).fetchall()

        for r in rows:
            mid = r["id"]
            old_conf = float(r["confidence"])
            new_conf = max(0.0, old_conf * daily_decay_factor)

            if new_conf < supersede_confidence_floor:
                # Below the floor: retire the memory (non-destructive).
                conn.execute(
                    "UPDATE memories SET status = 'superseded', "
                    "confidence = ?, updated_at = ? WHERE id = ?",
                    (new_conf, now_str, mid),
                )
                superseded.append({
                    "memory_id": mid,
                    "old_confidence": old_conf,
                    "new_confidence": new_conf,
                })
            else:
                conn.execute(
                    "UPDATE memories SET confidence = ?, updated_at = ? WHERE id = ?",
                    (new_conf, now_str, mid),
                )
                decayed.append({
                    "memory_id": mid,
                    "old_confidence": old_conf,
                    "new_confidence": new_conf,
                })

    # Audit rows outside the transaction. Per-decay audit is skipped
    # (potentially hundreds of chatty rows/day with no human value);
    # supersessions ARE audited because those are status-changing
    # events humans may want to review.
    for entry in superseded:
        _audit_memory(
            memory_id=entry["memory_id"],
            action="superseded",
            actor=actor,
            before={"status": "active", "confidence": entry["old_confidence"]},
            after={"status": "superseded", "confidence": entry["new_confidence"]},
            note=f"decayed below floor {supersede_confidence_floor}, no references",
        )

    if decayed or superseded:
        log.info(
            "confidence_decay_run",
            decayed=len(decayed),
            superseded=len(superseded),
            idle_days_threshold=idle_days_threshold,
            daily_decay_factor=daily_decay_factor,
        )
    return {"decayed": decayed, "superseded": superseded}
|
||
|
||
|
||
def expire_stale_candidates(
    max_age_days: int = 14,
) -> list[str]:
    """Reject candidate memories that sat in queue too long unreinforced.

    Candidates older than ``max_age_days`` with zero reinforcement are
    auto-rejected to prevent unbounded queue growth. Returns rejected IDs.
    """
    from datetime import timedelta

    threshold = (
        datetime.now(timezone.utc) - timedelta(days=max_age_days)
    ).strftime("%Y-%m-%d %H:%M:%S")

    with get_connection() as conn:
        stale_ids = [
            r["id"]
            for r in conn.execute(
                "SELECT id FROM memories "
                "WHERE status = 'candidate' "
                "AND COALESCE(reference_count, 0) = 0 "
                "AND created_at < ?",
                (threshold,),
            ).fetchall()
        ]

    rejected: list[str] = []
    for mid in stale_ids:
        # reject_candidate_memory re-checks candidate status, so a row
        # that changed state since the SELECT is simply skipped.
        ok = reject_candidate_memory(
            mid,
            actor="candidate-expiry",
            note=f"unreinforced for {max_age_days}+ days",
        )
        if ok:
            rejected.append(mid)
            log.info("memory_expired", memory_id=mid)
    return rejected
|
||
|
||
|
||
def get_memories_for_context(
    memory_types: list[str] | None = None,
    project: str | None = None,
    budget: int = 500,
    header: str = "--- AtoCore Memory ---",
    footer: str = "--- End Memory ---",
    query: str | None = None,
) -> tuple[str, int]:
    """Build a formatted memory block for context injection.

    Returns ``(formatted_text, char_count)``; both are empty/zero when
    the budget cannot even hold the wrapper or nothing qualifies.

    Budget allocation per Master Plan section 9:
    identity: 5%, preference: 5%, rest from retrieval budget

    Callers may override ``header`` / ``footer`` to distinguish multiple
    memory blocks in the same pack (e.g. identity/preference vs
    project/knowledge memories).

    With a ``query``, candidates are reranked by lexical overlap
    (stemmed token intersection, ties broken by confidence). Without
    one, they keep the order ``get_memories`` yields — effectively
    "by confidence desc".
    """
    if memory_types is None:
        memory_types = ["identity", "preference"]

    # Reserve the wrapper cost (header + footer + their two joining
    # newlines) up front; bail out when nothing useful would fit.
    overhead = len(header) + len(footer) + 2
    if budget <= 0 or budget <= overhead:
        return "", 0
    remaining = budget - overhead

    # Tokenize the query once, via the reinforcement tokenizer, so
    # lexical scoring here matches the reinforcement matcher exactly.
    tokens: set[str] | None = None
    if query:
        from atocore.memory.reinforcement import _normalize, _tokenize

        tokens = _tokenize(_normalize(query)) or None

    # Pool candidates from every requested type (deduped by id) and
    # rank globally. Ranking per type and walking types in order would
    # starve later types once the first type filled the budget — even
    # when a later-type candidate matched the query perfectly. The
    # insertion order survives as a stable tiebreaker inside
    # ``_rank_memories_for_query`` thanks to Python's stable sort.
    pool: list[Memory] = []
    seen: set[str] = set()
    for mtype in memory_types:
        for mem in get_memories(
            memory_type=mtype,
            project=project,
            min_confidence=0.5,
            limit=30,
        ):
            if mem.id not in seen:
                seen.add(mem.id)
                pool.append(mem)

    # Phase 3: context packs must never surface expired facts, even
    # though raw API queries still return them for audit/history.
    if pool:
        pool = _filter_expired(pool)

    if tokens is not None:
        pool = _rank_memories_for_query(pool, tokens, query=query)

    # Per-entry cap keeps one long memory from monopolizing the band:
    # with 16 p06 memories sharing ~700 chars, an uncapped 530-char
    # overview memory would exhaust the budget before a query-relevant
    # 150-char memory got a slot. The cap guarantees 2-3 entries fit
    # regardless of any single memory's length.
    cap = 250
    picked: list[str] = []
    for mem in pool:
        body = mem.content
        if len(body) > cap:
            body = body[:cap - 3].rstrip() + "..."
        entry = f"[{mem.memory_type}] {body}"
        cost = len(entry) + 1  # +1 for the joining newline
        if cost > remaining:
            continue  # too long — a shorter later entry may still fit
        picked.append(entry)
        remaining -= cost

    if not picked:
        return "", 0

    text = "\n".join([header, *picked, footer])
    log.info("memories_for_context", count=len(picked), chars=len(text))
    return text, len(text)
|
||
def _filter_expired(memories: list["Memory"]) -> list["Memory"]:
|
||
"""Drop memories whose valid_until is in the past (UTC comparison)."""
|
||
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||
out = []
|
||
for m in memories:
|
||
vu = (m.valid_until or "").strip()
|
||
if not vu:
|
||
out.append(m)
|
||
continue
|
||
# Compare as string (ISO dates/timestamps sort correctly lexicographically
|
||
# when they have the same format; date-only vs full ts both start YYYY-MM-DD).
|
||
if vu[:10] >= now_iso:
|
||
out.append(m)
|
||
# else: expired, drop silently
|
||
return out
|
||
|
||
|
||
def _rank_memories_for_query(
    memories: list["Memory"],
    query_tokens: set[str],
    query: str | None = None,
) -> list["Memory"]:
    """Rerank ``memories`` by lexical overlap with a pre-tokenized query.

    Sort keys, strongest first:

    1. overlap density (overlap / memory token count) — rewards short
       focused memories that match precisely over long overviews that
       incidentally share a few tokens
    2. absolute overlap count
    3. domain-tag hits (Phase 3): a tag appearing verbatim in the raw
       query promotes topical matches, but only breaks ties among
       content-similar candidates because it sorts after density
    4. confidence
    """
    from atocore.memory.reinforcement import _normalize, _tokenize

    haystack = (query or "").lower()

    def _key(mem: "Memory") -> tuple[float, int, int, float]:
        toks = _tokenize(_normalize(mem.content))
        hits = len(toks & query_tokens) if toks else 0
        density = hits / len(toks) if toks else 0.0
        # Substring tag match against the raw query — strong topical signal.
        tag_hits = sum(1 for tag in (mem.domain_tags or []) if tag and tag in haystack)
        return (density, hits, tag_hits, mem.confidence)

    # sorted() is stable, so equal-key memories keep their pool order
    # (type priority), exactly like the original list.sort approach.
    return sorted(memories, key=_key, reverse=True)
|
||
def _row_to_memory(row) -> Memory:
    """Hydrate a ``Memory`` dataclass from a DB row.

    Tolerant of older-schema rows: columns added later
    (last_referenced_at, reference_count, domain_tags, valid_until)
    fall back to empty defaults when absent, and malformed tag JSON
    degrades to an empty list instead of raising.
    """
    import json as _json

    cols = row.keys() if hasattr(row, "keys") else []

    def _col(name, default=None):
        # Column-safe accessor: missing columns yield the default.
        return row[name] if name in cols else default

    raw_tags = _col("domain_tags")
    try:
        tags = _json.loads(raw_tags) if raw_tags else []
    except Exception:
        tags = []
    if not isinstance(tags, list):
        tags = []

    return Memory(
        id=row["id"],
        memory_type=row["memory_type"],
        content=row["content"],
        project=row["project"] or "",
        source_chunk_id=row["source_chunk_id"] or "",
        confidence=row["confidence"],
        status=row["status"],
        created_at=row["created_at"],
        updated_at=row["updated_at"],
        last_referenced_at=_col("last_referenced_at") or "",
        reference_count=int(_col("reference_count", 0) or 0),
        domain_tags=tags,
        valid_until=_col("valid_until") or "",
    )
|
||
def _validate_confidence(confidence: float) -> None:
|
||
if not 0.0 <= confidence <= 1.0:
|
||
raise ValueError("Confidence must be between 0.0 and 1.0")
|
||
|
||
|
||
# ---------------------------------------------------------------------
|
||
# Phase 7A — Memory Consolidation: merge-candidate lifecycle
|
||
# ---------------------------------------------------------------------
|
||
#
|
||
# The detector (scripts/memory_dedup.py) writes proposals into
|
||
# memory_merge_candidates. The triage UI lists pending rows, a human
|
||
# reviews, and on approve we execute the merge here — never at detect
|
||
# time. This keeps the audit trail clean: every mutation is a human
|
||
# decision.
|
||
|
||
|
||
def create_merge_candidate(
    memory_ids: list[str],
    similarity: float,
    proposed_content: str,
    proposed_memory_type: str,
    proposed_project: str,
    proposed_tags: list[str] | None = None,
    proposed_confidence: float = 0.6,
    reason: str = "",
) -> str | None:
    """Insert a merge-candidate row. Returns the new row id, or None if
    a pending candidate already covers this exact set of memory ids
    (idempotent scan — re-running the detector doesn't double-create).

    Raises ValueError when fewer than two *distinct* memory ids are
    given. (Previously the `< 2` guard ran before dedup, so a detector
    passing ["a", "a"] created a 1-id candidate that merge_memories
    would later reject as invalid — validate the deduped set instead.)
    """
    import json as _json

    # Canonical sorted-unique id list: the JSON string doubles as the
    # idempotency key, so its ordering must be deterministic.
    memory_ids_sorted = sorted(set(memory_ids or []))
    if len(memory_ids_sorted) < 2:
        raise ValueError("merge candidate requires at least 2 memory_ids")

    memory_ids_json = _json.dumps(memory_ids_sorted)
    tags_json = _json.dumps(_normalize_tags(proposed_tags))
    candidate_id = str(uuid.uuid4())
    # Coerce once, reused by both the INSERT and the log line. The log
    # previously called round(similarity, 4) on the raw value, which
    # raised TypeError whenever a detector passed similarity=None even
    # though the INSERT itself tolerated None.
    sim = float(similarity or 0.0)

    with get_connection() as conn:
        # Idempotency: same sorted-id set already pending? skip.
        existing = conn.execute(
            "SELECT id FROM memory_merge_candidates "
            "WHERE status = 'pending' AND memory_ids = ?",
            (memory_ids_json,),
        ).fetchone()
        if existing:
            return None

        conn.execute(
            "INSERT INTO memory_merge_candidates "
            "(id, status, memory_ids, similarity, proposed_content, "
            "proposed_memory_type, proposed_project, proposed_tags, "
            "proposed_confidence, reason) "
            "VALUES (?, 'pending', ?, ?, ?, ?, ?, ?, ?, ?)",
            (
                candidate_id, memory_ids_json, sim,
                # Truncations mirror the column limits used elsewhere.
                (proposed_content or "")[:2000],
                (proposed_memory_type or "knowledge")[:50],
                (proposed_project or "")[:100],
                tags_json,
                max(0.0, min(1.0, float(proposed_confidence))),  # clamp to [0, 1]
                (reason or "")[:500],
            ),
        )
    log.info(
        "merge_candidate_created",
        candidate_id=candidate_id,
        memory_count=len(memory_ids_sorted),
        similarity=round(sim, 4),
    )
    return candidate_id
||
|
||
def get_merge_candidates(status: str = "pending", limit: int = 100) -> list[dict]:
    """List merge candidates with their source memories inlined.

    Returns up to ``limit`` candidates in ``status`` state, newest
    first. Each dict carries the proposal fields plus a ``sources``
    list holding a snapshot of every referenced memory that still
    exists (missing memories are silently omitted).
    """
    import json as _json

    with get_connection() as conn:
        rows = conn.execute(
            "SELECT * FROM memory_merge_candidates "
            "WHERE status = ? ORDER BY created_at DESC LIMIT ?",
            (status, limit),
        ).fetchall()

        out = []
        for r in rows:
            # Stored as JSON text; malformed rows degrade to empty lists
            # rather than failing the whole listing.
            try:
                mem_ids = _json.loads(r["memory_ids"] or "[]")
            except Exception:
                mem_ids = []
            try:
                tags = _json.loads(r["proposed_tags"] or "[]")
            except Exception:
                tags = []

            # Inline each source memory so the triage UI can show the
            # proposal next to its inputs without extra round-trips.
            # NOTE(review): one SELECT per memory id per candidate —
            # presumably fine at triage scale (limit<=100); confirm if
            # this endpoint ever serves larger pages.
            sources = []
            for mid in mem_ids:
                srow = conn.execute(
                    "SELECT id, memory_type, content, project, confidence, "
                    "status, reference_count, domain_tags, valid_until "
                    "FROM memories WHERE id = ?",
                    (mid,),
                ).fetchone()
                if srow:
                    try:
                        stags = _json.loads(srow["domain_tags"] or "[]")
                    except Exception:
                        stags = []
                    sources.append({
                        "id": srow["id"],
                        "memory_type": srow["memory_type"],
                        "content": srow["content"],
                        "project": srow["project"] or "",
                        "confidence": srow["confidence"],
                        "status": srow["status"],
                        "reference_count": int(srow["reference_count"] or 0),
                        "domain_tags": stags,
                        "valid_until": srow["valid_until"] or "",
                    })

            out.append({
                "id": r["id"],
                "status": r["status"],
                "memory_ids": mem_ids,
                "similarity": r["similarity"],
                "proposed_content": r["proposed_content"] or "",
                "proposed_memory_type": r["proposed_memory_type"] or "knowledge",
                "proposed_project": r["proposed_project"] or "",
                "proposed_tags": tags,
                "proposed_confidence": r["proposed_confidence"],
                "reason": r["reason"] or "",
                "created_at": r["created_at"],
                "resolved_at": r["resolved_at"],
                "resolved_by": r["resolved_by"],
                "result_memory_id": r["result_memory_id"],
                "sources": sources,
            })
    return out
|
||
def reject_merge_candidate(candidate_id: str, actor: str = "human-triage", note: str = "") -> bool:
    """Reject a pending merge candidate; source memories stay untouched.

    Returns False when the candidate is missing or already resolved —
    the ``status = 'pending'`` guard in the WHERE clause makes the
    transition race-safe against concurrent triage actions.
    """
    resolved_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    with get_connection() as conn:
        updated = conn.execute(
            "UPDATE memory_merge_candidates "
            "SET status = 'rejected', resolved_at = ?, resolved_by = ? "
            "WHERE id = ? AND status = 'pending'",
            (resolved_at, actor, candidate_id),
        ).rowcount
    if not updated:
        return False
    log.info("merge_candidate_rejected", candidate_id=candidate_id, actor=actor, note=note[:100])
    return True
|
||
def merge_memories(
    candidate_id: str,
    actor: str = "human-triage",
    override_content: str | None = None,
    override_tags: list[str] | None = None,
) -> str | None:
    """Execute an approved merge candidate.

    1. Validate all source memories still status=active
    2. Create the new merged memory (status=active)
    3. Mark each source status=superseded with an audit row pointing at
       the new merged id
    4. Mark the candidate status=approved, record result_memory_id
    5. Write a consolidated audit row on the new memory

    Returns the new merged memory's id, or None if the candidate cannot
    be executed (already resolved, source tampered, etc.).

    ``override_content`` and ``override_tags`` let the UI pass the human's
    edits before clicking approve.
    """
    import json as _json

    now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")

    with get_connection() as conn:
        row = conn.execute(
            "SELECT * FROM memory_merge_candidates WHERE id = ?",
            (candidate_id,),
        ).fetchone()
        # Only pending candidates are executable — double-approve is a no-op.
        if row is None or row["status"] != "pending":
            log.warning("merge_candidate_not_pending", candidate_id=candidate_id)
            return None

        try:
            mem_ids = _json.loads(row["memory_ids"] or "[]")
        except Exception:
            mem_ids = []
        if not mem_ids or len(mem_ids) < 2:
            log.warning("merge_candidate_invalid_memory_ids", candidate_id=candidate_id)
            return None

        # Snapshot sources + validate all active. Any source that was
        # deleted or superseded since detection aborts the whole merge.
        source_rows = []
        for mid in mem_ids:
            srow = conn.execute(
                "SELECT * FROM memories WHERE id = ?", (mid,)
            ).fetchone()
            if srow is None or srow["status"] != "active":
                log.warning(
                    "merge_source_not_active",
                    candidate_id=candidate_id,
                    memory_id=mid,
                    actual_status=(srow["status"] if srow else "missing"),
                )
                return None
            source_rows.append(srow)

        # Build merged memory fields — prefer human overrides, then proposed
        content = (override_content or row["proposed_content"] or "").strip()
        if not content:
            log.warning("merge_candidate_empty_content", candidate_id=candidate_id)
            return None

        # Unknown proposed type falls back to the first source's type.
        merged_type = (row["proposed_memory_type"] or source_rows[0]["memory_type"]).lower()
        if merged_type not in MEMORY_TYPES:
            merged_type = source_rows[0]["memory_type"]

        merged_project = row["proposed_project"] or source_rows[0]["project"] or ""
        merged_project = resolve_project_name(merged_project)

        # Tags: override wins, else proposed, else union of sources
        if override_tags is not None:
            merged_tags = _normalize_tags(override_tags)
        else:
            try:
                proposed_tags = _json.loads(row["proposed_tags"] or "[]")
            except Exception:
                proposed_tags = []
            if proposed_tags:
                merged_tags = _normalize_tags(proposed_tags)
            else:
                # Order-preserving union of the sources' tag lists.
                union: list[str] = []
                for srow in source_rows:
                    try:
                        stags = _json.loads(srow["domain_tags"] or "[]")
                    except Exception:
                        stags = []
                    for t in stags:
                        if isinstance(t, str) and t and t not in union:
                            union.append(t)
                merged_tags = union

        # confidence = max; reference_count = sum — the merged memory
        # inherits the strongest belief and the combined usage history.
        merged_confidence = max(float(s["confidence"]) for s in source_rows)
        total_refs = sum(int(s["reference_count"] or 0) for s in source_rows)

        # valid_until: if any source is permanent (None/empty), merged is permanent.
        # Otherwise take the latest (lexical compare on ISO dates works).
        merged_vu: str | None = ""  # placeholder
        has_permanent = any(not (s["valid_until"] or "").strip() for s in source_rows)
        if has_permanent:
            merged_vu = None
        else:
            merged_vu = max((s["valid_until"] or "").strip() for s in source_rows) or None

        new_id = str(uuid.uuid4())
        tags_json = _json.dumps(merged_tags)

        conn.execute(
            "INSERT INTO memories (id, memory_type, content, project, "
            "source_chunk_id, confidence, status, domain_tags, valid_until, "
            "reference_count, last_referenced_at) "
            "VALUES (?, ?, ?, ?, NULL, ?, 'active', ?, ?, ?, ?)",
            (
                new_id, merged_type, content[:2000], merged_project,
                merged_confidence, tags_json, merged_vu, total_refs, now_str,
            ),
        )

        # Mark sources superseded
        for srow in source_rows:
            conn.execute(
                "UPDATE memories SET status = 'superseded', updated_at = ? "
                "WHERE id = ?",
                (now_str, srow["id"]),
            )

        # Mark candidate approved
        conn.execute(
            "UPDATE memory_merge_candidates SET status = 'approved', "
            "resolved_at = ?, resolved_by = ?, result_memory_id = ? WHERE id = ?",
            (now_str, actor, new_id, candidate_id),
        )

    # Audit rows (out of the transaction; fail-open via _audit_memory)
    # so an audit-write failure can never roll back a committed merge.
    _audit_memory(
        memory_id=new_id,
        action="created_via_merge",
        actor=actor,
        after={
            "memory_type": merged_type,
            "content": content,
            "project": merged_project,
            "confidence": merged_confidence,
            "domain_tags": merged_tags,
            "reference_count": total_refs,
            "merged_from": list(mem_ids),
            "merge_candidate_id": candidate_id,
        },
        note=f"merged {len(mem_ids)} sources via candidate {candidate_id[:8]}",
    )
    for srow in source_rows:
        _audit_memory(
            memory_id=srow["id"],
            action="superseded",
            actor=actor,
            before={"status": "active", "content": srow["content"]},
            after={"status": "superseded", "superseded_by": new_id},
            note=f"merged into {new_id}",
        )

    log.info(
        "merge_executed",
        candidate_id=candidate_id,
        result_memory_id=new_id,
        source_count=len(source_rows),
        actor=actor,
    )
    return new_id
|