feat: Phase 7A — semantic memory dedup ("sleep cycle" V1)
New table memory_merge_candidates + service functions to cluster near-duplicate active memories within (project, memory_type) buckets, draft unified content via LLM, and merge on human approval. Source memories become superseded (never deleted); the merged memory carries the union of tags, the max of confidence, and the sum of reference_count.

- schema migration for memory_merge_candidates
- atocore.memory.similarity: cosine + transitive clustering
- atocore.memory._dedup_prompt: stdlib-only LLM prompt preserving every specific
- service: merge_memories / create_merge_candidate / get_merge_candidates / reject_merge_candidate
- scripts/memory_dedup.py: host-side detector (HTTP-only, idempotent)
- 5 API endpoints under /admin/memory/merge-candidates* + /admin/memory/dedup-scan
- triage UI: purple "🔗 Merge Candidates" section + "🔗 Scan for duplicates" bar
- batch-extract.sh Step B3 (0.90 daily, 0.85 Sundays)
- deploy/dalidou/dedup-watcher.sh for UI-triggered scans
- 21 new tests (374 → 395)
- docs/PHASE-7-MEMORY-CONSOLIDATION.md covering 7A-7H roadmap

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -377,6 +377,177 @@ _ENTITY_TRIAGE_CSS = """
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Phase 7A — Merge candidates (semantic dedup)
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
# Stylesheet for the merge-candidate triage cards. The class names here are
# the ones emitted by _render_merge_card; #8b5cf6 is the accent colour shared
# by the card border, the type badge, the proposed-content box and the
# approve button.
_MERGE_TRIAGE_CSS = """
<style>
  .cand-merge { border-left: 3px solid #8b5cf6; }
  .merge-type { background: #8b5cf6; color: white; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.75rem; }
  .merge-sources { margin: 0.5rem 0 0.8rem 0; display: flex; flex-direction: column; gap: 0.35rem; }
  .merge-source { background: var(--bg); border: 1px dashed var(--border); border-radius: 4px; padding: 0.4rem 0.6rem; font-size: 0.85rem; }
  .merge-source-meta { font-family: monospace; font-size: 0.72rem; opacity: 0.7; margin-bottom: 0.2rem; }
  .merge-arrow { text-align: center; font-size: 1.1rem; opacity: 0.5; margin: 0.3rem 0; }
  .merge-proposed { background: var(--card); border: 1px solid #8b5cf6; border-radius: 4px; padding: 0.5rem; }
  .btn-merge-approve { background: #8b5cf6; color: white; border-color: #8b5cf6; }
  .btn-merge-approve:hover { background: #7c3aed; }
</style>
"""
|
||||
|
||||
|
||||
def _render_merge_source(src: dict) -> str:
    """Render one source memory of a merge candidate as a ``merge-source`` box.

    Shows the first 8 characters of the id, the memory type, the project
    (``(global)`` when empty), confidence, reference count and the first
    300 characters of the content.
    """
    # ``dict.get(key, default)`` only applies the default when the key is
    # absent; a key holding None would otherwise reach float()/int()/slicing
    # and raise TypeError, so every field is coalesced with ``or``.
    src_id = _escape(str(src.get("id") or "")[:8])
    memory_type = _escape(src.get("memory_type") or "")
    project = _escape(src.get("project") or "(global)")
    confidence = float(src.get("confidence") or 0)
    ref_count = int(src.get("reference_count") or 0)
    content = _escape((src.get("content") or "")[:300])
    return f"""
    <div class="merge-source">
      <div class="merge-source-meta">
        {src_id} · [{memory_type}]
        · {project}
        · conf {confidence:.2f}
        · refs {ref_count}
      </div>
      <div>{content}</div>
    </div>
    """


def _render_merge_card(cand: dict) -> str:
    """Render one merge candidate as an HTML triage card.

    Args:
        cand: Candidate mapping. Read with ``.get`` for the keys ``id``,
            ``similarity``, ``sources`` (list of source-memory dicts),
            ``proposed_content``, ``proposed_tags``, ``proposed_project``
            and ``reason``. Missing or ``None`` values fall back to
            empty/zero so a sparse row cannot break page rendering.

    Returns:
        The card markup: the source memories, an editable textarea with the
        proposed content, a tags input, an optional reason line, and the
        approve / keep-separate buttons. All interpolated text goes through
        ``_escape``. Element ids (``mcand-``, ``mcontent-``, ``mtags-``,
        ``mstatus-``) and ``data-merge-id`` carry the candidate id so the
        page script can locate them.
    """
    cid = _escape(str(cand.get("id") or ""))
    sim = float(cand.get("similarity") or 0.0)
    sources = cand.get("sources") or []
    proposed_content = cand.get("proposed_content") or ""
    proposed_project = cand.get("proposed_project") or ""
    reason = cand.get("reason") or ""
    # str() guards against non-string tags, which would make join() raise.
    tags_str = ", ".join(str(tag) for tag in (cand.get("proposed_tags") or []))

    src_html = "".join(_render_merge_source(src) for src in sources)
    reason_html = (
        f'<div class="auto-triage-msg" style="margin-top:0.4rem;">💡 {_escape(reason)}</div>'
        if reason
        else ""
    )

    return f"""
    <div class="cand cand-merge" id="mcand-{cid}" data-merge-id="{cid}">
      <div class="cand-head">
        <span class="cand-type merge-type">[merge · {len(sources)} sources]</span>
        <span class="cand-project">{_escape(proposed_project or '(global)')}</span>
        <span class="cand-meta">sim ≥ {sim:.2f}</span>
      </div>
      <div class="merge-sources">{src_html}</div>
      <div class="merge-arrow">↓ merged into ↓</div>
      <div class="merge-proposed">
        <textarea class="cand-content" id="mcontent-{cid}">{_escape(proposed_content)}</textarea>
        <div class="cand-meta-row">
          <label class="cand-field-label">Tags:
            <input type="text" class="cand-tags-input" id="mtags-{cid}" value="{_escape(tags_str)}" placeholder="tag1, tag2">
          </label>
        </div>
        {reason_html}
      </div>
      <div class="cand-actions">
        <button class="btn-merge-approve" data-merge-id="{cid}" title="Approve merge">✅ Approve Merge</button>
        <button class="btn-reject" data-merge-id="{cid}" data-merge-reject="1" title="Keep separate">❌ Keep Separate</button>
      </div>
      <div class="cand-status" id="mstatus-{cid}"></div>
    </div>
    """
|
||||
|
||||
|
||||
# Client-side handlers for the merge-candidate cards (approve / keep separate)
# and for the "Scan for duplicates" bar. This is a plain string, not an
# f-string, so the JavaScript braces and the `${...}` template are emitted
# unchanged.
#
# Every fetch() is wrapped in try/catch: fetch rejects on network failure,
# which would otherwise leave the status text stuck on "Merging…" /
# "Rejecting…" and, in requestDedupScan, leave the button disabled.
_MERGE_TRIAGE_SCRIPT = """
<script>
async function mergeApprove(id) {
  const st = document.getElementById('mstatus-' + id);
  st.textContent = 'Merging…';
  st.className = 'cand-status ok';
  const content = document.getElementById('mcontent-' + id).value;
  const tagsRaw = document.getElementById('mtags-' + id).value;
  const tags = tagsRaw.split(',').map(t => t.trim()).filter(Boolean);
  try {
    const r = await fetch('/admin/memory/merge-candidates/' + encodeURIComponent(id) + '/approve', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({actor: 'human-triage', content: content, domain_tags: tags}),
    });
    if (r.ok) {
      const data = await r.json();
      st.textContent = '✅ Merged → ' + (data.result_memory_id || '').slice(0, 8);
      setTimeout(() => {
        const card = document.getElementById('mcand-' + id);
        if (card) { card.style.opacity = '0'; setTimeout(() => card.remove(), 300); }
      }, 600);
    } else {
      const err = await r.text();
      st.textContent = '❌ ' + r.status + ': ' + err.slice(0, 120);
      st.className = 'cand-status err';
    }
  } catch (e) {
    st.textContent = '❌ Request failed: ' + (e && e.message ? e.message : e);
    st.className = 'cand-status err';
  }
}

async function mergeReject(id) {
  const st = document.getElementById('mstatus-' + id);
  st.textContent = 'Rejecting…';
  st.className = 'cand-status ok';
  try {
    const r = await fetch('/admin/memory/merge-candidates/' + encodeURIComponent(id) + '/reject', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({actor: 'human-triage'}),
    });
    if (r.ok) {
      st.textContent = '❌ Kept separate';
      setTimeout(() => {
        const card = document.getElementById('mcand-' + id);
        if (card) { card.style.opacity = '0'; setTimeout(() => card.remove(), 300); }
      }, 400);
    } else {
      st.textContent = '❌ ' + r.status;
      st.className = 'cand-status err';
    }
  } catch (e) {
    st.textContent = '❌ Request failed: ' + (e && e.message ? e.message : e);
    st.className = 'cand-status err';
  }
}

document.addEventListener('click', (e) => {
  const mid = e.target.dataset?.mergeId;
  if (!mid) return;
  if (e.target.classList.contains('btn-merge-approve')) mergeApprove(mid);
  else if (e.target.dataset?.mergeReject) mergeReject(mid);
});

async function requestDedupScan() {
  const btn = document.getElementById('dedup-btn');
  const status = document.getElementById('dedup-status');
  btn.disabled = true;
  btn.textContent = 'Queuing…';
  status.textContent = '';
  status.className = 'auto-triage-msg';
  // An empty or non-numeric field parses to NaN, which JSON.stringify would
  // send as null; fall back to the default threshold instead.
  const parsed = parseFloat(document.getElementById('dedup-threshold').value);
  const threshold = Number.isFinite(parsed) ? parsed : 0.88;
  try {
    const r = await fetch('/admin/memory/dedup-scan', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({project: '', similarity_threshold: threshold, max_batch: 50}),
    });
    if (r.ok) {
      status.textContent = `✓ Queued dedup scan at threshold ${threshold}. Host watcher runs every 2 min; refresh in ~3 min to see merge candidates.`;
      status.className = 'auto-triage-msg ok';
    } else {
      status.textContent = '✗ ' + r.status;
      status.className = 'auto-triage-msg err';
    }
  } catch (e) {
    status.textContent = '✗ Request failed: ' + (e && e.message ? e.message : e);
    status.className = 'auto-triage-msg err';
  } finally {
    // Always restore the button, including after a failed request.
    setTimeout(() => {
      btn.disabled = false;
      btn.textContent = '🔗 Scan for duplicates';
    }, 2000);
  }
}
</script>
"""
|
||||
|
||||
|
||||
def _render_dedup_bar() -> str:
    """The 'Scan for duplicates' control bar.

    Returns static markup: a button whose ``onclick`` calls the page's
    ``requestDedupScan()`` function, a numeric threshold input (0.70–0.99,
    step 0.01, default 0.88), and a status span pre-filled with a short
    explanation of what the scan does.
    """
    bar_html = """
    <div class="auto-triage-bar">
      <button id="dedup-btn" onclick="requestDedupScan()" title="Run semantic dedup scan on Dalidou host">
        🔗 Scan for duplicates
      </button>
      <label class="cand-field-label" style="margin:0 0.5rem;">
        Threshold:
        <input id="dedup-threshold" type="number" min="0.70" max="0.99" step="0.01" value="0.88"
               style="width:70px; padding:0.25rem; background:var(--bg); color:var(--text); border:1px solid var(--border); border-radius:3px;">
      </label>
      <span id="dedup-status" class="auto-triage-msg">
        Finds semantically near-duplicate active memories and proposes LLM-drafted merges for review. Source memories become <code>superseded</code> on approve; nothing is deleted.
      </span>
    </div>
    """
    return bar_html
|
||||
|
||||
|
||||
def _render_graduation_bar() -> str:
|
||||
"""The 'Graduate memories → entity candidates' control bar."""
|
||||
from atocore.projects.registry import load_project_registry
|
||||
@@ -478,26 +649,51 @@ def render_triage_page(limit: int = 100) -> str:
|
||||
except Exception as e:
|
||||
entity_candidates = []
|
||||
|
||||
total = len(mem_candidates) + len(entity_candidates)
|
||||
try:
|
||||
from atocore.memory.service import get_merge_candidates
|
||||
merge_candidates = get_merge_candidates(status="pending", limit=limit)
|
||||
except Exception:
|
||||
merge_candidates = []
|
||||
|
||||
total = len(mem_candidates) + len(entity_candidates) + len(merge_candidates)
|
||||
graduation_bar = _render_graduation_bar()
|
||||
dedup_bar = _render_dedup_bar()
|
||||
|
||||
if total == 0:
|
||||
body = _TRIAGE_CSS + _ENTITY_TRIAGE_CSS + f"""
|
||||
body = _TRIAGE_CSS + _ENTITY_TRIAGE_CSS + _MERGE_TRIAGE_CSS + f"""
|
||||
<div class="triage-header">
|
||||
<h1>Triage Queue</h1>
|
||||
</div>
|
||||
{graduation_bar}
|
||||
{dedup_bar}
|
||||
<div class="empty">
|
||||
<p>🎉 No candidates to review.</p>
|
||||
<p>The auto-triage pipeline keeps this queue empty unless something needs your judgment.</p>
|
||||
<p>Use the 🎓 Graduate memories button above to propose new entity candidates from existing memories.</p>
|
||||
<p>Use 🎓 Graduate memories to propose entity candidates, or 🔗 Scan for duplicates to find near-duplicate memories to merge.</p>
|
||||
</div>
|
||||
""" + _GRADUATION_SCRIPT
|
||||
""" + _GRADUATION_SCRIPT + _MERGE_TRIAGE_SCRIPT
|
||||
return render_html("Triage — AtoCore", body, breadcrumbs=[("Wiki", "/wiki"), ("Triage", "")])
|
||||
|
||||
# Memory cards
|
||||
mem_cards = "".join(_render_candidate_card(c) for c in mem_candidates)
|
||||
|
||||
# Merge cards (Phase 7A)
|
||||
merge_cards_html = ""
|
||||
if merge_candidates:
|
||||
merge_cards = "".join(_render_merge_card(c) for c in merge_candidates)
|
||||
merge_cards_html = f"""
|
||||
<div class="section-break">
|
||||
<h2>🔗 Merge Candidates ({len(merge_candidates)})</h2>
|
||||
<p class="auto-triage-msg">
|
||||
Semantically near-duplicate active memories. Approving merges the sources
|
||||
into the proposed unified memory; sources become <code>superseded</code>
|
||||
(not deleted — still queryable). You can edit the draft content and tags
|
||||
before approving.
|
||||
</p>
|
||||
</div>
|
||||
{merge_cards}
|
||||
"""
|
||||
|
||||
# Entity cards
|
||||
ent_cards_html = ""
|
||||
if entity_candidates:
|
||||
@@ -513,11 +709,12 @@ def render_triage_page(limit: int = 100) -> str:
|
||||
{ent_cards}
|
||||
"""
|
||||
|
||||
body = _TRIAGE_CSS + _ENTITY_TRIAGE_CSS + f"""
|
||||
body = _TRIAGE_CSS + _ENTITY_TRIAGE_CSS + _MERGE_TRIAGE_CSS + f"""
|
||||
<div class="triage-header">
|
||||
<h1>Triage Queue</h1>
|
||||
<span class="count">
|
||||
<span id="cand-count">{len(mem_candidates)}</span> memory ·
|
||||
{len(merge_candidates)} merge ·
|
||||
{len(entity_candidates)} entity
|
||||
</span>
|
||||
</div>
|
||||
@@ -536,10 +733,12 @@ def render_triage_page(limit: int = 100) -> str:
|
||||
</span>
|
||||
</div>
|
||||
{graduation_bar}
|
||||
{dedup_bar}
|
||||
<h2>📝 Memory Candidates ({len(mem_candidates)})</h2>
|
||||
{mem_cards}
|
||||
{merge_cards_html}
|
||||
{ent_cards_html}
|
||||
""" + _TRIAGE_SCRIPT + _ENTITY_TRIAGE_SCRIPT + _GRADUATION_SCRIPT
|
||||
""" + _TRIAGE_SCRIPT + _ENTITY_TRIAGE_SCRIPT + _GRADUATION_SCRIPT + _MERGE_TRIAGE_SCRIPT
|
||||
|
||||
return render_html(
|
||||
"Triage — AtoCore",
|
||||
|
||||
Reference in New Issue
Block a user