feat: Phase 7A.1 — autonomous merge tiering (sonnet → opus → human)
Dedup detector now merges high-confidence duplicates silently instead
of piling every proposal into a human triage queue. Matches the 3-tier
escalation pattern that auto_triage already uses.
Tiering decision per cluster:
TIER-1 auto-approve: sonnet confidence >= 0.8 AND min_pairwise_sim >= 0.92
AND all sources share project+type → auto-merge silently
(actor="auto-dedup-tier1" in audit log)
TIER-2 escalation: sonnet 0.5-0.8 conf OR sim 0.85-0.92 → opus second opinion.
Opus confirms with conf >= 0.8 → auto-merge (actor="auto-dedup-tier2").
Opus overrides (reject) → skip silently.
Opus low conf → human triage with opus's refined draft.
HUMAN triage: Only the genuinely ambiguous clusters land in /admin/triage.
Env-tunable thresholds:
ATOCORE_DEDUP_AUTO_APPROVE_CONF (0.8)
ATOCORE_DEDUP_AUTO_APPROVE_SIM (0.92)
ATOCORE_DEDUP_TIER2_MIN_CONF (0.5)
ATOCORE_DEDUP_TIER2_MIN_SIM (0.85)
ATOCORE_DEDUP_TIER2_MODEL (opus)
New flag --no-auto-approve for kill-switch testing (everything → human queue).
Tests: +6 (tier-2 prompt content, same_bucket edges, min_pairwise_similarity
on identical + transitive clusters). 395 → 401.
Rationale: user asked for autonomous behavior — "this needs to be intelligent,
I don't want to manually triage stuff". Matches the consolidation principle:
never discard details, but let the brain tidy up on its own for the easy cases.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,8 @@ from __future__ import annotations
|
||||
import pytest
|
||||
|
||||
from atocore.memory._dedup_prompt import (
|
||||
TIER2_SYSTEM_PROMPT,
|
||||
build_tier2_user_message,
|
||||
normalize_merge_verdict,
|
||||
parse_merge_verdict,
|
||||
)
|
||||
@@ -119,6 +121,120 @@ def test_normalize_merge_verdict_rejects_unknown_action():
|
||||
assert normalize_merge_verdict({"action": "?", "content": "x"}) is None
|
||||
|
||||
|
||||
# --- Tier-2 (Phase 7A.1) ---
|
||||
|
||||
|
||||
def test_tier2_prompt_is_stricter():
    """Check that the tier-2 system prompt carries its strictness keywords.

    Escalation is only useful if the second-opinion model is explicitly
    told to be stricter than tier-1, so both marker words must be present.
    """
    for required_keyword in ("STRICTER", "REJECT"):
        assert required_keyword in TIER2_SYSTEM_PROMPT
|
||||
|
||||
|
||||
def test_build_tier2_user_message_includes_tier1_draft():
    """The tier-2 user message must embed the sources and the tier-1 draft."""
    first_source = {
        "id": "abc12345",
        "content": "source text A",
        "memory_type": "knowledge",
        "project": "p04",
        "domain_tags": ["optics"],
        "confidence": 0.6,
        "valid_until": "",
        "reference_count": 2,
    }
    second_source = {
        "id": "def67890",
        "content": "source text B",
        "memory_type": "knowledge",
        "project": "p04",
        "domain_tags": ["optics"],
        "confidence": 0.7,
        "valid_until": "",
        "reference_count": 1,
    }
    tier1_verdict = {
        "action": "merge",
        "content": "unified draft by tier1",
        "memory_type": "knowledge",
        "project": "p04",
        "domain_tags": ["optics"],
        "confidence": 0.65,
        "reason": "near-paraphrase",
    }

    rendered = build_tier2_user_message([first_source, second_source], tier1_verdict)

    # Every source body plus the tier-1 draft and its reason must be visible
    # to the tier-2 model.
    expected_fragments = (
        "source text A",
        "source text B",
        "TIER-1 DRAFT",
        "unified draft by tier1",
        "near-paraphrase",
    )
    for fragment in expected_fragments:
        assert fragment in rendered

    # The message is expected to ask for a verdict (case-insensitive check).
    assert "verdict" in rendered.lower()
|
||||
|
||||
|
||||
# --- Tiering helpers (min_pairwise_similarity, same_bucket) ---
|
||||
|
||||
|
||||
def test_same_bucket_true_for_matching():
    """same_bucket must return True when all sources share type and project."""
    import importlib.util

    # Load the script by path under a throwaway module name; the path is
    # relative, so this relies on the working directory the tests run from.
    module_spec = importlib.util.spec_from_file_location(
        "memory_dedup_for_test", "scripts/memory_dedup.py"
    )
    dedup = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(dedup)

    matching_sources = [
        {"memory_type": "knowledge", "project": "p04"} for _ in range(2)
    ]
    assert dedup.same_bucket(matching_sources) is True
|
||||
|
||||
|
||||
def test_same_bucket_false_for_mixed():
    """same_bucket must return False when project or memory_type differs."""
    import importlib.util

    # Load the script by path under a throwaway module name; the path is
    # relative, so this relies on the working directory the tests run from.
    module_spec = importlib.util.spec_from_file_location(
        "memory_dedup_for_test", "scripts/memory_dedup.py"
    )
    dedup = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(dedup)

    differing_project = [
        {"memory_type": "knowledge", "project": "p04"},
        {"memory_type": "knowledge", "project": "p05"},
    ]
    differing_memory_type = [
        {"memory_type": "knowledge", "project": "p04"},
        {"memory_type": "project", "project": "p04"},
    ]
    for mixed_sources in (differing_project, differing_memory_type):
        assert dedup.same_bucket(mixed_sources) is False
|
||||
|
||||
|
||||
def test_min_pairwise_similarity_identical_texts():
    """Three identical texts should give a minimum similarity of about 1.0."""
    import importlib.util

    # Load the script by path under a throwaway module name; the path is
    # relative, so this relies on the working directory the tests run from.
    module_spec = importlib.util.spec_from_file_location(
        "memory_dedup_for_test", "scripts/memory_dedup.py"
    )
    dedup = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(dedup)

    identical_texts = ["hello world", "hello world", "hello world"]
    lowest = dedup.min_pairwise_similarity(identical_texts)
    assert lowest >= 0.99
    assert lowest <= 1.0
|
||||
|
||||
|
||||
def test_min_pairwise_similarity_mixed_cluster():
    """A cluster containing one unrelated text must expose a low minimum.

    The first two texts are about the same OAuth preference while the third
    is about something else entirely, so the weakest pair should drag the
    minimum well below the merge thresholds.
    """
    import importlib.util

    # Load the script by path under a throwaway module name; the path is
    # relative, so this relies on the working directory the tests run from.
    module_spec = importlib.util.spec_from_file_location(
        "memory_dedup_for_test", "scripts/memory_dedup.py"
    )
    dedup = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(dedup)

    cluster_texts = [
        "Antoine prefers OAuth over API keys",
        "Antoine's OAuth preference",
        "USB SSD mandatory for polisher firmware",
    ]
    lowest = dedup.min_pairwise_similarity(cluster_texts)
    assert lowest < 0.6  # Third is unrelated; min is far below threshold
|
||||
|
||||
|
||||
# --- create_merge_candidate idempotency ---
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user