220 lines
8.8 KiB
Python
220 lines
8.8 KiB
Python
|
"""Tests for 3-tier triage escalation logic (Phase Triage Quality).

The actual LLM calls are gated by ``shutil.which('claude')`` and can't be
exercised in CI without the CLI, so we mock the tier functions directly
and verify the control-flow (escalation routing, discard vs human, project
misattribution, metadata update).
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
from unittest import mock
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
# The triage script is imported as a plain module for unit testing, so the
# ``scripts`` directory (a sibling of this file's parent directory) must be
# on ``sys.path`` before the import below runs.
_SCRIPTS = str(Path(__file__).resolve().parents[1] / "scripts")
if _SCRIPTS not in sys.path:
    # Prepend so this directory is searched before other entries.
    sys.path.insert(0, _SCRIPTS)
|
||
|
|
|
||
|
|
import auto_triage # noqa: E402
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture(autouse=True)
def reset_thresholds(monkeypatch):
    """Pin the ``auto_triage`` tunables to fixed values for every test.

    Runs automatically for each test so that overrides (e.g. via env vars)
    don't leak between tests; ``monkeypatch`` undoes the changes afterwards.
    """
    pinned_settings = {
        "AUTO_PROMOTE_MIN_CONFIDENCE": 0.8,
        "ESCALATION_CONFIDENCE_THRESHOLD": 0.75,
        "TIER3_ACTION": "discard",
        "TIER1_MODEL": "sonnet",
        "TIER2_MODEL": "opus",
    }
    for setting, value in pinned_settings.items():
        monkeypatch.setattr(auto_triage, setting, value)
|
||
|
|
|
||
|
|
|
||
|
|
def test_parse_verdict_captures_suggested_project():
    """``parse_verdict`` keeps the verdict and the suggested project."""
    payload = (
        '{"verdict": "promote", "confidence": 0.9, "reason": "clear", '
        '"suggested_project": "p04-gigabit"}'
    )

    parsed = auto_triage.parse_verdict(payload)

    assert parsed["verdict"] == "promote"
    assert parsed["suggested_project"] == "p04-gigabit"
|
||
|
|
|
||
|
|
|
||
|
|
def test_parse_verdict_defaults_suggested_project_to_empty():
    """A payload without ``suggested_project`` parses to an empty string."""
    payload = '{"verdict": "reject", "confidence": 0.9, "reason": "dup"}'

    parsed = auto_triage.parse_verdict(payload)

    assert parsed["suggested_project"] == ""
|
||
|
|
|
||
|
|
|
||
|
|
def test_high_confidence_tier1_promote_no_escalation():
    """Tier 1 confident promote → no tier 2 call."""
    candidate = {
        "id": "m1",
        "content": "x",
        "memory_type": "knowledge",
        "project": "p-test",
    }
    tier1_verdict = {
        "verdict": "promote",
        "confidence": 0.95,
        "reason": "clear",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    # Patchers are created up front; nothing is patched until the ``with``.
    tier1 = mock.patch("auto_triage.triage_one", return_value=tier1_verdict)
    tier2 = mock.patch("auto_triage.triage_escalation")
    post = mock.patch("auto_triage.api_post")
    metadata = mock.patch("auto_triage._apply_metadata_update")

    with tier1, tier2 as escalation, post, metadata:
        outcome, _note = auto_triage.process_candidate(
            candidate,
            "http://fake",
            {"p-test": []},
            {"p-test": []},
            {"p-test": []},
            dry_run=False,
        )

    assert outcome == "promote"
    escalation.assert_not_called()
|
||
|
|
|
||
|
|
|
||
|
|
def test_high_confidence_tier1_reject_no_escalation():
    """Tier 1 confident reject → no tier 2 call."""
    candidate = {
        "id": "m1",
        "content": "x",
        "memory_type": "knowledge",
        "project": "p-test",
    }
    tier1_verdict = {
        "verdict": "reject",
        "confidence": 0.9,
        "reason": "duplicate",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier1 = mock.patch("auto_triage.triage_one", return_value=tier1_verdict)
    tier2 = mock.patch("auto_triage.triage_escalation")
    post = mock.patch("auto_triage.api_post")

    with tier1, tier2 as escalation, post:
        outcome, _note = auto_triage.process_candidate(
            candidate,
            "http://fake",
            {"p-test": []},
            {"p-test": []},
            {"p-test": []},
            dry_run=False,
        )

    assert outcome == "reject"
    escalation.assert_not_called()
|
||
|
|
|
||
|
|
|
||
|
|
def test_low_confidence_escalates_to_tier2():
    """Tier 1 low confidence → tier 2 is consulted."""
    candidate = {
        "id": "m1",
        "content": "x",
        "memory_type": "knowledge",
        "project": "p-test",
    }
    # 0.6 is below both thresholds pinned by the autouse fixture.
    tier1_verdict = {
        "verdict": "promote",
        "confidence": 0.6,
        "reason": "maybe",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier2_verdict = {
        "verdict": "promote",
        "confidence": 0.9,
        "reason": "opus agrees",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier1 = mock.patch("auto_triage.triage_one", return_value=tier1_verdict)
    tier2 = mock.patch("auto_triage.triage_escalation", return_value=tier2_verdict)
    post = mock.patch("auto_triage.api_post")
    metadata = mock.patch("auto_triage._apply_metadata_update")

    with tier1, tier2 as escalation, post, metadata:
        outcome, detail = auto_triage.process_candidate(
            candidate,
            "http://fake",
            {"p-test": []},
            {"p-test": []},
            {"p-test": []},
            dry_run=False,
        )

    assert outcome == "promote"
    # NOTE(review): "opus" occurs in both the mocked tier-2 reason and the
    # TIER2_MODEL value pinned by the fixture, so this check cannot tell which
    # of the two the note carries — confirm the intent.
    assert "opus" in detail
    escalation.assert_called_once()
|
||
|
|
|
||
|
|
|
||
|
|
def test_needs_human_tier1_always_escalates():
    """A tier-1 ``needs_human`` verdict is handed to tier 2, whose verdict wins."""
    candidate = {
        "id": "m1",
        "content": "x",
        "memory_type": "knowledge",
        "project": "p-test",
    }
    tier1_verdict = {
        "verdict": "needs_human",
        "confidence": 0.5,
        "reason": "uncertain",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier2_verdict = {
        "verdict": "reject",
        "confidence": 0.88,
        "reason": "opus decided",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier1 = mock.patch("auto_triage.triage_one", return_value=tier1_verdict)
    tier2 = mock.patch("auto_triage.triage_escalation", return_value=tier2_verdict)
    post = mock.patch("auto_triage.api_post")

    with tier1, tier2 as escalation, post:
        outcome, _note = auto_triage.process_candidate(
            candidate,
            "http://fake",
            {"p-test": []},
            {"p-test": []},
            {"p-test": []},
            dry_run=False,
        )

    assert outcome == "reject"
    escalation.assert_called_once()
|
||
|
|
|
||
|
|
|
||
|
|
def test_tier2_uncertain_leads_to_discard_by_default(monkeypatch):
    """With ``TIER3_ACTION == "discard"``, two unsure tiers end in a discard."""
    monkeypatch.setattr(auto_triage, "TIER3_ACTION", "discard")
    candidate = {
        "id": "m1",
        "content": "x",
        "memory_type": "knowledge",
        "project": "p-test",
    }
    tier1_verdict = {
        "verdict": "needs_human",
        "confidence": 0.4,
        "reason": "unclear",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier2_verdict = {
        "verdict": "needs_human",
        "confidence": 0.5,
        "reason": "still unclear",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier1 = mock.patch("auto_triage.triage_one", return_value=tier1_verdict)
    tier2 = mock.patch("auto_triage.triage_escalation", return_value=tier2_verdict)
    post = mock.patch("auto_triage.api_post")

    with tier1, tier2, post as api_post:
        outcome, _note = auto_triage.process_candidate(
            candidate,
            "http://fake",
            {"p-test": []},
            {"p-test": []},
            {"p-test": []},
            dry_run=False,
        )

    assert outcome == "discard"
    # Should have called reject on the API: exactly one post, whose second
    # positional argument mentions "reject".
    api_post.assert_called_once()
    posted_args = api_post.call_args.args
    assert "reject" in posted_args[1]
|
||
|
|
|
||
|
|
|
||
|
|
def test_tier2_uncertain_goes_to_human_when_configured(monkeypatch):
    """With ``TIER3_ACTION == "human"``, two unsure tiers yield ``"human"``."""
    monkeypatch.setattr(auto_triage, "TIER3_ACTION", "human")
    candidate = {
        "id": "m1",
        "content": "x",
        "memory_type": "knowledge",
        "project": "p-test",
    }
    tier1_verdict = {
        "verdict": "needs_human",
        "confidence": 0.4,
        "reason": "unclear",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier2_verdict = {
        "verdict": "needs_human",
        "confidence": 0.5,
        "reason": "still unclear",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier1 = mock.patch("auto_triage.triage_one", return_value=tier1_verdict)
    tier2 = mock.patch("auto_triage.triage_escalation", return_value=tier2_verdict)
    post = mock.patch("auto_triage.api_post")

    with tier1, tier2, post as api_post:
        outcome, _note = auto_triage.process_candidate(
            candidate,
            "http://fake",
            {"p-test": []},
            {"p-test": []},
            {"p-test": []},
            dry_run=False,
        )

    assert outcome == "human"
    # Should NOT have touched the API — leave candidate in queue
    api_post.assert_not_called()
|
||
|
|
|
||
|
|
|
||
|
|
def test_dry_run_does_not_call_api():
    """A dry run still reports the tier-1 verdict but never posts to the API.

    ``triage_escalation`` is patched like in the sibling ``process_candidate``
    tests so that a routing regression cannot reach the real tier-2 function
    from this test. ``_apply_metadata_update`` is deliberately left unpatched:
    stubbing it could hide a write performed during a dry run.
    """
    cand = {
        "id": "m1",
        "content": "x",
        "memory_type": "knowledge",
        "project": "p-test",
    }
    tier1_verdict = {
        "verdict": "promote",
        "confidence": 0.9,
        "reason": "clear",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "",
    }
    tier1 = mock.patch("auto_triage.triage_one", return_value=tier1_verdict)
    tier2 = mock.patch("auto_triage.triage_escalation")
    post = mock.patch("auto_triage.api_post")

    with tier1, tier2, post as api_post:
        action, _ = auto_triage.process_candidate(
            cand,
            "http://fake",
            {"p-test": []},
            {"p-test": []},
            {"p-test": []},
            dry_run=True,
        )

    assert action == "promote"
    api_post.assert_not_called()
|
||
|
|
|
||
|
|
|
||
|
|
def test_misattribution_flagged_when_suggestion_differs(capsys):
    """A suggested project that differs from the candidate's is reported.

    The candidate is filed under ``p04-gigabit`` while the mocked tier-1
    verdict suggests ``p05-interferometer``; the test checks that stdout
    mentions both "misattribution" and the suggested project.

    ``triage_escalation`` is patched like in the sibling ``process_candidate``
    tests so that a routing regression cannot reach the real tier-2 function
    from this test.
    """
    cand = {
        "id": "m1",
        "content": "x",
        "memory_type": "knowledge",
        "project": "p04-gigabit",
    }
    tier1_verdict = {
        "verdict": "promote",
        "confidence": 0.9,
        "reason": "clear",
        "domain_tags": [],
        "valid_until": "",
        "suggested_project": "p05-interferometer",
    }
    tier1 = mock.patch("auto_triage.triage_one", return_value=tier1_verdict)
    tier2 = mock.patch("auto_triage.triage_escalation")
    post = mock.patch("auto_triage.api_post")
    metadata = mock.patch("auto_triage._apply_metadata_update")

    with tier1, tier2, post, metadata:
        auto_triage.process_candidate(
            cand,
            "http://fake",
            {"p04-gigabit": [], "p05-interferometer": []},
            {"p04-gigabit": [], "p05-interferometer": []},
            {"p04-gigabit": [], "p05-interferometer": []},
            dry_run=True,
        )

    out = capsys.readouterr().out
    assert "misattribution" in out
    assert "p05-interferometer" in out
|