"""Tests for 3-tier triage escalation logic (Phase Triage Quality). The actual LLM calls are gated by ``shutil.which('claude')`` and can't be exercised in CI without the CLI, so we mock the tier functions directly and verify the control-flow (escalation routing, discard vs human, project misattribution, metadata update). """ from __future__ import annotations import sys from pathlib import Path from unittest import mock import pytest # Import the script as a module for unit testing _SCRIPTS = str(Path(__file__).resolve().parent.parent / "scripts") if _SCRIPTS not in sys.path: sys.path.insert(0, _SCRIPTS) import auto_triage # noqa: E402 @pytest.fixture(autouse=True) def reset_thresholds(monkeypatch): """Make sure env-var overrides don't leak between tests.""" monkeypatch.setattr(auto_triage, "AUTO_PROMOTE_MIN_CONFIDENCE", 0.8) monkeypatch.setattr(auto_triage, "ESCALATION_CONFIDENCE_THRESHOLD", 0.75) monkeypatch.setattr(auto_triage, "TIER3_ACTION", "discard") monkeypatch.setattr(auto_triage, "TIER1_MODEL", "sonnet") monkeypatch.setattr(auto_triage, "TIER2_MODEL", "opus") def test_parse_verdict_captures_suggested_project(): raw = '{"verdict": "promote", "confidence": 0.9, "reason": "clear", "suggested_project": "p04-gigabit"}' v = auto_triage.parse_verdict(raw) assert v["verdict"] == "promote" assert v["suggested_project"] == "p04-gigabit" def test_parse_verdict_defaults_suggested_project_to_empty(): raw = '{"verdict": "reject", "confidence": 0.9, "reason": "dup"}' v = auto_triage.parse_verdict(raw) assert v["suggested_project"] == "" def test_high_confidence_tier1_promote_no_escalation(): """Tier 1 confident promote → no tier 2 call.""" cand = {"id": "m1", "content": "x", "memory_type": "knowledge", "project": "p-test"} with mock.patch("auto_triage.triage_one") as t1, \ mock.patch("auto_triage.triage_escalation") as t2, \ mock.patch("auto_triage.api_post"), \ mock.patch("auto_triage._apply_metadata_update"): t1.return_value = { "verdict": "promote", "confidence": 0.95, "reason": "clear", "domain_tags": [], "valid_until": "", "suggested_project": "", } action, _ = auto_triage.process_candidate( cand, "http://fake", {"p-test": []}, {"p-test": []}, {"p-test": []}, dry_run=False, ) assert action == "promote" t2.assert_not_called() def test_high_confidence_tier1_reject_no_escalation(): cand = {"id": "m1", "content": "x", "memory_type": "knowledge", "project": "p-test"} with mock.patch("auto_triage.triage_one") as t1, \ mock.patch("auto_triage.triage_escalation") as t2, \ mock.patch("auto_triage.api_post"): t1.return_value = { "verdict": "reject", "confidence": 0.9, "reason": "duplicate", "domain_tags": [], "valid_until": "", "suggested_project": "", } action, _ = auto_triage.process_candidate( cand, "http://fake", {"p-test": []}, {"p-test": []}, {"p-test": []}, dry_run=False, ) assert action == "reject" t2.assert_not_called() def test_low_confidence_escalates_to_tier2(): """Tier 1 low confidence → tier 2 is consulted.""" cand = {"id": "m1", "content": "x", "memory_type": "knowledge", "project": "p-test"} with mock.patch("auto_triage.triage_one") as t1, \ mock.patch("auto_triage.triage_escalation") as t2, \ mock.patch("auto_triage.api_post"), \ mock.patch("auto_triage._apply_metadata_update"): t1.return_value = { "verdict": "promote", "confidence": 0.6, "reason": "maybe", "domain_tags": [], "valid_until": "", "suggested_project": "", } t2.return_value = { "verdict": "promote", "confidence": 0.9, "reason": "opus agrees", "domain_tags": [], "valid_until": "", "suggested_project": "", } action, note = auto_triage.process_candidate( cand, "http://fake", {"p-test": []}, {"p-test": []}, {"p-test": []}, dry_run=False, ) assert action == "promote" assert "opus" in note t2.assert_called_once() def test_needs_human_tier1_always_escalates(): cand = {"id": "m1", "content": "x", "memory_type": "knowledge", "project": "p-test"} with mock.patch("auto_triage.triage_one") as t1, \ mock.patch("auto_triage.triage_escalation") as t2, \ mock.patch("auto_triage.api_post"): t1.return_value = { "verdict": "needs_human", "confidence": 0.5, "reason": "uncertain", "domain_tags": [], "valid_until": "", "suggested_project": "", } t2.return_value = { "verdict": "reject", "confidence": 0.88, "reason": "opus decided", "domain_tags": [], "valid_until": "", "suggested_project": "", } action, _ = auto_triage.process_candidate( cand, "http://fake", {"p-test": []}, {"p-test": []}, {"p-test": []}, dry_run=False, ) assert action == "reject" t2.assert_called_once() def test_tier2_uncertain_leads_to_discard_by_default(monkeypatch): cand = {"id": "m1", "content": "x", "memory_type": "knowledge", "project": "p-test"} monkeypatch.setattr(auto_triage, "TIER3_ACTION", "discard") with mock.patch("auto_triage.triage_one") as t1, \ mock.patch("auto_triage.triage_escalation") as t2, \ mock.patch("auto_triage.api_post") as api_post: t1.return_value = { "verdict": "needs_human", "confidence": 0.4, "reason": "unclear", "domain_tags": [], "valid_until": "", "suggested_project": "", } t2.return_value = { "verdict": "needs_human", "confidence": 0.5, "reason": "still unclear", "domain_tags": [], "valid_until": "", "suggested_project": "", } action, _ = auto_triage.process_candidate( cand, "http://fake", {"p-test": []}, {"p-test": []}, {"p-test": []}, dry_run=False, ) assert action == "discard" # Should have called reject on the API api_post.assert_called_once() assert "reject" in api_post.call_args.args[1] def test_tier2_uncertain_goes_to_human_when_configured(monkeypatch): cand = {"id": "m1", "content": "x", "memory_type": "knowledge", "project": "p-test"} monkeypatch.setattr(auto_triage, "TIER3_ACTION", "human") with mock.patch("auto_triage.triage_one") as t1, \ mock.patch("auto_triage.triage_escalation") as t2, \ mock.patch("auto_triage.api_post") as api_post: t1.return_value = { "verdict": "needs_human", "confidence": 0.4, "reason": "unclear", "domain_tags": [], "valid_until": "", "suggested_project": "", } t2.return_value = { "verdict": "needs_human", "confidence": 0.5, "reason": "still unclear", "domain_tags": [], "valid_until": "", "suggested_project": "", } action, _ = auto_triage.process_candidate( cand, "http://fake", {"p-test": []}, {"p-test": []}, {"p-test": []}, dry_run=False, ) assert action == "human" # Should NOT have touched the API — leave candidate in queue api_post.assert_not_called() def test_dry_run_does_not_call_api(): cand = {"id": "m1", "content": "x", "memory_type": "knowledge", "project": "p-test"} with mock.patch("auto_triage.triage_one") as t1, \ mock.patch("auto_triage.api_post") as api_post: t1.return_value = { "verdict": "promote", "confidence": 0.9, "reason": "clear", "domain_tags": [], "valid_until": "", "suggested_project": "", } action, _ = auto_triage.process_candidate( cand, "http://fake", {"p-test": []}, {"p-test": []}, {"p-test": []}, dry_run=True, ) assert action == "promote" api_post.assert_not_called() def test_misattribution_flagged_when_suggestion_differs(capsys): cand = {"id": "m1", "content": "x", "memory_type": "knowledge", "project": "p04-gigabit"} with mock.patch("auto_triage.triage_one") as t1, \ mock.patch("auto_triage.api_post"), \ mock.patch("auto_triage._apply_metadata_update"): t1.return_value = { "verdict": "promote", "confidence": 0.9, "reason": "clear", "domain_tags": [], "valid_until": "", "suggested_project": "p05-interferometer", } auto_triage.process_candidate( cand, "http://fake", {"p04-gigabit": [], "p05-interferometer": []}, {"p04-gigabit": [], "p05-interferometer": []}, {"p04-gigabit": [], "p05-interferometer": []}, dry_run=True, ) out = capsys.readouterr().out assert "misattribution" in out assert "p05-interferometer" in out