"""Tests for the LLM-assisted extractor path. Focused on the parser and failure-mode contracts — the actual network call is exercised out of band by running ``python scripts/extractor_eval.py --mode llm`` against the frozen labeled corpus with ``ANTHROPIC_API_KEY`` set. These tests only exercise the pieces that don't need network. """ from __future__ import annotations import os from unittest.mock import patch import pytest from atocore.interactions.service import Interaction from atocore.memory.extractor_llm import ( LLM_EXTRACTOR_VERSION, _parse_candidates, extract_candidates_llm, extract_candidates_llm_verbose, ) import atocore.memory.extractor_llm as extractor_llm def _make_interaction(prompt: str = "p", response: str = "r") -> Interaction: return Interaction( id="test-id", prompt=prompt, response=response, response_summary="", project="", client="test", session_id="", ) def test_parser_handles_empty_array(): result = _parse_candidates("[]", _make_interaction()) assert result == [] def test_parser_handles_malformed_json(): result = _parse_candidates("{ not valid json", _make_interaction()) assert result == [] def test_parser_strips_markdown_fences(): raw = "```json\n[{\"type\": \"knowledge\", \"content\": \"x is y\", \"project\": \"\", \"confidence\": 0.5}]\n```" result = _parse_candidates(raw, _make_interaction()) assert len(result) == 1 assert result[0].memory_type == "knowledge" assert result[0].content == "x is y" def test_parser_strips_surrounding_prose(): raw = "Here are the candidates:\n[{\"type\": \"project\", \"content\": \"foo\", \"project\": \"p04\", \"confidence\": 0.6}]\nThat's it." result = _parse_candidates(raw, _make_interaction()) assert len(result) == 1 assert result[0].memory_type == "project" # Model returned "p04" with no interaction scope — unscoped path # resolves via registry if available, otherwise stays as-is def test_parser_drops_invalid_memory_types(): raw = '[{"type": "nonsense", "content": "x"}, {"type": "project", "content": "y"}]' result = _parse_candidates(raw, _make_interaction()) assert len(result) == 1 assert result[0].memory_type == "project" def test_parser_drops_empty_content(): raw = '[{"type": "knowledge", "content": " "}, {"type": "knowledge", "content": "real"}]' result = _parse_candidates(raw, _make_interaction()) assert len(result) == 1 assert result[0].content == "real" def test_parser_clamps_confidence_to_unit_interval(): raw = '[{"type": "knowledge", "content": "c1", "confidence": 2.5}, {"type": "knowledge", "content": "c2", "confidence": -0.4}]' result = _parse_candidates(raw, _make_interaction()) assert result[0].confidence == 1.0 assert result[1].confidence == 0.0 def test_parser_defaults_confidence_on_missing_field(): raw = '[{"type": "knowledge", "content": "c1"}]' result = _parse_candidates(raw, _make_interaction()) assert result[0].confidence == 0.5 def test_parser_tags_version_and_rule(): raw = '[{"type": "project", "content": "c1"}]' result = _parse_candidates(raw, _make_interaction()) assert result[0].rule == "llm_extraction" assert result[0].extractor_version == LLM_EXTRACTOR_VERSION assert result[0].source_interaction_id == "test-id" def test_case_a_empty_model_scoped_interaction(): """Case A: model returns empty project, interaction is scoped. Interaction scope wins.""" raw = '[{"type": "project", "content": "machine works offline"}]' interaction = _make_interaction() interaction.project = "p06-polisher" result = _parse_candidates(raw, interaction) assert result[0].project == "p06-polisher" def test_case_b_empty_model_unscoped_interaction(): """Case B: both empty. Project stays empty.""" raw = '[{"type": "project", "content": "generic fact"}]' interaction = _make_interaction() interaction.project = "" result = _parse_candidates(raw, interaction) assert result[0].project == "" def test_case_c_unregistered_model_scoped_interaction(tmp_data_dir, project_registry): """Case C: model returns unregistered project, interaction is scoped. Interaction scope wins.""" from atocore.models.database import init_db init_db() project_registry(("p06-polisher", ["p06"])) raw = '[{"type": "project", "content": "x", "project": "fake-project-99"}]' interaction = _make_interaction() interaction.project = "p06-polisher" result = _parse_candidates(raw, interaction) assert result[0].project == "p06-polisher" def test_case_d_unregistered_model_unscoped_keeps_tag(tmp_data_dir, project_registry): """Case D: model returns unregistered project, interaction is unscoped. Keeps the model's tag for auto-project-detection (new behavior).""" from atocore.models.database import init_db init_db() project_registry(("p06-polisher", ["p06"])) raw = '[{"type": "project", "content": "x", "project": "new-lead-project"}]' interaction = _make_interaction() interaction.project = "" result = _parse_candidates(raw, interaction) assert result[0].project == "new-lead-project" def test_case_e_matching_model_and_interaction(tmp_data_dir, project_registry): """Case E: model returns same project as interaction. Works.""" from atocore.models.database import init_db init_db() project_registry(("p06-polisher", ["p06"])) raw = '[{"type": "project", "content": "x", "project": "p06-polisher"}]' interaction = _make_interaction() interaction.project = "p06-polisher" result = _parse_candidates(raw, interaction) assert result[0].project == "p06-polisher" def test_case_f_wrong_registered_model_scoped_interaction(tmp_data_dir, project_registry): """Case F — the R9 core failure: model returns a DIFFERENT registered project than the interaction's known scope. Interaction scope wins. This is the case that was broken before the R9 fix.""" from atocore.models.database import init_db init_db() project_registry(("p04-gigabit", ["p04"]), ("p06-polisher", ["p06"])) raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]' interaction = _make_interaction() interaction.project = "p06-polisher" result = _parse_candidates(raw, interaction) assert result[0].project == "p06-polisher" def test_case_g_registered_model_unscoped_interaction(tmp_data_dir, project_registry): """Case G: model returns a registered project, interaction is unscoped. Model project accepted (only way to get a project for unscoped captures).""" from atocore.models.database import init_db init_db() project_registry(("p04-gigabit", ["p04"])) raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]' interaction = _make_interaction() interaction.project = "" result = _parse_candidates(raw, interaction) assert result[0].project == "p04-gigabit" def test_missing_cli_returns_empty(monkeypatch): """If ``claude`` is not on PATH the extractor returns empty, never raises.""" monkeypatch.setattr(extractor_llm, "_cli_available", lambda: False) result = extract_candidates_llm_verbose(_make_interaction("p", "some real response")) assert result.candidates == [] assert result.error == "claude_cli_missing" def test_empty_response_returns_empty(monkeypatch): monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True) result = extract_candidates_llm_verbose(_make_interaction("p", "")) assert result.candidates == [] assert result.error == "empty_response" def test_subprocess_timeout_returns_empty(monkeypatch): """A subprocess timeout must not raise into the caller.""" monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True) import subprocess as _sp def _boom(*a, **kw): raise _sp.TimeoutExpired(cmd=a[0] if a else "claude", timeout=1) monkeypatch.setattr(extractor_llm.subprocess, "run", _boom) result = extract_candidates_llm_verbose(_make_interaction("p", "real response")) assert result.candidates == [] assert result.error == "timeout" def test_subprocess_nonzero_exit_returns_empty(monkeypatch): """A non-zero CLI exit (auth failure, etc.) must not raise.""" monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True) class _Completed: returncode = 1 stdout = "" stderr = "auth failed" monkeypatch.setattr(extractor_llm.subprocess, "run", lambda *a, **kw: _Completed()) result = extract_candidates_llm_verbose(_make_interaction("p", "real response")) assert result.candidates == [] assert result.error == "exit_1" def test_happy_path_parses_stdout(monkeypatch): monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True) class _Completed: returncode = 0 stdout = '[{"type": "project", "content": "p04 selected Option B", "project": "p04-gigabit", "confidence": 0.6}]' stderr = "" monkeypatch.setattr(extractor_llm.subprocess, "run", lambda *a, **kw: _Completed()) result = extract_candidates_llm_verbose(_make_interaction("p", "r")) assert len(result.candidates) == 1 assert result.candidates[0].memory_type == "project" assert result.candidates[0].project == "p04-gigabit" assert abs(result.candidates[0].confidence - 0.6) < 1e-9