"""Integration tests for the extraction + triage pipeline (R8).

Covers the flow that produced the 41 active memories:
LLM extraction → persist as candidate → triage → promote/reject.
The subprocess call is mocked so that no real ``claude -p`` invocation
happens.
"""

from __future__ import annotations

from unittest.mock import patch

import pytest

from atocore.memory.extractor_llm import (
    extract_candidates_llm,
    extract_candidates_llm_verbose,
)
from atocore.memory.service import create_memory, get_memories
from atocore.models.database import init_db
import atocore.memory.extractor_llm as extractor_llm


def _make_interaction(**kw):
    """Build an ``Interaction`` for the tests in this module.

    Only ``id``, ``prompt``, ``response`` and ``project`` are read from
    ``kw``; any other key is ignored. The remaining fields are fixed.
    """
    from atocore.interactions.service import Interaction

    # Caller-overridable fields and the value used when a key is absent.
    overridable = {
        "id": "test-pipe-1",
        "prompt": "test prompt",
        "response": "",
        "project": "",
    }
    chosen = {
        name: kw.get(name, default) for name, default in overridable.items()
    }
    return Interaction(
        response_summary="",
        client="test",
        session_id="",
        **chosen,
    )


class _FakeCompleted:
    """Minimal object handed back by the patched ``subprocess.run``.

    Exposes ``stdout``, ``stderr`` (always an empty string) and
    ``returncode``.
    """

    def __init__(self, stdout, returncode=0):
        self.returncode = returncode
        self.stderr = ""
        self.stdout = stdout


def _stub_cli(monkeypatch, stdout, returncode=0):
    """Patch ``extractor_llm`` so the CLI looks available and ``run`` is faked.

    Each call to the patched ``subprocess.run`` returns a fresh
    ``_FakeCompleted`` carrying ``stdout`` and ``returncode``.
    """

    def _fake_run(*args, **kwargs):
        return _FakeCompleted(stdout, returncode=returncode)

    monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
    monkeypatch.setattr(extractor_llm.subprocess, "run", _fake_run)


def test_llm_extraction_persists_as_candidate(tmp_data_dir, monkeypatch):
    """Full flow: the LLM extracts, the caller persists as candidate, and the
    stored memory has status=candidate and the expected project."""
    init_db()
    _stub_cli(
        monkeypatch,
        '[{"type": "project", "content": "USB SSD is mandatory for RPi storage", "project": "p06-polisher", "confidence": 0.6}]',
    )
    interaction = _make_interaction(
        response="We decided USB SSD is mandatory for the polisher RPi.",
        project="p06-polisher",
    )

    candidates = extract_candidates_llm(interaction)
    assert len(candidates) == 1
    extracted = candidates[0]
    assert extracted.content == "USB SSD is mandatory for RPi storage"

    mem = create_memory(
        memory_type=extracted.memory_type,
        content=extracted.content,
        project=extracted.project,
        confidence=extracted.confidence,
        status="candidate",
    )
    assert mem.status == "candidate"
    assert mem.project == "p06-polisher"

    # The persisted memory must show up in the candidate queue.
    queue = get_memories(status="candidate", project="p06-polisher", limit=10)
    assert mem.id in [m.id for m in queue]


def test_llm_extraction_project_fallback(tmp_data_dir, monkeypatch):
    """R6+R9: an empty project from the model is replaced by the
    interaction's project on the resulting candidate."""
    init_db()
    _stub_cli(
        monkeypatch,
        '[{"type": "knowledge", "content": "machine works offline", "project": "", "confidence": 0.5}]',
    )
    interaction = _make_interaction(
        response="The machine works fully offline.",
        project="p06-polisher",
    )

    candidates = extract_candidates_llm(interaction)
    assert len(candidates) == 1
    assert candidates[0].project == "p06-polisher"


def test_promote_reject_flow(tmp_data_dir):
    """Candidate → promote and candidate → reject both work through the
    service layer (mirrors what auto_triage.py does via HTTP)."""
    from atocore.memory.service import promote_memory, reject_candidate_memory

    init_db()

    good = create_memory(
        memory_type="project",
        content="durable fact worth keeping",
        project="p06-polisher",
        confidence=0.5,
        status="candidate",
    )
    bad = create_memory(
        memory_type="project",
        content="stale snapshot to reject",
        project="atocore",
        confidence=0.5,
        status="candidate",
    )

    promote_memory(good.id)
    reject_candidate_memory(bad.id)

    # The promoted memory is listed among the project's active memories.
    active = get_memories(project="p06-polisher", active_only=True, limit=10)
    assert good.id in [m.id for m in active]

    # Neither memory is left in the candidate queue.
    pending_ids = [m.id for m in get_memories(status="candidate", limit=10)]
    assert good.id not in pending_ids
    assert bad.id not in pending_ids


def test_duplicate_content_creates_separate_memory(tmp_data_dir):
    """create_memory accepts duplicate content (dedup is the triage model's
    responsibility, not the DB layer): the two calls yield distinct ids."""
    init_db()

    shared = {
        "memory_type": "project",
        "content": "unique fact about polisher",
        "project": "p06-polisher",
    }
    first = create_memory(**shared)
    second = create_memory(**shared, status="candidate")

    assert first.id != second.id


def test_llm_extraction_failure_returns_empty(tmp_data_dir, monkeypatch):
    """A failing LLM call is handled gracefully by the verbose extractor:
    no candidates, an error mentioning the exit code, nothing persisted."""
    init_db()
    _stub_cli(monkeypatch, "", returncode=1)
    interaction = _make_interaction(
        response="some real content that the LLM fails on",
        project="p06-polisher",
    )

    result = extract_candidates_llm_verbose(interaction)
    assert result.candidates == []
    assert "exit_1" in result.error

    # The candidate queue stays empty.
    queue = get_memories(status="candidate", limit=10)
    assert len(queue) == 0