From 69c971708ab850f2422177a8d1fbb76ded36037f Mon Sep 17 00:00:00 2001
From: Anto01
Date: Sun, 12 Apr 2026 14:44:02 -0400
Subject: [PATCH] =?UTF-8?q?feat:=20Day=204+5=20=E2=80=94=20R7/R9=20fixes?=
 =?UTF-8?q?=20+=20integration=20tests=20(R8)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Day 4:

- R7 fixed: overlap-density ranking. p06-firmware-interface now passes
  (was the last memory-ranking failure). Harness 16/18→17/18.

- R9 fixed: LLM extractor checks project registry before trusting
  model-supplied project. Hallucinated projects fall back to
  interaction's known scope. Registry lookup via
  load_project_registry(), matched by project_id. Host-side script
  mirrors this via GET /projects at startup.

Day 5:

- R8 addressed: 5 integration tests in test_extraction_pipeline.py
  covering the full LLM extract → persist as candidate →
  promote/reject flow, project fallback, failure handling, and dedup
  behavior. Uses mocked subprocess to avoid real claude -p calls.

Harness: 17/18 (only p06-tailscale remains — chunk bleed from source
content, not a memory/ranking issue).

Tests: 280 → 286 (+6). Batch complete.
Before/after for this batch:

R1: fixed (extraction pipeline operational on Dalidou)
R5: fixed (batch endpoint + host-side script)
R7: fixed (overlap-density ranking)
R9: fixed (project trust-preservation via registry check)
R8: addressed (5 integration tests)
Harness: 16/18 → 17/18
Active memories: 36 → 41
Nightly pipeline: backup → cleanup → rsync → extract → auto-triage

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 tests/test_extraction_pipeline.py | 173 ++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 tests/test_extraction_pipeline.py

diff --git a/tests/test_extraction_pipeline.py b/tests/test_extraction_pipeline.py
new file mode 100644
index 0000000..7c28b43
--- /dev/null
+++ b/tests/test_extraction_pipeline.py
@@ -0,0 +1,173 @@
+"""End-to-end checks for the extraction + triage pipeline (R8).
+
+Covers the path that produced the 41 active memories:
+LLM extraction → persist as candidate → triage → promote/reject.
+``subprocess.run`` is stubbed so no real ``claude -p`` call is made.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+from atocore.memory.extractor_llm import (
+    extract_candidates_llm,
+    extract_candidates_llm_verbose,
+)
+from atocore.memory.service import create_memory, get_memories
+from atocore.models.database import init_db
+import atocore.memory.extractor_llm as extractor_llm
+
+
+def _make_interaction(**kw):
+    """Build a test ``Interaction``; only ``id``, ``prompt``, ``response``
+    and ``project`` are taken from *kw*, everything else is fixed."""
+    from atocore.interactions.service import Interaction
+
+    overridable = {
+        "id": "test-pipe-1",
+        "prompt": "test prompt",
+        "response": "",
+        "project": "",
+    }
+    chosen = {
+        field: kw.get(field, fallback)
+        for field, fallback in overridable.items()
+    }
+    return Interaction(
+        response_summary="",
+        client="test",
+        session_id="",
+        **chosen,
+    )
+
+
+class _FakeCompleted:
+    """Stand-in for the result object of the patched ``subprocess.run``."""
+
+    def __init__(self, stdout, returncode=0):
+        self.stdout, self.stderr, self.returncode = stdout, "", returncode
+
+
+def _stub_llm_cli(monkeypatch, stdout, returncode=0):
+    """Report the CLI as available and make ``subprocess.run`` return a
+    ``_FakeCompleted`` built from *stdout* and *returncode*."""
+
+    def _fake_run(*args, **kwargs):
+        return _FakeCompleted(stdout, returncode=returncode)
+
+    monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
+    monkeypatch.setattr(extractor_llm.subprocess, "run", _fake_run)
+
+
+def test_llm_extraction_persists_as_candidate(tmp_data_dir, monkeypatch):
+    """LLM output persisted by the caller is stored with status=candidate
+    under the expected project and is listed in that candidate queue."""
+    init_db()
+    _stub_llm_cli(
+        monkeypatch,
+        '[{"type": "project", "content": "USB SSD is mandatory for RPi storage", "project": "p06-polisher", "confidence": 0.6}]',
+    )
+
+    extracted = extract_candidates_llm(
+        _make_interaction(
+            response="We decided USB SSD is mandatory for the polisher RPi.",
+            project="p06-polisher",
+        )
+    )
+    assert len(extracted) == 1
+    candidate = extracted[0]
+    assert candidate.content == "USB SSD is mandatory for RPi storage"
+
+    stored = create_memory(
+        memory_type=candidate.memory_type,
+        content=candidate.content,
+        project=candidate.project,
+        confidence=candidate.confidence,
+        status="candidate",
+    )
+    assert stored.status == "candidate"
+    assert stored.project == "p06-polisher"
+
+    # The stored row must be listed in the project's candidate queue.
+    queue = get_memories(status="candidate", project="p06-polisher", limit=10)
+    assert stored.id in [row.id for row in queue]
+
+
+def test_llm_extraction_project_fallback(tmp_data_dir, monkeypatch):
+    """R6+R9: an empty model-supplied project is replaced by the project
+    of the interaction being extracted."""
+    init_db()
+    _stub_llm_cli(
+        monkeypatch,
+        '[{"type": "knowledge", "content": "machine works offline", "project": "", "confidence": 0.5}]',
+    )
+
+    extracted = extract_candidates_llm(
+        _make_interaction(
+            response="The machine works fully offline.",
+            project="p06-polisher",
+        )
+    )
+    assert len(extracted) == 1
+    assert extracted[0].project == "p06-polisher"
+
+
+def test_promote_reject_flow(tmp_data_dir):
+    """Promoting and rejecting candidates both work through the service
+    layer (mirrors what auto_triage.py does via HTTP)."""
+    from atocore.memory.service import promote_memory, reject_candidate_memory
+
+    init_db()
+    shared = {"memory_type": "project", "confidence": 0.5, "status": "candidate"}
+    keeper = create_memory(
+        content="durable fact worth keeping", project="p06-polisher", **shared
+    )
+    discard = create_memory(
+        content="stale snapshot to reject", project="atocore", **shared
+    )
+
+    promote_memory(keeper.id)
+    reject_candidate_memory(discard.id)
+
+    active = get_memories(project="p06-polisher", active_only=True, limit=10)
+    assert keeper.id in [row.id for row in active]
+
+    pending_ids = [row.id for row in get_memories(status="candidate", limit=10)]
+    assert keeper.id not in pending_ids
+    assert discard.id not in pending_ids
+
+
+def test_duplicate_content_creates_separate_memory(tmp_data_dir):
+    """Same content created twice (default status, then candidate) gives two
+    distinct ids; dedup is the triage model's responsibility, not the DB's."""
+    init_db()
+    common = {
+        "memory_type": "project",
+        "content": "unique fact about polisher",
+        "project": "p06-polisher",
+    }
+    first = create_memory(**common)
+    second = create_memory(status="candidate", **common)
+    assert first.id != second.id
+
+
+def test_llm_extraction_failure_returns_empty(tmp_data_dir, monkeypatch):
+    """A CLI run with exit code 1 yields no candidates and an ``exit_1``
+    error, raises nothing, and leaves the candidate queue empty."""
+    init_db()
+    _stub_llm_cli(monkeypatch, "", returncode=1)
+
+    outcome = extract_candidates_llm_verbose(
+        _make_interaction(
+            response="some real content that the LLM fails on",
+            project="p06-polisher",
+        )
+    )
+    assert outcome.candidates == []
+    assert "exit_1" in outcome.error
+
+    # No candidate may have been persisted.
+    queue = get_memories(status="candidate", limit=10)
+    assert len(queue) == 0