From 69c971708ab850f2422177a8d1fbb76ded36037f Mon Sep 17 00:00:00 2001
From: Anto01 <antoine.letarte@gmail.com>
Date: Sun, 12 Apr 2026 14:44:02 -0400
Subject: [PATCH] =?UTF-8?q?feat:=20Day=204+5=20=E2=80=94=20R7/R9=20fixes?=
 =?UTF-8?q?=20+=20integration=20tests=20(R8)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Day 4:
- R7 fixed: overlap-density ranking. p06-firmware-interface now
  passes (was the last memory-ranking failure). Harness 16/18→17/18.
- R9 fixed: LLM extractor checks project registry before trusting
  model-supplied project. Hallucinated projects fall back to
  interaction's known scope. Registry lookup via
  load_project_registry(), matched by project_id. Host-side script
  mirrors this via GET /projects at startup.

Day 5:
- R8 addressed: 5 integration tests in test_extraction_pipeline.py
  covering the full LLM extract → persist as candidate → promote/
  reject flow, project fallback, failure handling, and dedup
  behavior. Uses mocked subprocess to avoid real claude -p calls.

Harness: 17/18 (only p06-tailscale remains — chunk bleed from
source content, not a memory/ranking issue).
Tests: 280 → 286 (+6).

Batch complete. Before/after for this batch:
  R1:  fixed (extraction pipeline operational on Dalidou)
  R5:  fixed (batch endpoint + host-side script)
  R7:  fixed (overlap-density ranking)
  R9:  fixed (project trust-preservation via registry check)
  R8:  addressed (5 integration tests)
  Harness: 16/18 → 17/18
  Active memories: 36 → 41
  Nightly pipeline: backup → cleanup → rsync → extract → auto-triage

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_extraction_pipeline.py | 173 ++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 tests/test_extraction_pipeline.py

diff --git a/tests/test_extraction_pipeline.py b/tests/test_extraction_pipeline.py
new file mode 100644
index 0000000..7c28b43
--- /dev/null
+++ b/tests/test_extraction_pipeline.py
@@ -0,0 +1,173 @@
+"""Integration tests for the extraction + triage pipeline (R8).
+
+Tests the flow that produced the 41 active memories:
+LLM extraction → persist as candidate → triage → promote/reject.
+Uses mocked subprocess to avoid real claude -p calls.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+from atocore.memory.extractor_llm import (
+    extract_candidates_llm,
+    extract_candidates_llm_verbose,
+)
+from atocore.memory.service import create_memory, get_memories
+from atocore.models.database import init_db
+import atocore.memory.extractor_llm as extractor_llm
+
+
+def _make_interaction(**kw):  # test helper: build an Interaction with defaults
+    from atocore.interactions.service import Interaction  # imported lazily, at call time
+
+    return Interaction(
+        id=kw.get("id", "test-pipe-1"),  # overridable via kw
+        prompt=kw.get("prompt", "test prompt"),  # overridable via kw
+        response=kw.get("response", ""),  # overridable via kw
+        response_summary="",  # fixed: not read from kw
+        project=kw.get("project", ""),  # overridable via kw; empty by default
+        client="test",  # fixed: not read from kw
+        session_id="",  # fixed: not read from kw
+    )
+
+
+class _FakeCompleted:  # minimal object returned by the patched subprocess.run
+    def __init__(self, stdout, returncode=0):  # returncode defaults to 0
+        self.stdout = stdout  # canned text handed back to the extractor
+        self.stderr = ""  # always empty; tests vary only stdout and returncode
+        self.returncode = returncode
+
+
+def test_llm_extraction_persists_as_candidate(tmp_data_dir, monkeypatch):
+    """Full flow: LLM extracts → caller persists as candidate → memory
+    exists with status=candidate and correct project."""
+    init_db()
+    monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)  # force the check to return True
+    monkeypatch.setattr(  # replace subprocess.run as referenced by the extractor module
+        extractor_llm.subprocess,
+        "run",
+        lambda *a, **kw: _FakeCompleted(  # ignores all arguments; returns canned stdout
+            '[{"type": "project", "content": "USB SSD is mandatory for RPi storage", "project": "p06-polisher", "confidence": 0.6}]'
+        ),
+    )
+
+    interaction = _make_interaction(
+        response="We decided USB SSD is mandatory for the polisher RPi.",
+        project="p06-polisher",  # same project as in the canned model output
+    )
+    candidates = extract_candidates_llm(interaction)
+    assert len(candidates) == 1  # the canned JSON array holds one entry
+    assert candidates[0].content == "USB SSD is mandatory for RPi storage"
+
+    mem = create_memory(  # persist the extracted candidate explicitly, as a caller would
+        memory_type=candidates[0].memory_type,
+        content=candidates[0].content,
+        project=candidates[0].project,
+        confidence=candidates[0].confidence,
+        status="candidate",
+    )
+    assert mem.status == "candidate"
+    assert mem.project == "p06-polisher"
+
+    # Verify it appears in the candidate queue
+    queue = get_memories(status="candidate", project="p06-polisher", limit=10)
+    assert any(m.id == mem.id for m in queue)  # matched by id, not by content
+
+
+def test_llm_extraction_project_fallback(tmp_data_dir, monkeypatch):
+    """R6+R9: when model returns empty project, candidate inherits
+    the interaction's project."""
+    init_db()
+    monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)  # force the check to return True
+    monkeypatch.setattr(  # replace subprocess.run as referenced by the extractor module
+        extractor_llm.subprocess,
+        "run",
+        lambda *a, **kw: _FakeCompleted(  # canned output whose "project" field is ""
+            '[{"type": "knowledge", "content": "machine works offline", "project": "", "confidence": 0.5}]'
+        ),
+    )
+
+    interaction = _make_interaction(
+        response="The machine works fully offline.",
+        project="p06-polisher",  # the only place this project name is supplied
+    )
+    candidates = extract_candidates_llm(interaction)
+    assert len(candidates) == 1
+    assert candidates[0].project == "p06-polisher"  # taken from the interaction, not the model output
+
+
+def test_promote_reject_flow(tmp_data_dir):
+    """Candidate → promote and candidate → reject both work via the
+    service layer (mirrors what auto_triage.py does via HTTP)."""
+    from atocore.memory.service import promote_memory, reject_candidate_memory
+
+    init_db()
+    good = create_memory(  # candidate that will be promoted
+        memory_type="project",
+        content="durable fact worth keeping",
+        project="p06-polisher",
+        confidence=0.5,
+        status="candidate",
+    )
+    bad = create_memory(  # candidate that will be rejected; lives in a different project
+        memory_type="project",
+        content="stale snapshot to reject",
+        project="atocore",
+        confidence=0.5,
+        status="candidate",
+    )
+
+    promote_memory(good.id)
+    reject_candidate_memory(bad.id)
+
+    active = get_memories(project="p06-polisher", active_only=True, limit=10)
+    assert any(m.id == good.id for m in active)  # promoted memory shows up as active
+
+    candidates = get_memories(status="candidate", limit=10)  # no project filter: all candidates
+    assert not any(m.id == good.id for m in candidates)  # promoted one left the queue
+    assert not any(m.id == bad.id for m in candidates)  # rejected one left the queue; its final status is not asserted
+
+
+def test_duplicate_content_creates_separate_memory(tmp_data_dir):
+    """create_memory allows duplicate content (dedup is the triage
+    model's responsibility, not the DB layer). Both memories exist."""
+    init_db()
+    m1 = create_memory(  # no status passed: uses create_memory's default status
+        memory_type="project",
+        content="unique fact about polisher",
+        project="p06-polisher",
+    )
+    m2 = create_memory(  # same type, content and project as m1, but explicitly a candidate
+        memory_type="project",
+        content="unique fact about polisher",
+        project="p06-polisher",
+        status="candidate",
+    )
+    assert m1.id != m2.id  # NOTE(review): only distinct ids are checked; neither row is re-queried
+
+
+def test_llm_extraction_failure_returns_empty(tmp_data_dir, monkeypatch):
+    """The full persist flow handles LLM extraction failure gracefully:
+    0 candidates, nothing persisted, no raise."""
+    init_db()
+    monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)  # force the check to return True
+    monkeypatch.setattr(  # replace subprocess.run as referenced by the extractor module
+        extractor_llm.subprocess,
+        "run",
+        lambda *a, **kw: _FakeCompleted("", returncode=1),  # simulated failure: empty stdout, returncode 1
+    )
+
+    interaction = _make_interaction(
+        response="some real content that the LLM fails on",
+        project="p06-polisher",
+    )
+    result = extract_candidates_llm_verbose(interaction)  # verbose variant: result has .candidates and .error
+    assert result.candidates == []
+    assert "exit_1" in result.error  # presumably derived from returncode=1; verify against extractor_llm
+
+    # Nothing in the candidate queue
+    queue = get_memories(status="candidate", limit=10)  # this test never calls create_memory itself
+    assert len(queue) == 0