feat: Day 4+5 — R7/R9 fixes + integration tests (R8)
Day 4: - R7 fixed: overlap-density ranking. p06-firmware-interface now passes (was the last memory-ranking failure). Harness 16/18→17/18. - R9 fixed: LLM extractor checks project registry before trusting model-supplied project. Hallucinated projects fall back to interaction's known scope. Registry lookup via load_project_registry(), matched by project_id. Host-side script mirrors this via GET /projects at startup. Day 5: - R8 addressed: 5 integration tests in test_extraction_pipeline.py covering the full LLM extract → persist as candidate → promote/ reject flow, project fallback, failure handling, and dedup behavior. Uses mocked subprocess to avoid real claude -p calls. Harness: 17/18 (only p06-tailscale remains — chunk bleed from source content, not a memory/ranking issue). Tests: 280 → 286 (+6). Batch complete. Before/after for this batch: R1: fixed (extraction pipeline operational on Dalidou) R5: fixed (batch endpoint + host-side script) R7: fixed (overlap-density ranking) R9: fixed (project trust-preservation via registry check) R8: addressed (5 integration tests) Harness: 16/18 → 17/18 Active memories: 36 → 41 Nightly pipeline: backup → cleanup → rsync → extract → auto-triage Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
173
tests/test_extraction_pipeline.py
Normal file
173
tests/test_extraction_pipeline.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""Integration tests for the extraction + triage pipeline (R8).
|
||||
|
||||
Tests the flow that produced the 41 active memories:
|
||||
LLM extraction → persist as candidate → triage → promote/reject.
|
||||
Uses mocked subprocess to avoid real claude -p calls.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from atocore.memory.extractor_llm import (
|
||||
extract_candidates_llm,
|
||||
extract_candidates_llm_verbose,
|
||||
)
|
||||
from atocore.memory.service import create_memory, get_memories
|
||||
from atocore.models.database import init_db
|
||||
import atocore.memory.extractor_llm as extractor_llm
|
||||
|
||||
|
||||
def _make_interaction(**overrides):
    """Build an Interaction with test defaults; any keyword overrides them.

    Local import keeps module import time free of project side effects.
    """
    from atocore.interactions.service import Interaction

    defaults = {
        "id": "test-pipe-1",
        "prompt": "test prompt",
        "response": "",
        "project": "",
    }
    merged = {**defaults, **overrides}
    return Interaction(
        id=merged["id"],
        prompt=merged["prompt"],
        response=merged["response"],
        response_summary="",
        project=merged["project"],
        client="test",
        session_id="",
    )
|
||||
|
||||
|
||||
class _FakeCompleted:
|
||||
def __init__(self, stdout, returncode=0):
|
||||
self.stdout = stdout
|
||||
self.stderr = ""
|
||||
self.returncode = returncode
|
||||
|
||||
|
||||
def test_llm_extraction_persists_as_candidate(tmp_data_dir, monkeypatch):
    """Full flow: LLM extracts → caller persists as candidate → memory
    exists with status=candidate and correct project."""
    init_db()
    # Pretend the claude CLI is installed and return a canned JSON payload.
    monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
    payload = '[{"type": "project", "content": "USB SSD is mandatory for RPi storage", "project": "p06-polisher", "confidence": 0.6}]'
    monkeypatch.setattr(
        extractor_llm.subprocess,
        "run",
        lambda *args, **kwargs: _FakeCompleted(payload),
    )

    interaction = _make_interaction(
        response="We decided USB SSD is mandatory for the polisher RPi.",
        project="p06-polisher",
    )
    extracted = extract_candidates_llm(interaction)
    assert len(extracted) == 1
    candidate = extracted[0]
    assert candidate.content == "USB SSD is mandatory for RPi storage"

    # Persist exactly what the extractor produced, as a candidate.
    persisted = create_memory(
        memory_type=candidate.memory_type,
        content=candidate.content,
        project=candidate.project,
        confidence=candidate.confidence,
        status="candidate",
    )
    assert persisted.status == "candidate"
    assert persisted.project == "p06-polisher"

    # Verify it appears in the candidate queue
    pending = get_memories(status="candidate", project="p06-polisher", limit=10)
    assert any(m.id == persisted.id for m in pending)
|
||||
|
||||
|
||||
def test_llm_extraction_project_fallback(tmp_data_dir, monkeypatch):
    """R6+R9: when model returns empty project, candidate inherits
    the interaction's project."""
    init_db()
    monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
    # Model reply deliberately leaves "project" empty to trigger the fallback.
    payload = '[{"type": "knowledge", "content": "machine works offline", "project": "", "confidence": 0.5}]'
    monkeypatch.setattr(
        extractor_llm.subprocess,
        "run",
        lambda *args, **kwargs: _FakeCompleted(payload),
    )

    interaction = _make_interaction(
        response="The machine works fully offline.",
        project="p06-polisher",
    )
    extracted = extract_candidates_llm(interaction)
    assert len(extracted) == 1
    # The empty model-supplied project must be replaced by the interaction's scope.
    assert extracted[0].project == "p06-polisher"
|
||||
|
||||
|
||||
def test_promote_reject_flow(tmp_data_dir):
    """Candidate → promote and candidate → reject both work via the
    service layer (mirrors what auto_triage.py does via HTTP)."""
    from atocore.memory.service import promote_memory, reject_candidate_memory

    init_db()
    keeper = create_memory(
        memory_type="project",
        content="durable fact worth keeping",
        project="p06-polisher",
        confidence=0.5,
        status="candidate",
    )
    reject_me = create_memory(
        memory_type="project",
        content="stale snapshot to reject",
        project="atocore",
        confidence=0.5,
        status="candidate",
    )

    promote_memory(keeper.id)
    reject_candidate_memory(reject_me.id)

    # Promotion makes the memory active for its project...
    active_ids = {
        m.id
        for m in get_memories(project="p06-polisher", active_only=True, limit=10)
    }
    assert keeper.id in active_ids

    # ...and neither memory is left in the candidate queue afterwards.
    remaining_ids = {m.id for m in get_memories(status="candidate", limit=10)}
    assert keeper.id not in remaining_ids
    assert reject_me.id not in remaining_ids
|
||||
|
||||
|
||||
def test_duplicate_content_creates_separate_memory(tmp_data_dir):
    """create_memory allows duplicate content (dedup is the triage
    model's responsibility, not the DB layer). Both memories exist."""
    init_db()
    # Same type/content/project for both rows; only the second is a candidate.
    shared = dict(
        memory_type="project",
        content="unique fact about polisher",
        project="p06-polisher",
    )
    first = create_memory(**shared)
    second = create_memory(status="candidate", **shared)
    # Identical content still yields two distinct rows.
    assert first.id != second.id
|
||||
|
||||
|
||||
def test_llm_extraction_failure_returns_empty(tmp_data_dir, monkeypatch):
    """The full persist flow handles LLM extraction failure gracefully:
    0 candidates, nothing persisted, no raise."""
    init_db()
    monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
    # Simulate the CLI failing: empty stdout with a non-zero exit status.
    monkeypatch.setattr(
        extractor_llm.subprocess,
        "run",
        lambda *args, **kwargs: _FakeCompleted("", returncode=1),
    )

    interaction = _make_interaction(
        response="some real content that the LLM fails on",
        project="p06-polisher",
    )
    outcome = extract_candidates_llm_verbose(interaction)
    assert outcome.candidates == []
    # The verbose result surfaces the non-zero exit code in its error field.
    assert "exit_1" in outcome.error

    # Nothing in the candidate queue
    assert len(get_memories(status="candidate", limit=10)) == 0
|
||||
Reference in New Issue
Block a user